## Preparing data

In [None]:
import pandas as pd
import talib
import numpy as np
import yfinance as yf

In [None]:
# Download 15-minute AAPL bars for the last 59 days from Yahoo Finance
df = yf.download('AAPL', interval='15m', period='59d')

# Newer yfinance releases return MultiIndex columns (field, ticker) even for a
# single ticker. Flatten them to the field names so that df['Close'],
# df['High'], ... remain 1-D Series for the cells below.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Alternative offline source:
# df = pd.read_csv('GBPCAD5m.csv')
# Drop any rows with missing data
df = df.dropna()

In [None]:
# Preview the first rows of the loaded price data
df.head()


In [None]:
# Convert the 'Datetime' column to a datetime object
# df['Datetime'] = pd.to_datetime(df['Datetime'])

In [None]:
# Reset the index: the previous index is moved into a regular column and the
# rows are renumbered 0..n-1
df = df.reset_index()

## Setting target

In [None]:

# Binary label per bar: 1 when the following bar closes above this bar's close,
# otherwise 0.
# NOTE(review): the last row has no following close (shift(-1) gives NaN), so
# the comparison is False and that row is always labelled 0.
df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)


## Adding indicators as Features

In [None]:
# The talib functions below are fed float64 ("double") arrays, so convert each
# price/volume column once up front
close, high, low, volume = (
    df[column].to_numpy(dtype=np.float64)
    for column in ('Close', 'High', 'Low', 'Volume')
)

In [None]:
# Technical indicators computed with talib on the float64 arrays.
# MACD returns three series; only the MACD line itself is stored as a column.
macd, macdsignal, macdhist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)

df = df.assign(
    SMA=talib.SMA(close, timeperiod=14),
    EMA=talib.EMA(close, timeperiod=14),
    MACD=macd,
    RSI=talib.RSI(close, timeperiod=14),
    ADX=talib.ADX(high, low, close, timeperiod=14),
    ATR=talib.ATR(high, low, close, timeperiod=14),
    OBV=talib.OBV(close, volume),
)

# Drop every row that still contains a NaN (e.g. indicator warm-up rows)
df = df.dropna()

In [None]:
# Number of previous bars whose indicator values are added as extra features
lookback = 5
INDICATORS = ['SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'ATR', 'OBV']

# Model inputs: current-bar columns first, then the lagged columns appended below
features = ['High', 'Low', 'SMA', 'EMA', 'MACD', 'RSI', 'ADX', 'ATR', 'OBV', 'Volume']

for indicator in INDICATORS:
    for lag in range(1, lookback + 1):
        # e.g. 'RSI_2' holds the RSI value from two rows earlier
        lag_column = f'{indicator}_{lag}'
        df[lag_column] = df[indicator].shift(lag)
        features.append(lag_column)
        


In [None]:
# Remove rows containing NaN (shift() leaves the first rows of every lag
# column empty)
df.dropna(inplace=True)
# Renumber the rows 0..n-1. drop=True discards the old index instead of
# inserting it as a stray 'index' column; without it, re-running this cell
# keeps adding columns ('index', then 'level_0') and eventually raises.
df.reset_index(drop=True, inplace=True)


## Splitting data

In [None]:
# NOTE: train_test_split is imported but not used in the cells shown; the
# split below is positional (first 80% of rows for training, rest for testing).
from sklearn.model_selection import train_test_split

# Row position separating the training rows from the test rows
split_index = int(0.8 * len(df))

# Earlier rows form the training frame, later rows the test frame
df_train, df_test = df.iloc[:split_index], df.iloc[split_index:]

# Feature matrices and label vectors for each partition
X_train, y_train = df_train[features], df_train['Target']
X_test, y_test = df_test[features], df_test['Target']


## Selecting Model

In [None]:
# import xgboost as xgb
# # model = xgb.XGBClassifier()

## Training Model

In [None]:
# Train the model
# model.fit(X_train, y_train)

### Bagging Model

In [None]:
from sklearn.ensemble import RandomForestClassifier


# Baseline bagging model: a random forest with a fixed number of trees
n_estimators = 25  # Number of decision trees in the ensemble
# No random_state is passed, so repeated fits are not guaranteed to be identical
model = RandomForestClassifier(n_estimators=n_estimators)

In [None]:
# Fit the baseline forest on the training features and labels
model.fit(X_train, y_train)

## Making Prediction with Model

In [None]:
# Make predictions on the test set (one predicted Target label per test row)
y_pred = model.predict(X_test)

## Evaluate Models Accuracy

In [None]:
from sklearn.metrics import accuracy_score
# Evaluate the model's accuracy: compare the predicted labels with the true
# test labels (accuracy_score returns the fraction that match by default)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

In [None]:
# Wrap the predictions in a single-column DataFrame (the column is labelled 0)
preds = pd.DataFrame(y_pred)

# Number of non-null predictions per column. This counts every prediction
# (both 0 and 1 labels), so it equals the number of test rows.
preds.count()


# Optimizer

In [303]:
# Random search: draw a random number of trees (below 1000) and retrain until a model beats the accuracy threshold



def optimizer(number):
    """Train a random forest with `number` trees and score it on the test split.

    Reads the notebook-level X_train, y_train, X_test and y_test.

    Args:
        number: Number of trees (n_estimators). May be a plain integer, a
            NumPy scalar, or a one-element array such as the result of
            ``np.random.randint(..., size=1)``.

    Returns:
        A tuple ``(accuracy, no_trades, model)``:
        - accuracy: value returned by ``accuracy_score`` for the test split
          (a fraction, not a percentage).
        - no_trades: the Series returned by ``DataFrame.count()`` on the
          predictions; entry ``0`` is the number of predictions made.
        - model: the fitted RandomForestClassifier.
    """
    # int() on a 1-D array is deprecated since NumPy 1.25; .item() extracts the
    # single value from scalars, 0-d arrays and one-element arrays alike.
    n_estimators = int(np.asarray(number).item())

    model = RandomForestClassifier(n_estimators=n_estimators)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    preds = pd.DataFrame(y_pred)
    # Counts every prediction (0s and 1s), one per test row
    no_trades = preds.count()
    return accuracy, no_trades, model

# Imported before the search starts so a missing package fails immediately
# rather than after a successful (and possibly long) search.
import datetime as dt
import os

import joblib

# Random search over the forest size: retrain with a random number of trees
# until a model exceeds 65% accuracy on the test split, then save it.
# NOTE(review): the loop has no iteration limit and selects the model on
# test-set accuracy, so the reported figure is optimistic; consider a cap and
# a separate validation split.
while True:
    # A forest needs at least one tree, so draw from [1, 1000) rather than
    # [0, 1000). A plain int (instead of a 1-element array) also avoids the
    # deprecated array-to-int conversion and prints without brackets.
    number = int(np.random.randint(1, 1000))
    accuracy, no_trades, model = optimizer(number)
    no_trades = int(no_trades[0])
    accuracy = accuracy * 100
    if accuracy > 65 and no_trades > 0:
        print(f'Accuracy: {accuracy}% No of trades: {no_trades} Number: {number}')
        time = dt.datetime.now()
        time = time.strftime("%Y-%m-%d_%H-%M-%S")
        # joblib.dump does not create missing directories
        os.makedirs('trained_models', exist_ok=True)
        joblib.dump(model, f'trained_models/model_{time}.pkl')
        break
        
  


KeyboardInterrupt: 

In [None]:
# import matplotlib.pyplot as plt
# fig, ax = plt.subplots(figsize=(10, 10))
# xgb.plot_importance(model, max_num_features=15, height=0.8, ax=ax)

In [None]:
import datetime as dt
import os

import joblib

# Persist the current `model` under a timestamped filename
time = dt.datetime.now()
time = time.strftime("%Y-%m-%d_%H-%M-%S")
# joblib.dump does not create missing directories, so make sure it exists
os.makedirs('trained_models', exist_ok=True)
joblib.dump(model, f'trained_models/model_{time}.pkl')

# Testing saved Model

In [None]:
# model = joblib.load(f'trained_models/model_2023-05-27_03-52-00.pkl')

In [None]:
# X_train.head()

In [None]:
# new_pred = model.predict(X_train)

# accuracy = accuracy_score(y_train, new_pred)

In [None]:
# Display the current value of `accuracy`: a fraction as set by the evaluation
# cell, or a percentage if the optimizer loop ran to completion (it multiplies
# the value by 100)
accuracy