In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix

In [1]:
import tensorflow as tf

In [12]:
df = pd.read_csv(r'DataSets\prices.csv')
info = pd.read_csv(r'DataSets\securities.csv')
df['Daily Return'] = df['close'] - df['open']
df['WentUp?'] = df['Daily Return'] > 0


In [13]:
info = info[info['Ticker symbol'].isin(df['symbol'])]
info = info[['Ticker symbol', 'GICS Sector', 'GICS Sub Industry']]

In [14]:
df = df.merge(info, left_on='symbol', right_on='Ticker symbol')

In [15]:
def add_moving_averages(df, column='Close', sma_window=5, ema_window=10):

    df[f'SMA_{sma_window}'] = df.groupby('symbol')[column].transform(lambda x: x.rolling(window=sma_window).mean())
    df[f'EMA_{ema_window}'] = df.groupby('symbol')[column].transform(lambda x: x.ewm(span=ema_window, adjust=False).mean())

    return df
def add_rsi(df, column='Close', window=14):
    delta = df[column].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()

    rs = gain / loss
    df[f'RSI_{window}'] = 100 - (100 / (1 + rs))
    return df

def add_macd(df, column='Close', short_window=12, long_window=26, signal_window=9):
    df['MACD_line'] = df[column].ewm(span=short_window, adjust=False).mean() - df[column].ewm(span=long_window, adjust=False).mean()
    df['MACD_signal'] = df['MACD_line'].ewm(span=signal_window, adjust=False).mean()
    return df




In [16]:
add_moving_averages(df, column='close', sma_window=5, ema_window=10)
add_moving_averages(df, column='close', sma_window=20, ema_window=50)
add_rsi(df, column='close', window=14)
add_macd(df, column='close', short_window=12, long_window=26, signal_window=9)

Unnamed: 0,date,symbol,open,close,low,high,volume,Daily Return,WentUp?,Ticker symbol,GICS Sector,GICS Sub Industry,SMA_5,EMA_10,SMA_20,EMA_50,RSI_14,MACD_line,MACD_signal
0,2016-01-05 00:00:00,WLTW,123.430000,125.839996,122.309998,126.250000,2163600.0,2.409996,True,WLTW,Financials,Insurance Brokers,,125.839996,,125.839996,,0.000000,0.000000
1,2016-01-06 00:00:00,WLTW,125.239998,119.980003,119.940002,125.540001,2386400.0,-5.259995,False,WLTW,Financials,Insurance Brokers,,124.774543,,125.610192,,-0.467464,-0.093493
2,2016-01-07 00:00:00,WLTW,116.379997,114.949997,114.930000,119.739998,2489500.0,-1.430000,False,WLTW,Financials,Insurance Brokers,,122.988262,,125.192145,,-1.229637,-0.320722
3,2016-01-08 00:00:00,WLTW,115.480003,116.620003,113.500000,117.440002,2006300.0,1.140000,True,WLTW,Financials,Insurance Brokers,,121.830396,,124.855983,,-1.679549,-0.592487
4,2016-01-11 00:00:00,WLTW,117.010002,114.970001,114.089996,117.330002,1408600.0,-2.040001,False,WLTW,Financials,Insurance Brokers,118.472000,120.583052,,124.468297,,-2.144528,-0.902895
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
851259,2016-12-30,ZBH,103.309998,103.199997,102.849998,103.930000,973800.0,-0.110001,False,ZBH,Health Care,Health Care Equipment,103.043999,103.008135,103.352500,107.030916,58.789265,-2.805761,-5.486220
851260,2016-12-30,ZION,43.070000,43.040001,42.689999,43.310001,1938100.0,-0.029999,False,ZION,Financials,Regional Banks,43.380000,43.114522,42.709000,39.090891,51.256525,-4.163148,-5.221606
851261,2016-12-30,ZTS,53.639999,53.529999,53.270000,53.740002,1701200.0,-0.110000,False,ZTS,Health Care,Pharmaceuticals,53.617999,53.107125,52.075999,51.365061,47.618451,-4.342375,-5.045759
851262,2016-12-30 00:00:00,AIV,44.730000,45.450001,44.410000,45.590000,1380900.0,0.720001,True,AIV,Real Estate,REITs,44.686001,44.375040,43.460000,43.265573,45.700163,-5.077868,-5.052181


In [17]:
df['Yesterday High'] = df.groupby('symbol')['high'].shift(1)
df['Yesterday Low'] = df.groupby('symbol')['low'].shift(1)
df['Yesterday return'] = df.groupby('symbol')['Daily Return'].shift(1)

In [18]:
df.dropna(inplace=True)
df.shape

(841745, 22)

In [19]:
X = df.drop(['date', 'symbol', 'close', 'Daily Return', 'high', 'low', 'WentUp?'], axis=1)
y = df['WentUp?']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:

prep = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['open', 'volume', 'EMA_50', 'SMA_20', 'RSI_14',  'SMA_5',  'EMA_10','Yesterday High', 'Yesterday Low', 'Yesterday return', 'MACD_line', 'MACD_signal']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['GICS Sector', 'GICS Sub Industry'])
    ])

In [21]:
scaler = prep.fit(X)
X_scaled = scaler.fit_transform(X) 


In [22]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [23]:
def create_regression_model(optimizer='adam', learning_rate=0.001, neurons1=16, neurons2=8):
    model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)  # Output layer with 1 unit for regression
])

# Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [24]:
param_grid = {
'batch_size': [10],
'epochs': [5],
'optimizer': ['adam'], # Use strings, not class references
'neurons1': [12], # Varying neurons in the first hidden layer
'neurons2': [6], # Varying neurons in the second hidden layer
'learning_rate': [0.001] # Varying learning rates for all optimi
}

In [25]:
def fit_rmodel_with_params(params):
    model = create_regression_model(
    optimizer=params['optimizer'],
    learning_rate=params['learning_rate'],
    neurons1=params['neurons1'],
    neurons2=params['neurons2']
    )
    model.fit(X_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], verbose=2 )
    return model

15 Minute run time

In [None]:
# best_score = -np.inf
# best_params = None
# best_model = None
# for params in ParameterGrid(param_grid):
#     print(f"Training model with parameters: {params}")
#     model = fit_rmodel_with_params(params)
#     y_pred = model.predict(X_test, batch_size=params['batch_size'])
#     score = r2_score(y_test, y_pred)
#     if score > best_score:
#         best_score = score
#         best_params = params
#         best_model = model

NameError: name 'np' is not defined

In [None]:
# print("\nBest Hyperparameters:", best_params)
# print("Best R² Score:", best_score)
# # Evaluate Best Model
# y_pred = best_model.predict(X_test)
# print("\nModel Evaluation Metrics:")
# print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
# print("Mean Squared Error:", mean_squared_error(y_test, y_pred))


Best Hyperparameters: {'batch_size': 10, 'epochs': 5, 'learning_rate': 0.001, 'neurons1': 12, 'neurons2': 6, 'optimizer': 'adam'}
Best R² Score: -9.023848048772543e-05
[1m5321/5321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step

Model Evaluation Metrics:
Mean Absolute Error: 0.6983801782013946
Mean Squared Error: 2.135959004343957


In [26]:
def create_classification_model(optimizer='adam', learning_rate=0.001, neurons1=16, neurons2=8):
    model = Sequential([
        Dense(neurons1, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(neurons2, activation='relu'),
        Dense(1, activation='sigmoid')  # Output layer with 1 unit for binary classification
    ])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [27]:
def fit_cmodel_with_params(params):
    model = create_classification_model(
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        neurons1=params['neurons1'],
        neurons2=params['neurons2']
    )
    model.fit(X_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], verbose=2)
    return model

In [28]:
best_score = -np.inf
best_params = None
best_model = None
for params in ParameterGrid(param_grid):
    print(f"Training model with parameters: {params}")
    model = fit_cmodel_with_params(params)
    y_pred = model.predict(X_test, batch_size=params['batch_size'])
    score = r2_score(y_test, y_pred)
    if score > best_score:
        best_score = score
        best_params = params
        best_model = model

Training model with parameters: {'batch_size': 10, 'epochs': 5, 'learning_rate': 0.001, 'neurons1': 12, 'neurons2': 6, 'optimizer': 'adam'}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
67340/67340 - 167s - 2ms/step - accuracy: 0.5142 - loss: 0.6927
Epoch 2/5
67340/67340 - 167s - 2ms/step - accuracy: 0.5174 - loss: 0.6924
Epoch 3/5
67340/67340 - 168s - 3ms/step - accuracy: 0.5181 - loss: 0.6923
Epoch 4/5
67340/67340 - 167s - 2ms/step - accuracy: 0.5193 - loss: 0.6921
Epoch 5/5
67340/67340 - 167s - 2ms/step - accuracy: 0.5197 - loss: 0.6920
[1m16835/16835[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2ms/step


In [31]:
print("\nBest Hyperparameters:", best_params)
print("Best Accuracy Score:", best_score)
classification_report(y_test, y_pred)


Best Hyperparameters: {'batch_size': 10, 'epochs': 5, 'learning_rate': 0.001, 'neurons1': 12, 'neurons2': 6, 'optimizer': 'adam'}
Best Accuracy Score: 0.0008633732795715332


ValueError: Classification metrics can't handle a mix of binary and continuous targets