In [1]:
import tensorflow as tf

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [27]:
df = pd.read_csv(r'DataSets\prices.csv')
df['Daily Return'] = df['close'] - df['open']

In [72]:
df2 = pd.read_csv(r'DataSets\indexData.csv')
df2['Daily Return'] = df2['Close'] - df2['Open']
df2.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return
0,NYA,1965-12-31,528.690002,528.690002,528.690002,528.690002,528.690002,0.0,0.0
1,NYA,1966-01-03,527.210022,527.210022,527.210022,527.210022,527.210022,0.0,0.0
2,NYA,1966-01-04,527.840027,527.840027,527.840027,527.840027,527.840027,0.0,0.0
3,NYA,1966-01-05,531.119995,531.119995,531.119995,531.119995,531.119995,0.0,0.0
4,NYA,1966-01-06,532.070007,532.070007,532.070007,532.070007,532.070007,0.0,0.0


In [45]:
AAPL = df[df['symbol'] == 'AAPL']

In [46]:
AAPL = AAPL.drop(columns=['open', 'high', 'low', 'close', 'volume', 'date', 'symbol'])

In [47]:
AAPL

Unnamed: 0,Daily Return
254,0.580000
721,-0.220005
1189,-3.409998
1657,-1.170000
2125,1.680011
...,...
848767,0.930001
849267,0.740005
849767,-0.759995
850267,0.280006


In [48]:
scaler = MinMaxScaler(feature_range=(0, 1))
AAPL_scaled = scaler.fit_transform(AAPL)

In [49]:
def create_sequences(data, seq_length):
    X,y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(AAPL_scaled, seq_length)

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [51]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [52]:
param_grid = {
    'units': [32, 50],
    'batch_size': [16, 32],
    'epochs': [50, 100]
}

best_model = None
best_r2 = float('-inf')

In [None]:
for params in ParameterGrid(param_grid):
    print(f"Training with params: {params}")
    model = Sequential([
    SimpleRNN(units=params['units'], activation='relu', return_sequences = True, input_shape=(seq_length, 1)), 
    SimpleRNN(units=params['units'], activation='relu'),
    Dense(1)])

    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], validation_data = (X_test, y_test), verbose=0)
    y_pred = model.predict(X_test)
    y_pred_actual = scaler.inverse_transform(y_pred)
    y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

    r2 = r2_score(y_test_actual, y_pred_actual)
    print(f"R^2 Score: {r2}")

    if r2 > best_r2:
        best_r2 = r2
        best_model = model


In [73]:
NYA = df2[df2['Index'] == 'NYA']

In [74]:
NYA['Date'] = pd.to_datetime(NYA['Date'])
NYA = NYA[NYA['Date'] >= '2003-01-09']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  NYA['Date'] = pd.to_datetime(NYA['Date'])


In [75]:
NYA = NYA.drop(columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close', 'Index', 'Date'])

In [76]:
scaler = MinMaxScaler(feature_range=(0, 1))
NYA_scaled = scaler.fit_transform(NYA)

In [77]:
X, y = create_sequences(NYA_scaled, seq_length)

In [78]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [79]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [80]:
param_grid = {
    'units': [32, 50],
    'batch_size': [16, 32],
    'epochs': [50, 100]
}

best_model = None
best_r2 = float('-inf')

In [81]:
for params in ParameterGrid(param_grid):
    print(f"Training with params: {params}")
    model = Sequential([
    SimpleRNN(units=params['units'], activation='relu', return_sequences = True, input_shape=(seq_length, 1)), 
    SimpleRNN(units=params['units'], activation='relu'),
    Dense(1)])

    model.compile(optimizer='adam', loss='mse')

    model.fit(X_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], validation_data = (X_test, y_test), verbose=0)
    y_pred = model.predict(X_test)
    y_pred_actual = scaler.inverse_transform(y_pred)
    y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

    r2 = r2_score(y_test_actual, y_pred_actual)
    print(f"R^2 Score: {r2}")

    if r2 > best_r2:
        best_r2 = r2
        best_model = model


Training with params: {'batch_size': 16, 'epochs': 50, 'units': 32}


  super().__init__(**kwargs)


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
R^2 Score: -0.13898798360032916
Training with params: {'batch_size': 16, 'epochs': 50, 'units': 50}


  super().__init__(**kwargs)


KeyboardInterrupt: 