In [3]:
#Classification - Random Forest
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

#Load S&P Dataset
df = pd.read_csv('/Users/cwaldner/Library/CloudStorage/OneDrive-NorthcentralUniversity/PhD Program/TIM-8131/Week8/SP_Data/sp500_stocks.csv')

#Preproccessing
    #Drop Missing
df.dropna(inplace=True)

    #Create DF for Price Move - 1 Up else 0
df['Next_High'] = df['High'].shift(-1)  
df['Price_Movement'] = (df['Next_High'] > df['High']).astype(int)  

#Features
X = df[['Open', 'High', 'Low', 'Volume']]
y = df['Price_Movement']

#Split, Train, Test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest w/ parallel processing
clf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)

#Predict
y_pred = clf.predict(X_test)

#Results
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.57      0.54      0.55    172909
           1       0.58      0.60      0.59    180230

    accuracy                           0.57    353139
   macro avg       0.57      0.57      0.57    353139
weighted avg       0.57      0.57      0.57    353139

[[ 93223  79686]
 [ 71451 108779]]


In [4]:
#Neural Network - Shallow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense

#Load S&P Dataset
df = pd.read_csv('/Users/cwaldner/Library/CloudStorage/OneDrive-NorthcentralUniversity/PhD Program/TIM-8131/Week8/SP_Data/sp500_stocks.csv')

#Preproccessing
    #Drop Missing
df.dropna(inplace=True)

#Prepare Features
df['Next_High'] = df['High'].shift(-1)
df['Target'] = (df['Next_High'] > df['High']).astype(int)
df.drop(['Date', 'Symbol', 'Adj Close', 'Close', 'Next_High'], axis=1, inplace=True)

#Set Target
X = df.drop('Target', axis=1)
y = df['Target']

#Split, Train, Test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#Build SNN w/ 1 layer hidden
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(X_train.shape[1],)))  
model.add(Dense(1, activation='sigmoid'))  

#Compile
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#Train
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

#Evaluate
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy:.4f}')

#Predict
y_pred = (model.predict(X_test) > 0.5).astype("int32")


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 237us/step - accuracy: 0.5166 - loss: 0.6928
Epoch 2/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 237us/step - accuracy: 0.5254 - loss: 0.6912
Epoch 3/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 236us/step - accuracy: 0.5384 - loss: 0.6894
Epoch 4/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235us/step - accuracy: 0.5497 - loss: 0.6871
Epoch 5/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235us/step - accuracy: 0.5604 - loss: 0.6841
Epoch 6/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235us/step - accuracy: 0.5678 - loss: 0.6816
Epoch 7/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235us/step - accuracy: 0.5721 - loss: 0.6794
Epoch 8/20
[1m44143/44143[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235us/step - accuracy: 0.5772 - loss: 0.