In [5]:
# Random Forest Classifier (CS-677 Assignment)
# Upload 'Processed_Stock_Data.csv' and 'NVDA.csv' manually in Colab before running

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the processed feature table and encode the colour label numerically.
# Any 'Label' value other than 'Green'/'Red' becomes NaN in 'Label_Num'.
label_codes = {'Green': 1, 'Red': 0}
stock_df = pd.read_csv("Processed_Stock_Data.csv")
stock_df['Label_Num'] = stock_df['Label'].map(label_codes)

# Year-based split: 2020-2022 (inclusive) trains, 2023-2024 (inclusive) tests.
feature_cols = ['mean_return', 'volatility']
is_train = stock_df['Year'].between(2020, 2022)
is_test = stock_df['Year'].between(2023, 2024)
train_df = stock_df[is_train]
test_df = stock_df[is_test]
X_train, y_train = train_df[feature_cols], train_df['Label_Num']
X_test, y_test = test_df[feature_cols], test_df['Label_Num']

# Train Random Forest
# random_state is pinned so repeated runs build the same forest.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Evaluate
acc = accuracy_score(y_test, y_pred)
# Pin the label order so the matrix is always 2x2 ([[tn, fp], [fn, tp]]).
# Without `labels`, a test set where only one class shows up yields a 1x1
# matrix and the four-way unpacking below raises ValueError.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
# True positive / true negative rates; NaN when the class has no samples,
# instead of a divide-by-zero RuntimeWarning.
tpr = tp / (tp + fn) if (tp + fn) else float('nan')
tnr = tn / (tn + fp) if (tn + fp) else float('nan')

# Build one price row per (Year, Week_Number) for the 2023-2024 test window:
# the first 'Open' and the last 'Close' of each group, in file order.
# NOTE(review): 'first'/'last' assume NVDA.csv rows are in chronological
# order within each week - confirm against the data file.
nvda_df = pd.read_csv("NVDA.csv")
in_test_window = nvda_df['Year'].between(2023, 2024)
weekly_groups = nvda_df[in_test_window].groupby(['Year', 'Week_Number'])
weekly_prices = weekly_groups.agg(
    Open_Price=('Open', 'first'),
    Close_Price=('Close', 'last'),
).reset_index()

# Attach the predictions to a fresh copy of the test rows, then bring in the
# weekly prices. The left join keeps every test row; weeks without a price
# match get NaN in Open_Price / Close_Price.
test_df = test_df.assign(RF_Pred=y_pred).merge(
    weekly_prices,
    how='left',
    on=['Year', 'Week_Number'],
)

# Simulate trading strategy
def simulate_trading(df, label_col, initial_cash=100):
    """Simulate an all-in / all-out long-only strategy driven by a 0/1 label.

    Rows are processed in the order they appear in ``df``. While in cash, a
    row labelled 1 buys as many shares as the cash allows at that row's
    ``Open_Price``. While invested, a row labelled 0 sells everything at that
    row's ``Close_Price``. Rows with a missing open or close price are
    skipped entirely.

    Args:
        df: DataFrame with ``Open_Price``, ``Close_Price`` and ``label_col``
            columns.
        label_col: Name of the column holding the 1 (be invested) /
            0 (be in cash) signal.
        initial_cash: Starting cash balance. Defaults to 100.

    Returns:
        The final portfolio value rounded to 2 decimals. A position still
        open at the end is valued at the most recent *valid* close price.
    """
    cash = initial_cash
    shares = 0
    position = False
    # Close price of the last row that actually had prices. Valuing an open
    # position with the loop's final `close_price` would yield NaN whenever
    # the trailing row(s) were skipped for missing prices.
    last_valid_close = None
    for _, row in df.iterrows():
        label = row[label_col]
        open_price = row['Open_Price']
        close_price = row['Close_Price']
        if pd.isna(open_price) or pd.isna(close_price):
            continue
        last_valid_close = close_price
        if not position and label == 1:
            shares = cash / open_price
            cash = 0
            position = True
        elif position and label == 0:
            # NOTE(review): the exit happens at the close of the 0-labelled
            # row itself, so the position is held through that row's period.
            # Confirm this matches the intended strategy.
            cash = shares * close_price
            shares = 0
            position = False
    # `position` can only be True after a valid row was seen, so
    # last_valid_close is set whenever it is used here.
    holdings_value = shares * last_valid_close if position else 0
    return round(cash + holdings_value, 2)

# Final portfolio value using Random Forest strategy
rf_strategy_value = simulate_trading(test_df, 'RF_Pred')

# Buy-and-Hold strategy
# Buy $100 worth at the first available open and value it at the last
# available close. Rows of test_df can have NaN prices; simulate_trading
# skips such rows, so the benchmark ignores them too instead of turning NaN.
priced_weeks = test_df.dropna(subset=['Open_Price', 'Close_Price'])
if priced_weeks.empty:
    # No priced row at all: nothing can be bought, so the $100 stays in cash
    # (the same result simulate_trading gives in that situation).
    initial_open = final_close = float('nan')
    buy_hold_value = 100
else:
    initial_open = priced_weeks.iloc[0]['Open_Price']
    final_close = priced_weeks.iloc[-1]['Close_Price']
    buy_hold_value = round((100 / initial_open) * final_close, 2)

# Summary: classification metrics as percentages (2 decimals), the raw
# confusion matrix, then the final value of each strategy.
summary_rows = [
    ("Accuracy:", round(acc * 100, 2), "%"),
    ("Confusion Matrix:", cm.tolist()),
    ("TPR:", round(tpr * 100, 2), "%"),
    ("TNR:", round(tnr * 100, 2), "%"),
    ("Random Forest Strategy Value:", rf_strategy_value),
    ("Buy-and-Hold Strategy Value:", buy_hold_value),
]
for row_parts in summary_rows:
    # print joins the parts with single spaces, same as the multi-arg calls.
    print(*row_parts)


Accuracy: 99.05 %
Confusion Matrix: [[58, 1], [0, 46]]
TPR: 100.0 %
TNR: 98.31 %
Random Forest Strategy Value: 1042.7
Buy-and-Hold Strategy Value: 925.86
