In [1]:
from google.colab import files

# Upload one or more files
uploaded = files.upload()

# Example: Load your CSV after uploading
import pandas as pd

stock_df = pd.read_csv("Processed_Stock_Data.csv")
nvda_df = pd.read_csv("NVDA.csv")

# Preview
print(stock_df.head())
print(nvda_df.head())


Saving NVDA.csv to NVDA.csv
Saving Processed_Stock_Data.csv to Processed_Stock_Data.csv
   Year  Week_Number  mean_return  volatility  Label
0  2020            0       -80.05      113.21    Red
1  2020            1        69.02       44.37  Green
2  2020            2        41.74      192.39  Green
3  2020            3        12.40      101.47  Green
4  2020            4      -111.10      302.78    Red
         Date  Year  Month  Day    Weekday  Week_Number Year_Week  Open  High  \
0  2020-01-02  2020      1    2   Thursday            0   2020-00  5.97  6.00   
1  2020-01-03  2020      1    3     Friday            0   2020-00  5.88  5.95   
2  2020-01-06  2020      1    6     Monday            1   2020-01  5.81  5.93   
3  2020-01-07  2020      1    7    Tuesday            1   2020-01  5.95  6.04   
4  2020-01-08  2020      1    8  Wednesday            1   2020-01  5.99  6.05   

    Low  Close     Volume  Adj Close    Return  Short_MA   Long_MA  
0  5.92   6.00  237536000       5.97  

In [2]:
# Hamming Distance-Based Classifier (CS-677 Assignment)
# Upload 'Processed_Stock_Data.csv' and 'NVDA.csv' manually in Colab

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score

# Load data
stock_df = pd.read_csv("Processed_Stock_Data.csv")
stock_df['Label_Num'] = stock_df['Label'].map({'Green': 1, 'Red': 0})

# Discretize features into 3 bins
bins = 3
stock_df['mean_bin'] = pd.qcut(stock_df['mean_return'], q=bins, labels=False)
stock_df['vol_bin'] = pd.qcut(stock_df['volatility'], q=bins, labels=False)

# Create binary feature vectors
stock_df['discrete_vec'] = list(zip(stock_df['mean_bin'], stock_df['vol_bin']))

# Split train and test
train_df = stock_df[stock_df['Year'].between(2020, 2022)].reset_index()
test_df = stock_df[stock_df['Year'].between(2023, 2024)].reset_index()

# Predict using Hamming distance
preds = []
for test_vec in test_df['discrete_vec']:
    distances = train_df['discrete_vec'].apply(lambda x: sum(a != b for a, b in zip(test_vec, x)))
    closest_idx = distances.idxmin()
    preds.append(train_df.loc[closest_idx, 'Label_Num'])

test_df['Hamming_Pred'] = preds

# Evaluation
y_true = test_df['Label_Num']
y_pred = test_df['Hamming_Pred']
acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)
tn, fp, fn, tp = cm.ravel()
tpr = tp / (tp + fn)
tnr = tn / (tn + fp)

# Load NVDA weekly prices
nvda_df = pd.read_csv("NVDA.csv")
weekly_prices = nvda_df[nvda_df['Year'].between(2023, 2024)].groupby(['Year', 'Week_Number']).agg(
    Open_Price=('Open', 'first'),
    Close_Price=('Close', 'last')
).reset_index()

# Merge with test set
test_df = test_df.merge(weekly_prices, on=['Year', 'Week_Number'], how='left')

# Trading simulation
def simulate_trading(df, label_col):
    cash, shares, position = 100, 0, False
    for _, row in df.iterrows():
        if pd.isna(row['Open_Price']) or pd.isna(row['Close_Price']):
            continue
        if not position and row[label_col] == 1:
            shares = cash / row['Open_Price']
            cash = 0
            position = True
        elif position and row[label_col] == 0:
            cash = shares * row['Close_Price']
            shares = 0
            position = False
    return round(cash + (shares * row['Close_Price'] if position else 0), 2)

# Strategy value
hamming_value = simulate_trading(test_df, 'Hamming_Pred')
initial_open = test_df.iloc[0]['Open_Price']
final_close = test_df.iloc[-1]['Close_Price']
buy_hold_value = round((100 / initial_open) * final_close, 2)

# Output results
print("Accuracy:", round(acc * 100, 2), "%")
print("Confusion Matrix:", cm.tolist())
print("TPR:", round(tpr * 100, 2), "%")
print("TNR:", round(tnr * 100, 2), "%")
print("Hamming Strategy Value:", hamming_value)
print("Buy-and-Hold Strategy Value:", buy_hold_value)


Accuracy: 87.62 %
Confusion Matrix: [[55, 4], [9, 37]]
TPR: 80.43 %
TNR: 93.22 %
Hamming Strategy Value: 1141.87
Buy-and-Hold Strategy Value: 925.86
