In [2]:
# Dependencies for this cell: pandas plus the Colab file-upload helper
import pandas as pd
from google.colab import files

# Open Colab's upload widget so the CSV can be picked by hand
uploaded = files.upload()

# Read the uploaded CSV into a DataFrame
# (edit the name below if your file is called something else)
df = pd.read_csv("Processed_Stock_Data.csv")

# Display the first rows as a quick sanity check
df.head()


Saving Processed_Stock_Data.csv to Processed_Stock_Data.csv


Unnamed: 0,Year,Week_Number,mean_return,volatility,Label
0,2020,0,-80.05,113.21,Red
1,2020,1,69.02,44.37,Green
2,2020,2,41.74,192.39,Green
3,2020,3,12.4,101.47,Green
4,2020,4,-111.1,302.78,Red


In [3]:
# LDA and QDA Classifier for Weekly Stock Labels (CS-677 Assignment)
#
# Colab notes:
#   * install the requirements first if they are missing:
#       !pip install pandas scikit-learn
#   * upload the CSV beforehand if it is not in the working directory:
#       from google.colab import files; uploaded = files.upload()

import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the weekly data and encode the colour label numerically:
# Green -> 1, Red -> 0
df = pd.read_csv("Processed_Stock_Data.csv")
df['Label_Num'] = df['Label'].map({'Green': 1, 'Red': 0})

# Year-based split: 2020-2022 is used for training, 2023-2024 for testing
train_df = df.loc[df['Year'].between(2020, 2022)]
test_df = df.loc[df['Year'].between(2023, 2024)]

# Both models use the same two features and the numeric label
X_train, y_train = train_df[['mean_return', 'volatility']], train_df['Label_Num']
X_test, y_test = test_df[['mean_return', 'volatility']], test_df['Label_Num']

# Linear discriminant analysis: fit, predict on the test years, score
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)
y_pred_lda = lda.predict(X_test)
lda_acc = accuracy_score(y_test, y_pred_lda)
lda_cm = confusion_matrix(y_test, y_pred_lda)

# Quadratic discriminant analysis: same procedure
qda = QuadraticDiscriminantAnalysis().fit(X_train, y_train)
y_pred_qda = qda.predict(X_test)
qda_acc = accuracy_score(y_test, y_pred_qda)
qda_cm = confusion_matrix(y_test, y_pred_qda)

# Report accuracy (as a percentage) and the confusion matrix of each model
print("LDA Accuracy:", round(lda_acc * 100, 2), "%")
print("LDA Confusion Matrix:\n", lda_cm)

print("\nQDA Accuracy:", round(qda_acc * 100, 2), "%")
print("QDA Confusion Matrix:\n", qda_cm)

# How to read the matrices:
# rows are true labels, columns are predictions -> [[TN, FP], [FN, TP]]
# TP = Green weeks predicted Green, TN = Red weeks predicted Red


LDA Accuracy: 86.67 %
LDA Confusion Matrix:
 [[56  3]
 [11 35]]

QDA Accuracy: 92.38 %
QDA Confusion Matrix:
 [[57  2]
 [ 6 40]]


In [4]:
# Custom Naive Bayes vs Sklearn GaussianNB (CS-677 Assignment)
#
# Colab note: if the CSV is not in the working directory yet, upload it first:
#   from google.colab import files; uploaded = files.upload()

import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the weekly data and add a numeric label column (Green -> 1, Red -> 0)
df = pd.read_csv("Processed_Stock_Data.csv")
df['Label_Num'] = df['Label'].map({'Green': 1, 'Red': 0})

# Year-based split: 2020-2022 for training, 2023-2024 for testing
train_df = df.loc[df['Year'].between(2020, 2022)]
test_df = df.loc[df['Year'].between(2023, 2024)]

# Convert features and labels to plain NumPy arrays for both classifiers
X_train = train_df[['mean_return', 'volatility']].to_numpy()
y_train = train_df['Label_Num'].to_numpy()

X_test = test_df[['mean_return', 'volatility']].to_numpy()
y_test = test_df['Label_Num'].to_numpy()
# Custom Naive Bayes
class CustomGaussianNB:
    """Minimal Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled as an independent normal distribution per
    class. After ``fit`` the instance holds:

    * ``classes`` - sorted array of the distinct labels seen in ``y``
    * ``mean``    - dict mapping class label -> per-feature mean
    * ``var``     - dict mapping class label -> per-feature variance (+ 1e-9)
    * ``prior``   - dict mapping class label -> fraction of training rows
    """

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features. Lists, NumPy arrays and DataFrames are
            accepted; the data is converted to a float array.
        y : array-like of shape (n_samples,)
            Class label of each training row.

        Returns
        -------
        self : CustomGaussianNB
            The fitted instance, so calls can be chained.
        """
        # Coerce to arrays so boolean-mask row selection works for any
        # array-like input (a plain list would fail on ``X[y == c]``).
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.prior = {}
        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            # Small constant keeps the variance strictly positive so the
            # log and the division in _predict_single are always defined.
            self.var[c] = X_c.var(axis=0) + 1e-9
            self.prior[c] = X_c.shape[0] / X.shape[0]
        return self

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list
            One predicted label (an element of ``self.classes``) per row.
        """
        # Convert first: iterating a DataFrame directly would yield its
        # column labels rather than its rows.
        X = np.asarray(X, dtype=float)
        return [self._predict_single(x) for x in X]

    def _predict_single(self, x):
        """Return the class with the highest log-posterior for one sample."""
        posteriors = []
        for c in self.classes:
            prior = np.log(self.prior[c])
            # Log of the product of independent Gaussian densities:
            # normalisation term first, then the squared-distance term.
            log_likelihood = -0.5 * np.sum(np.log(2 * np.pi * self.var[c]))
            log_likelihood -= 0.5 * np.sum(((x - self.mean[c]) ** 2) / self.var[c])
            posteriors.append(prior + log_likelihood)
        return self.classes[np.argmax(posteriors)]

# Hand-written classifier: fit on the training years, predict the test years,
# then score the predictions
custom_nb = CustomGaussianNB()
custom_nb.fit(X_train, y_train)
y_pred_custom = custom_nb.predict(X_test)
acc_custom = accuracy_score(y_test, y_pred_custom)
cm_custom = confusion_matrix(y_test, y_pred_custom)

# scikit-learn reference implementation: same data, same metrics
sklearn_nb = GaussianNB().fit(X_train, y_train)
y_pred_sklearn = sklearn_nb.predict(X_test)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)
cm_sklearn = confusion_matrix(y_test, y_pred_sklearn)

# Side-by-side report: accuracy as a percentage, then the confusion matrix
print("Custom Naive Bayes Accuracy:", round(acc_custom * 100, 2), "%")
print("Custom NB Confusion Matrix:\n", cm_custom)

print("\nSklearn GaussianNB Accuracy:", round(acc_sklearn * 100, 2), "%")
print("Sklearn NB Confusion Matrix:\n", cm_sklearn)


Custom Naive Bayes Accuracy: 88.57 %
Custom NB Confusion Matrix:
 [[55  4]
 [ 8 38]]

Sklearn GaussianNB Accuracy: 88.57 %
Sklearn NB Confusion Matrix:
 [[55  4]
 [ 8 38]]
