### data_science_assignment_linear_discriminant

In [19]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the data
data = pd.read_csv('AAL.csv')

# Filter Year 1 and Year 2 data
data['Date'] = pd.to_datetime(data['Date'])
year1_data = data[data['Year'] == 2022]
year2_data = data[data['Year'] == 2023]

# Extract weekly mean and standard deviation of returns for each week in Year 1 and Year 2
def calculate_weekly_features(df):
    weekly_features = df.groupby('Year_Week').agg(
        weekly_mean=('Return', 'mean'),
        weekly_std=('Return', 'std'),
        label=('Label', 'last')  # Using the label from the last day of the week (Friday)
    ).reset_index()
    return weekly_features

year1_features = calculate_weekly_features(year1_data)
year2_features = calculate_weekly_features(year2_data)

# Prepare training and testing data
X_train = year1_features[['weekly_mean', 'weekly_std']]
y_train = year1_features['label']
X_test = year2_features[['weekly_mean', 'weekly_std']]
y_test = year2_features['label']

# Initialize classifiers
lda = LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()

# Train classifiers on Year 1 data
lda.fit(X_train, y_train)
qda.fit(X_train, y_train)

# Predict on Year 2 data
y_pred_lda = lda.predict(X_test)
y_pred_qda = qda.predict(X_test)

# Calculate accuracy for Year 2
accuracy_lda = accuracy_score(y_test, y_pred_lda)
accuracy_qda = accuracy_score(y_test, y_pred_qda)

# Calculate confusion matrix, TPR, TNR for each classifier
conf_matrix_lda = confusion_matrix(y_test, y_pred_lda, labels=['Green', 'Red'])
conf_matrix_qda = confusion_matrix(y_test, y_pred_qda, labels=['Green', 'Red'])

# True Positive Rate (TPR) and True Negative Rate (TNR) for Linear Discriminant
tp_lda = conf_matrix_lda[0, 0]
fn_lda = conf_matrix_lda[0, 1]
fp_lda = conf_matrix_lda[1, 0]
tn_lda = conf_matrix_lda[1, 1]
tpr_lda = tp_lda / (tp_lda + fn_lda)
tnr_lda = tn_lda / (tn_lda + fp_lda)

# True Positive Rate (TPR) and True Negative Rate (TNR) for Quadratic Discriminant
tp_qda = conf_matrix_qda[0, 0]
fn_qda = conf_matrix_qda[0, 1]
fp_qda = conf_matrix_qda[1, 0]
tn_qda = conf_matrix_qda[1, 1]
tpr_qda = tp_qda / (tp_qda + fn_qda)
tnr_qda = tn_qda / (tn_qda + fp_qda)

# Calculate covariance matrices for each class manually
green_class_data = X_train[y_train == 'Green']
red_class_data = X_train[y_train == 'Red']
cov_green = np.cov(green_class_data, rowvar=False)
cov_red = np.cov(red_class_data, rowvar=False)

# Print classifier parameters
print("1. Linear Discriminant Analysis equation parameters:", lda.coef_)
print("   Quadratic Discriminant Analysis parameters:")
print("   - Means for each class (Green, Red):")
print("     Green Mean:", qda.means_[0])
print("     Red Mean:", qda.means_[1])
print("   - Covariance matrices for each class:")
print("     Green Covariance Matrix:\n", cov_green)
print("     Red Covariance Matrix:\n", cov_red)

# Question 2: Accuracy for Year 2 for each classifier
print(f"2. LDA Accuracy for Year 2: {accuracy_lda}")
print(f"   QDA Accuracy for Year 2: {accuracy_qda}")

# Question 3: Confusion Matrix for Year 2 for each classifier
print("3. LDA Confusion Matrix for Year 2:\n", conf_matrix_lda)
print("   QDA Confusion Matrix for Year 2:\n", conf_matrix_qda)

# Question 4: TPR and TNR for Year 2 for each classifier
print(f"4. LDA TPR: {tpr_lda}, LDA TNR: {tnr_lda}")
print(f"   QDA TPR: {tpr_qda}, QDA TNR: {tnr_qda}")

# Adjust simulate_trading to apply gains for "Green" predictions and losses for "Red"
def simulate_trading(predictions, actual_returns, initial_balance=100):
    balance = initial_balance
    for pred, weekly_return in zip(predictions, actual_returns):
        if pred == 'Green':
            balance *= 1 + weekly_return
        else:
            balance *= 1 - abs(weekly_return)
    return balance

# Extract weekly returns for Year 2
weekly_returns = year2_features['weekly_mean']

# Calculate final balances using actual weekly returns
final_balance_lda = simulate_trading(y_pred_lda, weekly_returns)
final_balance_qda = simulate_trading(y_pred_qda, weekly_returns)

# Buy-and-hold strategy
buy_and_hold_balance = initial_balance * (1 + weekly_returns.mean() * 52)

# Question 5: Strategy comparison
print(f"5. Final balance with LDA strategy: ${final_balance_lda}")
print(f"   Final balance with QDA strategy: ${final_balance_qda}")
print(f"   Final balance with buy-and-hold strategy: ${buy_and_hold_balance}")


1. Linear Discriminant Analysis equation parameters: [[-314.94459212   52.64272802]]
   Quadratic Discriminant Analysis parameters:
   - Means for each class (Green, Red):
     Green Mean: [0.01240089 0.0321911 ]
     Red Mean: [-0.01259285  0.0336222 ]
   - Covariance matrices for each class:
     Green Covariance Matrix:
 [[9.13366022e-05 3.19617527e-05]
 [3.19617527e-05 1.38242885e-04]]
     Red Covariance Matrix:
 [[7.75452200e-05 2.57898578e-05]
 [2.57898578e-05 2.55470534e-04]]
2. LDA Accuracy for Year 2: 0.9038461538461539
   QDA Accuracy for Year 2: 0.9038461538461539
3. LDA Confusion Matrix for Year 2:
 [[24  0]
 [ 5 23]]
   QDA Confusion Matrix for Year 2:
 [[24  0]
 [ 5 23]]
4. LDA TPR: 1.0, LDA TNR: 0.8214285714285714
   QDA TPR: 1.0, QDA TNR: 0.8214285714285714
5. Final balance with LDA strategy: $102.10239782129852
   Final balance with QDA strategy: $102.10239782129852
   Final balance with buy-and-hold strategy: $102.42078269999999


### data_science_module_assignment_naive_bayesian

In [21]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the data
data = pd.read_csv('AAL.csv')

# Filter Year 1 and Year 2 data
data['Date'] = pd.to_datetime(data['Date'])
year1_data = data[data['Year'] == 2022]
year2_data = data[data['Year'] == 2023]

# Extract weekly mean and standard deviation of returns for each week in Year 1 and Year 2
def calculate_weekly_features(df):
    weekly_features = df.groupby('Year_Week').agg(
        weekly_mean=('Return', 'mean'),
        weekly_std=('Return', 'std'),
        label=('Label', 'last')  # Using the label from the last day of the week (Friday)
    ).reset_index()
    return weekly_features

year1_features = calculate_weekly_features(year1_data)
year2_features = calculate_weekly_features(year2_data)

# Prepare training and testing data
X_train = year1_features[['weekly_mean', 'weekly_std']]
y_train = year1_features['label']
X_test = year2_features[['weekly_mean', 'weekly_std']]
y_test = year2_features['label']

# Initialize Gaussian Naive Bayes classifier
gnb = GaussianNB()

# Train classifier on Year 1 data
gnb.fit(X_train, y_train)

# Predict on Year 2 data
y_pred_gnb = gnb.predict(X_test)

# 1. Calculate accuracy for Year 2
accuracy_gnb = accuracy_score(y_test, y_pred_gnb)
print(f"1. Gaussian Naive Bayes Accuracy: {accuracy_gnb}")

# 2. Calculate confusion matrix
conf_matrix_gnb = confusion_matrix(y_test, y_pred_gnb, labels=['Green', 'Red'])
print("2. Confusion Matrix for Gaussian Naive Bayes:")
print(conf_matrix_gnb)

# 3. Calculate TPR and TNR for Gaussian Naive Bayes
tp_gnb = conf_matrix_gnb[0, 0]
fn_gnb = conf_matrix_gnb[0, 1]
fp_gnb = conf_matrix_gnb[1, 0]
tn_gnb = conf_matrix_gnb[1, 1]
tpr_gnb = tp_gnb / (tp_gnb + fn_gnb)  # True Positive Rate
tnr_gnb = tn_gnb / (tn_gnb + fp_gnb)  # True Negative Rate
print(f"3. TPR (True Positive Rate): {tpr_gnb}")
print(f"   TNR (True Negative Rate): {tnr_gnb}")

# 4. Implement trading strategy based on Gaussian Naive Bayes predictions for Year 2 using actual returns
initial_balance = 100  # Starting with $100

def simulate_trading(predictions, weekly_returns, initial_balance=100):
    balance = initial_balance
    for pred, weekly_return in zip(predictions, weekly_returns):
        balance *= 1 + weekly_return if pred == 'Green' else 1 - weekly_return
    return balance

# Extract weekly returns for Year 2
weekly_returns = year2_features['weekly_mean']

# Calculate final balance using actual weekly returns with the Naive Bayes strategy
final_balance_gnb = simulate_trading(y_pred_gnb, weekly_returns)

# Buy-and-hold strategy
buy_and_hold_balance = initial_balance * (1 + weekly_returns.mean() * 52)  # Average weekly return over the year

# 5. Strategy comparison
print(f"4. Final balance with Gaussian Naive Bayes strategy: ${final_balance_gnb}")
print(f"   Final balance with buy-and-hold strategy: ${buy_and_hold_balance}")



1. Gaussian Naive Bayes Accuracy: 0.9615384615384616
2. Confusion Matrix for Gaussian Naive Bayes:
[[23  1]
 [ 1 27]]
3. TPR (True Positive Rate): 0.9583333333333334
   TNR (True Negative Rate): 0.9642857142857143
4. Final balance with Gaussian Naive Bayes strategy: $159.35863589095877
   Final balance with buy-and-hold strategy: $102.42078269999999
