## svm

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the data
data = pd.read_csv('AAL.csv')

# Filter Year 1 and Year 2 data
data['Date'] = pd.to_datetime(data['Date'])
data['Year'] = data['Date'].dt.year
year1_data = data[data['Year'] == 2022]
year2_data = data[data['Year'] == 2023]

# Extract weekly mean and standard deviation of returns for each week in Year 1 and Year 2
def calculate_weekly_features(df):
    weekly_features = df.groupby('Year_Week').agg(
        weekly_mean=('Return', 'mean'),
        weekly_std=('Return', 'std'),
        label=('Label', 'last')  # Using the label from the last day of the week (Friday)
    ).reset_index()
    return weekly_features

year1_features = calculate_weekly_features(year1_data)
year2_features = calculate_weekly_features(year2_data)

# Prepare training and testing data
X_train = year1_features[['weekly_mean', 'weekly_std']]
y_train = year1_features['label']
X_test = year2_features[['weekly_mean', 'weekly_std']]
y_test = year2_features['label']

# Question 1: Linear SVM
linear_svm = SVC(kernel='linear', random_state=42)
linear_svm.fit(X_train, y_train)
y_pred_linear = linear_svm.predict(X_test)
accuracy_linear = accuracy_score(y_test, y_pred_linear)
print(f"1. Linear SVM Accuracy for Year 2: {accuracy_linear}")

# Question 2: Confusion Matrix for Linear SVM
conf_matrix_linear = confusion_matrix(y_test, y_pred_linear, labels=['Green', 'Red'])
print("2. Confusion Matrix for Linear SVM:")
print(conf_matrix_linear)

# Question 3: TPR and TNR for Linear SVM
tp_linear = conf_matrix_linear[0, 0]  # True Positives
fn_linear = conf_matrix_linear[0, 1]  # False Negatives
fp_linear = conf_matrix_linear[1, 0]  # False Positives
tn_linear = conf_matrix_linear[1, 1]  # True Negatives
tpr_linear = tp_linear / (tp_linear + fn_linear) if (tp_linear + fn_linear) > 0 else 0  # True Positive Rate
tnr_linear = tn_linear / (tn_linear + fp_linear) if (tn_linear + fp_linear) > 0 else 0  # True Negative Rate
print(f"3. TPR (True Positive Rate): {tpr_linear}")
print(f"   TNR (True Negative Rate): {tnr_linear}")

# Question 4: Gaussian SVM
gaussian_svm = SVC(kernel='rbf', random_state=42)
gaussian_svm.fit(X_train, y_train)
y_pred_gaussian = gaussian_svm.predict(X_test)
accuracy_gaussian = accuracy_score(y_test, y_pred_gaussian)
print(f"4. Gaussian SVM Accuracy for Year 2: {accuracy_gaussian}")
print(f"   Is Gaussian SVM better than Linear SVM? {'Yes' if accuracy_gaussian > accuracy_linear else 'No'}")

# Question 5: Polynomial SVM (degree 2)
poly_svm = SVC(kernel='poly', degree=2, random_state=42)
poly_svm.fit(X_train, y_train)
y_pred_poly = poly_svm.predict(X_test)
accuracy_poly = accuracy_score(y_test, y_pred_poly)
print(f"5. Polynomial SVM (Degree 2) Accuracy for Year 2: {accuracy_poly}")
print(f"   Is Polynomial SVM better than Linear SVM? {'Yes' if accuracy_poly > accuracy_linear else 'No'}")

# Question 6: Trading Strategy Comparison
initial_balance = 100  # Starting with $100

def simulate_trading(predictions, weekly_returns, initial_balance=100):
    balance = initial_balance
    for pred, weekly_return in zip(predictions, weekly_returns):
        balance *= 1 + weekly_return if pred == 'Green' else 1 - weekly_return
    return balance

# Extract weekly returns for Year 2
weekly_returns = year2_features['weekly_mean']

# Calculate final balance using actual weekly returns with Linear SVM strategy
final_balance_linear = simulate_trading(y_pred_linear, weekly_returns)

# Buy-and-hold strategy
buy_and_hold_balance = initial_balance * (1 + weekly_returns.mean() * 52)  # Average weekly return over the year

print(f"6. Final balance with Linear SVM strategy: ${final_balance_linear:.2f}")
print(f"   Final balance with buy-and-hold strategy: ${buy_and_hold_balance:.2f}")
print(f"   Which strategy performed better? {'Linear SVM' if final_balance_linear > buy_and_hold_balance else 'Buy-and-Hold'}")


1. Linear SVM Accuracy for Year 2: 1.0
2. Confusion Matrix for Linear SVM:
[[24  0]
 [ 0 28]]
3. TPR (True Positive Rate): 1.0
   TNR (True Negative Rate): 1.0
4. Gaussian SVM Accuracy for Year 2: 0.9807692307692307
   Is Gaussian SVM better than Linear SVM? No
5. Polynomial SVM (Degree 2) Accuracy for Year 2: 0.9230769230769231
   Is Polynomial SVM better than Linear SVM? No
6. Final balance with Linear SVM strategy: $159.97
   Final balance with buy-and-hold strategy: $102.42
   Which strategy performed better? Linear SVM
