In [4]:
# Colab-only cell: opens the Colab upload dialog and keeps the call's result in `uploaded`.
from google.colab import files
# NOTE(review): the captured output of this cell shows the re-uploaded
# "Processed_Stock_Data.csv" being saved as "Processed_Stock_Data (1).csv" (presumably
# because a file with that name already existed in the runtime). The later cells read
# "Processed_Stock_Data.csv", so they may be using the older copy — confirm the two
# files are identical or delete the stale one before re-running.
uploaded = files.upload()


Saving Processed_Stock_Data.csv to Processed_Stock_Data (1).csv
Saving NVDA.csv to NVDA.csv


In [5]:
# Trading Strategy Analysis using Linear Models (2020-2024)

# Install required packages (uncomment if running on Google Colab)
# !pip install pandas scikit-learn

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Upload the required files manually on Colab
# Example:
# from google.colab import files
# uploaded = files.upload()

# Load the processed weekly stock data (features plus a 'Green'/'Red' label per row).
stock_df = pd.read_csv("Processed_Stock_Data.csv")

# Put the rows in chronological order. The trading simulation later in this cell walks
# the test rows one after another, so its result must not depend on how the CSV
# happens to be ordered.
stock_df = stock_df.sort_values(by=['Year', 'Week_Number'])

# Convert Label to numeric (Green -> 1, Red -> 0). Any other value becomes NaN.
stock_df['Label_Num'] = stock_df['Label'].map({'Green': 1, 'Red': 0})

# Split into train (2020-2022) and test (2023-2024); between() is inclusive on both ends.
train_df = stock_df[stock_df['Year'].between(2020, 2022)]
test_df = stock_df[stock_df['Year'].between(2023, 2024)]

# Fail early, with a readable message, if a label inside the modelling window could not
# be mapped, instead of passing NaN targets on to the model.
unmapped_labels = pd.concat([train_df, test_df])['Label_Num'].isna()
if unmapped_labels.any():
    raise ValueError(
        f"{int(unmapped_labels.sum())} row(s) in 2020-2024 have a Label other than "
        "'Green' or 'Red'"
    )

# Feature sets: model A uses the mean return only, model B adds volatility.
FEATURES_A = ['mean_return']
FEATURES_B = ['mean_return', 'volatility']

X_train_A = train_df[FEATURES_A]
X_train_B = train_df[FEATURES_B]
y_train = train_df['Label_Num']

X_test_A = test_df[FEATURES_A]
X_test_B = test_df[FEATURES_B]
y_test = test_df['Label_Num']

# Train one logistic regression per feature set and predict the test-period labels.
model_a = LogisticRegression()
model_a.fit(X_train_A, y_train)
y_pred_a = model_a.predict(X_test_A)

model_b = LogisticRegression()
model_b.fit(X_train_B, y_train)
y_pred_b = model_b.predict(X_test_B)

# Accuracy on the 2023-2024 test rows
print("Model A Accuracy:", accuracy_score(y_test, y_pred_a))
print("Model B Accuracy:", accuracy_score(y_test, y_pred_b))

# Read the NVDA price file and keep only the 2023-2024 rows.
price_df = pd.read_csv("NVDA.csv")
price_df = price_df.loc[price_df['Year'].between(2023, 2024)]

# Collapse to one row per (Year, Week_Number): the first 'Open' and the last 'Close'
# of each group, taken in the order the rows appear in the file.
# NOTE(review): this assumes the rows within each week are in chronological order — confirm.
weekly_prices = (
    price_df
    .groupby(['Year', 'Week_Number'])
    .agg(
        Open_Price=('Open', 'first'),
        Close_Price=('Close', 'last'),
    )
    .reset_index()
)

# Attach both models' predictions to a copy of the test rows, then bring in the weekly
# prices. A left join keeps every test row; weeks without prices get NaN.
test_df = (
    test_df
    .assign(Predicted_Label_A=y_pred_a, Predicted_Label_B=y_pred_b)
    .merge(weekly_prices, on=['Year', 'Week_Number'], how='left')
)

# Trading strategy simulation
def simulate_real_trading(df, label_col, initial_cash=100):
    """Simulate an all-in / all-out trading strategy driven by predicted labels.

    Rows are processed in the order they appear in ``df``. For each row with
    valid prices:

    * label ``1`` while out of the market -> buy with all cash at ``Open_Price``;
    * label ``0`` while holding shares    -> sell everything at ``Close_Price``;
    * otherwise                           -> keep the current position.

    Rows whose ``Open_Price`` or ``Close_Price`` is missing are skipped and
    never used for pricing. A position still open after the last row is valued
    at the most recent valid ``Close_Price``, so trailing rows without prices
    no longer turn the result into NaN.

    NOTE(review): a sell is executed at the close of the row predicted as 0,
    i.e. the position is held through that row. Confirm this is the intended
    strategy; the behaviour is kept as it was.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``label_col``, ``Open_Price`` and ``Close_Price``.
    label_col : str
        Name of the column holding the 0/1 predictions.
    initial_cash : float, optional
        Starting capital (default 100).

    Returns
    -------
    float or int
        Final portfolio value: cash, or the value of the shares still held.
    """
    cash = initial_cash
    if len(df) == 0:
        # Nothing to trade on; also avoids touching columns of an empty frame.
        return cash

    shares = 0
    position = False
    last_valid_close = None

    for label, open_price, close_price in zip(
        df[label_col], df['Open_Price'], df['Close_Price']
    ):
        if pd.isna(open_price) or pd.isna(close_price):
            continue

        # Only rows with usable prices may serve as the final valuation price.
        last_valid_close = close_price

        if label == 1 and not position:
            shares = cash / open_price
            cash = 0
            position = True
        elif label == 0 and position:
            cash = shares * close_price
            shares = 0
            position = False

    if position:
        # A position can only be opened on a valid row, so last_valid_close is set.
        return cash + shares * last_valid_close
    return cash

# Run the same simulation once per model's prediction column.
final_val_a, final_val_b = (
    simulate_real_trading(test_df, prediction_col)
    for prediction_col in ('Predicted_Label_A', 'Predicted_Label_B')
)

# Report the final portfolio values rounded to cents.
rounded_a = round(final_val_a, 2)
rounded_b = round(final_val_b, 2)
print("\nFinal Portfolio Value (Model A): $", rounded_a)
print("Final Portfolio Value (Model B): $", rounded_b)


Model A Accuracy: 0.6666666666666666
Model B Accuracy: 0.8666666666666667

Final Portfolio Value (Model A): $ 1302.6
Final Portfolio Value (Model B): $ 1390.12


In [6]:
# Trend Change Detection using Logistic Regression
# Assignment: CS-677 - Detecting Transitions Between Red/Green Weeks

# Install required packages if using Colab
# !pip install pandas scikit-learn

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Upload the file manually if using Colab:
# from google.colab import files
# uploaded = files.upload()

# Read the processed weekly data (with 'Green'/'Red' labels) and encode the label as 1/0.
df = pd.read_csv("Processed_Stock_Data.csv")
df['Label_Num'] = df['Label'].map({'Green': 1, 'Red': 0})

# Chronological order is required because the target compares each row with the one before it.
df = df.sort_values(by=['Year', 'Week_Number'])

# Trend_Change = 1 when a row's label differs from the previous row's label.
# The first row has no predecessor (Prev_Label is NaN); the comparison is then True,
# so the first row is counted as a trend change.
df['Prev_Label'] = df['Label_Num'].shift(1)
df['Trend_Change'] = df['Label_Num'].ne(df['Prev_Label']).astype(int)

# Train on 2020-2022, test on 2023-2024 (both ranges inclusive).
train_df = df.loc[df['Year'].between(2020, 2022)]
test_df = df.loc[df['Year'].between(2023, 2024)]

# Features: mean return and volatility; target: the trend-change flag.
X_train, y_train = train_df[['mean_return', 'volatility']], train_df['Trend_Change']
X_test, y_test = test_df[['mean_return', 'volatility']], test_df['Trend_Change']

# Fit the classifier (fit() returns the fitted estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Score the test-period predictions.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Report accuracy as a percentage, followed by the confusion matrix.
accuracy_pct = round(accuracy * 100, 2)
print("Accuracy:", accuracy_pct, "%")
print("Confusion Matrix:\n", conf_matrix)

# How to read the confusion matrix (rows = actual class, columns = predicted class):
# [TN, FP]
# [FN, TP]  <- TP = trend changes that were correctly predicted (true positives)


Accuracy: 49.52 %
Confusion Matrix:
 [[33 19]
 [34 19]]
