In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

df = pd.read_csv('NVDA_data.csv')
df.head()

Unnamed: 0,Year,Week_Number,Open,Adj Close,mean_return,volatility,color_label
0,2018,0,48.945,53.287727,1.98875,3.081352,red
1,2018,1,55.099998,55.162941,0.7018,1.397675,red
2,2018,2,56.012501,56.92683,0.802,1.813348,green
3,2018,3,57.607498,60.197319,1.1348,1.691356,green
4,2018,4,60.685001,57.770416,-0.803,2.021694,red


In [2]:
train_data = df[df['Year'] != 2022].copy()
test_data = df[df['Year'] == 2022].copy()

X_train = train_data[['Open', 'Adj Close', 'mean_return', 'volatility']]
y_train = train_data['color_label']

X_test = test_data[['Open', 'Adj Close', 'mean_return', 'volatility']]
y_test_true = test_data['color_label']


In [3]:
# Decision Tree and its accuracy for Year 2
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)
y_test_pred_dt = decision_tree.predict(X_test)
accuracy_dt = accuracy_score(y_test_true, y_test_pred_dt)
print(f'Accuracy of Decision Tree for Year 2: {accuracy_dt}')

# confusion matrix for Decision Tree
conf_matrix_dt = confusion_matrix(y_test_true, y_test_pred_dt)
print('Confusion Matrix for Decision Tree (Year 2):')
print(conf_matrix_dt)

# sensitivity (recall) and specificity for Decision Tree
tn_dt, fp_dt, fn_dt, tp_dt = conf_matrix_dt.ravel()
sensitivity_dt = tp_dt / (tp_dt + fn_dt)
specificity_dt = tn_dt / (tn_dt + fp_dt)
print(f'True Positive Rate (Sensitivity) for Decision Tree: {sensitivity_dt:.4f}')
print(f'True Negative Rate (Specificity) for Decision Tree: {specificity_dt:.4f}')

Accuracy of Decision Tree for Year 2: 0.8846153846153846
Confusion Matrix for Decision Tree (Year 2):
[[12  2]
 [ 4 34]]
True Positive Rate (Sensitivity) for Decision Tree: 0.8947
True Negative Rate (Specificity) for Decision Tree: 0.8571


In [4]:
test_data['predicted_label'] = y_test_pred_dt
test_data.head()

Unnamed: 0,Year,Week_Number,Open,Adj Close,mean_return,volatility,color_label,predicted_label
211,2022,1,298.149994,272.118683,-1.4652,3.57336,red,red
212,2022,2,265.809998,269.072632,-0.1934,2.768013,red,red
213,2022,3,262.600006,233.438583,-3.4885,0.320452,red,red
214,2022,4,223.300003,228.105469,-0.4078,3.64672,red,red
215,2022,5,231.820007,242.876404,1.3404,4.416611,green,green


# Trading strategy for 2022 to calculate the amount at the end of the year using predicted labels

In [6]:
def trading_strategy_predicted_logistic_reg(df, initial_balance=100):
    df = df.reset_index(drop=True)  # Reset the index to numeric values
    balance = initial_balance
    position = None
    balance_history = []

    for week in range(len(df)):  # Adjust the loop condition
        open_price = df.loc[week, 'Open']
        close_price = df.loc[week, 'Adj Close']

        if df.loc[week, 'predicted_label'] == 'green':
            if position is None:
                shares_to_buy = balance / open_price
                print(f"Week {week}: Buying {shares_to_buy:.2f} shares at ${open_price:.2f}")
                position = shares_to_buy
                balance = 0
                print(f"   Shares: {position:.2f}, Portfolio Value: ${balance:.2f}")

        elif df.loc[week, 'predicted_label'] == 'red':
            if position is not None:
                balance_from_selling = position * df.loc[week - 1, 'Adj Close']
                print(f"Week {week}: Selling {position:.2f} shares at ${df.loc[week - 1, 'Adj Close']:.2f}, "
                      f"Portfolio Value: ${balance_from_selling:.2f}")
                balance += balance_from_selling
                position = None
                print(f"   Portfolio Value: ${balance:.2f}")

        if position is None:
            portfolio_value = balance
        else:
            portfolio_value = position * df.loc[week - 1, 'Adj Close']

        balance_history.append(portfolio_value)
        print(f"Week {week}: Portfolio Value: ${portfolio_value:.2f}")

    return balance_history

resulting_balance_history_lr = trading_strategy_predicted_logistic_reg(test_data)
print("Balance History based on Modified Predicted Labels for 2022:", resulting_balance_history_lr[-1])


Week 0: Portfolio Value: $100.00
Week 1: Portfolio Value: $100.00
Week 2: Portfolio Value: $100.00
Week 3: Portfolio Value: $100.00
Week 4: Buying 0.43 shares at $231.82
   Shares: 0.43, Portfolio Value: $0.00
Week 4: Portfolio Value: $98.40
Week 5: Selling 0.43 shares at $242.88, Portfolio Value: $104.77
   Portfolio Value: $104.77
Week 5: Portfolio Value: $104.77
Week 6: Portfolio Value: $104.77
Week 7: Portfolio Value: $104.77
Week 8: Portfolio Value: $104.77
Week 9: Portfolio Value: $104.77
Week 10: Buying 0.48 shares at $218.69
   Shares: 0.48, Portfolio Value: $0.00
Week 10: Portfolio Value: $105.76
Week 11: Selling 0.48 shares at $264.23, Portfolio Value: $126.59
   Portfolio Value: $126.59
Week 11: Portfolio Value: $126.59
Week 12: Portfolio Value: $126.59
Week 13: Portfolio Value: $126.59
Week 14: Portfolio Value: $126.59
Week 15: Portfolio Value: $126.59
Week 16: Portfolio Value: $126.59
Week 17: Buying 0.68 shares at $185.41
   Shares: 0.68, Portfolio Value: $0.00
Week 17: P