In [28]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import yfinance as yf

In [29]:
# Download price history for the two legs of the pair.
tickers = ['AAPL', 'MSFT']  # Pairs trading, so exactly 2 tickers
start_date = '2020-01-01'
end_date = '2021-01-01'  # roughly 1 year of data
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which omits the 'Adj Close' column and would make the
# column lookup below raise KeyError on a fresh environment.
data = yf.download(
    tickers, start=start_date, end=end_date, auto_adjust=False
)['Adj Close']

[*********************100%%**********************]  2 of 2 completed


In [30]:
# Data preprocessing and feature selection:
# rolling z-score of each ticker's price plus its one-step lag.
window = 20
price_windows = data.rolling(window=window)
rolling_mean = price_windows.mean()
rolling_std = price_windows.std()
# The first (window - 1) rows have no full window yet, so they come out as NaN.
z_score = (data - rolling_mean) / rolling_std

# Build the feature table in one expression; dropna() discards the warm-up rows
# of the rolling window and the extra row lost to the shift(1) lag.
features = (
    pd.DataFrame({
        'AAPL_z_score': z_score['AAPL'],
        'MSFT_z_score': z_score['MSFT'],
    })
    .assign(
        AAPL_z_score_lag=lambda frame: frame['AAPL_z_score'].shift(1),
        MSFT_z_score_lag=lambda frame: frame['MSFT_z_score'].shift(1),
    )
    .dropna()
)

In [31]:
# Generating trading signals
#
# Label each row: 1 (buy) when a z-score is below -2, -1 (sell) when it is
# above +2, and 0 otherwise.
#
# np.select takes the first matching condition, so the MSFT conditions are
# listed first: MSFT thresholds take precedence over AAPL thresholds when both
# fire on the same row.
# NOTE(review): confirm this precedence is intended; a buy on one ticker can be
# silently replaced by a sell on the other. Also note the label is a
# deterministic function of the z-score features used later as model inputs.
aapl_z = features['AAPL_z_score'].to_numpy()
msft_z = features['MSFT_z_score'].to_numpy()

signals = pd.DataFrame(index=features.index)
signals['signal'] = np.select(
    [msft_z > 2, msft_z < -2, aapl_z > 2, aapl_z < -2],
    [-1, 1, -1, 1],
    default=0,
).astype('int64')


In [32]:
# Assemble the design matrix and label vector, then hold out 20% for testing.
feature_columns = [
    'AAPL_z_score',
    'MSFT_z_score',
    'AAPL_z_score_lag',
    'MSFT_z_score_lag',
]
X = features.loc[:, feature_columns].to_numpy()
y = signals.loc[:, 'signal'].to_numpy()

# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows. For time-ordered data consider
# shuffle=False (chronological hold-out) -- confirm before relying on the
# reported test metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
# Train model: multiclass logistic regression with scikit-learn's default
# hyper-parameters, fitted on the training split only.
model = LogisticRegression()
# fit() trains `model` in place on the training split.
model.fit(X=X_train, y=y_train)

In [34]:
# Accuracy score on training data
# Predict on the same rows the model was fitted on (in-sample performance).
y_train_pred = model.predict(X_train)

# Overall fraction of correctly labelled rows.
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_train_pred)
print("Accuracy on training set:", accuracy_train)

# Per-class precision / recall / F1 for the -1, 0 and 1 signal labels.
report_train = classification_report(y_true=y_train, y_pred=y_train_pred)
print("Classification Report on training set:\n", report_train)

Accuracy on training set: 0.9408602150537635
Classification Report on training set:
               precision    recall  f1-score   support

          -1       0.92      0.79      0.85        28
           0       0.94      0.99      0.97       154
           1       1.00      0.25      0.40         4

    accuracy                           0.94       186
   macro avg       0.95      0.67      0.74       186
weighted avg       0.94      0.94      0.94       186



In [35]:
# Accuracy score on test data
# Predict on the held-out rows that were not used for fitting.
y_test_pred = model.predict(X_test)

# Overall fraction of correctly labelled rows.
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print("Accuracy on testing set:", accuracy_test)

# Per-class precision / recall / F1 for the -1, 0 and 1 signal labels.
report_test = classification_report(y_true=y_test, y_pred=y_test_pred)
print("Classification Report on testing set:\n", report_test)

Accuracy on testing set: 0.8723404255319149
Classification Report on testing set:
               precision    recall  f1-score   support

          -1       0.80      0.80      0.80         5
           0       0.88      0.97      0.92        36
           1       1.00      0.33      0.50         6

    accuracy                           0.87        47
   macro avg       0.89      0.70      0.74        47
weighted avg       0.88      0.87      0.85        47

