In [2]:
# Import dependencies
from pathlib import Path

import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR

In [4]:
# Location of the raw CSV, relative to the notebook's working directory
file_path = Path("Resources/sp500_adj_close_raw.csv")

# Read the file, parse the Date column into datetimes, and use it as the row index
df = (
    pd.read_csv(file_path)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
)


In [6]:
# Feature engineering: expose the calendar parts of the datetime index as columns
calendar_index = df.index
df = df.assign(
    Year=calendar_index.year,
    Month=calendar_index.month,
    Day=calendar_index.day,
    Day_of_Week=calendar_index.dayofweek,
)

In [8]:
# Learn the set of Action labels, then replace the column with its integer codes.
# The fitted encoder stays available as le_action.
# NOTE(review): the column is overwritten in place, so re-running this cell fits the
# encoder on the integer codes instead of the original labels.
le_action = LabelEncoder().fit(df['Action'])
df['Action'] = le_action.transform(df['Action'])
df.head()

Unnamed: 0_level_0,Ticker,Adjusted Close,Next Day Close,Previous Day Close,Return,Volatility,RSI,SMA_50,SMA_100,SMA_200,Upper Band,Lower Band,Support,Resistance,Action,Year,Month,Day,Day_of_Week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2008-01-02,A,23.256376,23.025747,23.538282,-0.011976,0.015705,48.827477,23.314175,23.299887,23.564934,24.727253,22.54023,21.392036,24.351934,3,2008,1,2,2
2008-01-02,AAPL,5.876341,5.879056,5.974059,-0.016357,0.018937,59.067276,5.518483,4.939064,4.19763,6.135834,5.403559,4.637375,6.026841,0,2008,1,2,2
2008-01-02,ABT,18.13021,18.019756,18.24066,-0.006055,0.010484,34.677418,18.138458,17.62825,17.709028,19.233109,18.221804,16.775558,19.134012,3,2008,1,2,2
2008-01-02,ACGL,7.608889,7.764444,7.816667,-0.026581,0.016022,45.15419,7.785511,7.878933,7.874161,8.114465,7.378535,7.463333,8.307778,0,2008,1,2,2
2008-01-02,ACN,26.43708,25.982517,26.415075,0.000833,0.024039,54.81225,26.577984,27.784422,28.471031,28.227203,24.273777,24.765512,29.215675,2,2008,1,2,2


In [10]:
# Target: the encoded Action column, cast to plain integers
y = df['Action'].astype(int)  # Ensure y is an integer

# Features: every remaining column except the target and the columns listed here
non_feature_columns = [
    "Action",
    "Previous Day Close",
    "Resistance",
    "Upper Band",
    "SMA_50",
    "SMA_200",
    "Next Day Close",
    "Ticker",
]
X = df.drop(columns=non_feature_columns)

In [12]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the rows are date-indexed and train_test_split shuffles by default, so
# earlier and later dates end up mixed across train and test — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Standardize the features: the scaling statistics are learned from the training rows
# only and then applied unchanged to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Hyperparameter values explored by the grid search: 3 x 3 x 2 = 18 combinations
param_grid = dict(
    C=[0.1, 1, 10],
    epsilon=[0.01, 0.1, 0.5],
    kernel=['linear', 'rbf'],
)


In [None]:
# Exhaustive 5-fold search over param_grid, scored by negative mean squared error,
# with the fits spread across all available CPU cores (n_jobs=-1).
# NOTE(review): every candidate trains an SVR on the full scaled training set; SVR
# training time grows faster than linearly with the row count — confirm this is
# feasible for the size of this dataset.
grid_svr = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)
grid_svr.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [None]:
# Report the winning parameter combination and its cross-validated error.
# The search was scored with 'neg_mean_squared_error', so best_score_ is negated
# here to print it as a positive mean squared error.
best_params = grid_svr.best_params_
best_cv_mse = -grid_svr.best_score_
print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_cv_mse)

In [None]:
# Take the tuned model from the grid search.
# GridSearchCV was created without a refit argument, so its default (refit=True) has
# already retrained best_estimator_ on the whole of (X_train_scaled, y_train) with the
# best parameters. Calling .fit() again on the same data would only repeat that
# expensive SVR training, so the estimator is used as-is.
best_svr = grid_svr.best_estimator_
best_svr

In [None]:
# Predict on the test set using the best model
# best_svr is an SVR regressor, so y_pred holds one continuous value per row of
# X_test_scaled (not an integer class code).
y_pred = best_svr.predict(X_test_scaled)

In [None]:
# Evaluate the model
# mean_squared_error is provided by sklearn.metrics and must be imported in the
# dependencies cell; without that import this cell fails with a NameError on a
# fresh kernel.
# The metric compares the integer-encoded Action values in y_test with the
# continuous SVR outputs in y_pred.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error for Best SVR Model:", mse)

In [None]:
# Select the rows for the most recent date in the index and strip the target plus the
# columns that were excluded from the training features. Ticker is deliberately kept
# at this stage.
latest_date = df.index.max()
is_latest_row = df.index == latest_date
last_day_excluded = [
    "Action",
    "Previous Day Close",
    "Resistance",
    "Upper Band",
    "SMA_50",
    "SMA_200",
    "Next Day Close",
]
last_day_svr = df.loc[is_latest_row].drop(columns=last_day_excluded)


In [None]:
# Preserve the Ticker column from the last day
# Held as its own Series (same Date index as last_day_svr) so the ticker symbols stay
# available independently of the numeric feature matrix.
last_day_tickers = last_day_svr['Ticker']

In [None]:
# Build the last-day feature matrix from exactly the columns the model was trained on.
# Selecting by X.columns (rather than dropping "Ticker") keeps the feature set and its
# order tied to the training matrix, and still works if this cell is re-run after a
# Predicted_Action column has been added to last_day_svr.
X_last_day_svr = last_day_svr.loc[:, X.columns]

In [None]:
# Standardize the last day data
# Reuses the scaler that was fitted on X_train (transform only, no re-fit), so the
# last-day features are expressed on the same scale the model was trained on.
last_day_scaled = scaler.transform(X_last_day_svr)
# One SVR output per last-day row; the values are continuous and are not rounded or
# mapped back to Action labels here.
predicted_actions = best_svr.predict(last_day_scaled)

In [None]:
# Attach the model output as a new Predicted_Action column and write the saved ticker
# symbols back into the Ticker column.
last_day_svr = last_day_svr.assign(
    Predicted_Action=predicted_actions,
    Ticker=last_day_tickers,
)

In [None]:
# Show, for each last-day row, the ticker, its adjusted close and the model output
summary_columns = ['Ticker', 'Adjusted Close', 'Predicted_Action']
print(last_day_svr[summary_columns])

In [None]:
# Export the predictions
# Writes the whole last_day_svr frame to a CSV at this relative path; the Date index
# is written as well because to_csv is called with its default index setting.
last_day_svr.to_csv('predicted_actions_last_day_SVR.csv')