In [None]:
# pip install pandas numpy scikit-learn matplotlib yfinance

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from graphing import graph_normal, show_results, graph_line
from results import get_num_correct_direction_difference
from get_data import get_apple_stock_split , get_recent_apple_stock_split

In [None]:
# Window length (in days) handed to the data loader.
# NOTE(review): presumably the number of lagged observations per sample — confirm in get_data.
DAYS_LAG = 5

X_train, X_test, y_train, y_test = get_apple_stock_split(DAYS_LAG)

# Both scalers learn their min/max from the training split only and are then
# applied unchanged to the test split, so no test-set statistics enter the fit.
scaler_X = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
scaler_Y = MinMaxScaler(feature_range=(-1, 1)).fit(y_train)

X_train, X_test = scaler_X.transform(X_train), scaler_X.transform(X_test)
y_train, y_test = scaler_Y.transform(y_train), scaler_Y.transform(y_test)

print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

In [None]:
# Mean and sample standard deviation (ddof=1) of the scaled targets:
# test split first, then train split.
for scaled_target in (y_test, y_train):
    print(np.mean(scaled_target))
    print(np.std(scaled_target, ddof=1))

<h1> GridSearch setup </h1>

In [None]:
# Hyperparameter search space for the SVR (reduced; a wider grid is kept below for reference).
param_grid = dict(
    kernel=['linear', 'rbf'],   # kernel function used by the SVR
    C=[0.1, 1],                 # regularization parameter: larger C means weaker regularization
    gamma=['auto'],             # kernel coefficient for 'rbf'/'poly'/'sigmoid'; not used by 'linear'
    epsilon=[0.01, 0.1],        # width of the epsilon-tube: training errors inside it carry no penalty
    degree=[2],                 # polynomial degree; only read by the 'poly' kernel, ignored by the others
)



# Wider grid kept for reference (not active):
# param_grid = dict(
#     kernel=['linear', 'rbf', 'poly'],
#     C=[0.1, 1, 10, 100],
#     gamma=['scale', 'auto'],
#     epsilon=[0.01, 0.1, 0.5, 1],
#     degree=[2, 3, 4],
# )

In [None]:
# Training: cross-validated search over every combination in param_grid.
svr = SVR()

# NOTE(review): an integer cv on a regressor means plain, unshuffled K-fold.
# If the samples are time-ordered, TimeSeriesSplit may be the better fit — confirm.
grid_search = GridSearchCV(
    svr,
    param_grid,
    scoring='neg_mean_absolute_error',   # rank candidates by (negated) mean absolute error
    cv=3,                                # 3-fold cross-validation
    n_jobs=-1,                           # use all available cores
    verbose=100,                         # highest-detail progress output in the console
)

# The target is flattened to 1-D before fitting, which is the shape SVR documents for y.
grid_search.fit(X_train, y_train.ravel())

# One row per parameter combination, with its cross-validation scores and timings.
results_df = pd.DataFrame(grid_search.cv_results_)

In [None]:
# Take the best estimator found by the grid search and score it on the held-out test split.
final_model = grid_search.best_estimator_
predictions = final_model.predict(X_test)

# Both y_test and the model's training targets went through scaler_Y,
# so the errors below are expressed in scaled units, not in the original units.
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mse)

print(f"Final Model MAE: {mae}")
print(f"Final Model RMSE: {rmse}")

# Plot predictions against the scaled test targets.
graph_normal(predictions, y_test, "test data")

In [None]:
# Direction check on the test split, with the target scaling undone.
# Both series are reshaped to single columns before being passed to scaler_Y.inverse_transform.
inverse_scaled_prediction = scaler_Y.inverse_transform(predictions.reshape(-1, 1))
inverse_scaled_actual = scaler_Y.inverse_transform(y_test.reshape(-1, 1))
get_num_correct_direction_difference(inverse_scaled_prediction, inverse_scaled_actual)

In [None]:
# Score the tuned model on a more recent slice of data.
X, y = get_recent_apple_stock_split(DAYS_LAG)

print(f"samples: {len(X)}")

# Preprocess with the scalers already fitted on the training split (no re-fitting here).
X_scaled = scaler_X.transform(X)
y_scaled = scaler_Y.transform(y)

# NOTE: this rebinds `predictions`, `mae`, `mse` and `rmse` from the test-set evaluation;
# re-run that cell before re-running anything that expects the test-set values.
predictions = grid_search.best_estimator_.predict(X_scaled)

# Errors are in scaled units (targets and predictions both live in scaler_Y's range).
mae = mean_absolute_error(y_scaled, predictions)
mse = mean_squared_error(y_scaled, predictions)
rmse = np.sqrt(mse)

# Report the metrics; previously they were computed and silently discarded.
print(f"Recent Data MAE: {mae}")
print(f"Recent Data RMSE: {rmse}")

graph_normal(predictions, y_scaled, "recent data predicitons")
graph_line(predictions, y_scaled, "recent data predicitons")

# Direction check with the same inputs as the test-set check: scaling undone and both
# series shaped (n, 1). Passing the raw 1-D predictions against an (n, 1) target
# would broadcast to (n, n) in any element-wise operation inside the helper.
get_num_correct_direction_difference(
    scaler_Y.inverse_transform(predictions.reshape(-1, 1)),
    scaler_Y.inverse_transform(y_scaled.reshape(-1, 1)),
)