In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# sklearn Regressor Models
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

# Data Preparation
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

# import Visualisation library
from tabulate import tabulate

import ta
from sklearn.decomposition import PCA

# import shap
# import lime.lime_tabular

import warnings
warnings.filterwarnings(
    'ignore', 'invalid value encountered in double_scalars')


In [2]:
# --- Experiment configuration -------------------------------------------
# Ticker whose price history is modelled.
ticker_symbol = "GOOGL"

# Look-back windows (yfinance `period` strings); one model is fitted per entry.
periods = [
    "6mo",
    "1y", "2y", "5y", "8y",
    "10y", "12y", "15y",
]

# Per-period evaluation scores, appended in the same order as `periods`.
r2_svr_pred_list, mse_svr_pred_list, mae_svr_pred_list = [], [], []

# Loop-invariant settings, hoisted out of the per-period loop.
test_days = 30        # number of most recent rows held out for evaluation
corr_threshold = 0.5  # keep features with |corr(feature, Close)| strictly above this

# Hyper-parameter grid searched for the SVR model.
param_grid = {
    'kernel': ['linear', 'poly', 'rbf'],
    'C': [1, 10, 100, 1000],
    'gamma': ['auto', 'scale'],
}

# For every look-back period: download prices, fit a grid-searched SVR on all
# but the last `test_days` rows, score it on those held-out rows, and append
# the R² / MSE / MAE to the three result lists.
for period in periods:
    # auto_adjust=False keeps the separate 'Adj Close' column that the feature
    # list below expects.
    # NOTE(review): newer yfinance releases appear to adjust prices and drop
    # that column by default — confirm against the installed version.
    df = yf.download(ticker_symbol, period=period,
                     auto_adjust=False, progress=False)

    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single ticker; flatten them so df['Close'] is a plain Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Add technical indicators.
    # NOTE: none of these columns are part of X below; their only effect on the
    # model is that rows where they are NaN (indicator warm-up) get dropped.
    # Simple Moving Average (SMA)
    df['SMA_10'] = ta.trend.SMAIndicator(df['Close'], window=10).sma_indicator()

    # Exponential Moving Average (EMA)
    df['EMA_10'] = ta.trend.EMAIndicator(df['Close'], window=10).ema_indicator()

    # Relative Strength Index (RSI)
    df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=10).rsi()

    # Average True Range (ATR).
    # Fix: store the computed series, not the indicator object itself.
    df['ATR'] = ta.volatility.AverageTrueRange(
        df['High'], df['Low'], df['Close'], window=14).average_true_range()

    # Moving Average Convergence Divergence (MACD)
    macd = ta.trend.MACD(df['Close'], window_slow=26,
                         window_fast=12, window_sign=9)
    df['MACD'] = macd.macd()
    df['MACD_Signal'] = macd.macd_signal()

    # Drop rows with missing values (rebinding instead of mutating in place).
    df = df.dropna()

    if len(df) <= test_days:
        raise ValueError(
            f"Period {period!r} leaves only {len(df)} usable rows; "
            f"more than {test_days} are needed to hold out a test window.")

    # Define the independent and dependent variables.
    # NOTE(review): 'Adj Close' is presumably a near-copy of the 'Close' target
    # and the same-day Open/High/Low bracket it, so the scores below likely
    # reflect target leakage rather than forecasting skill — consider lagged
    # features or a shifted target.
    X = df[['Open', 'High', 'Low', 'Adj Close', 'Volume']]
    y = df['Close']

    # Feature selection: keep features whose absolute correlation with the
    # target exceeds the threshold. Computed on the training rows only so the
    # held-out window does not influence which features are chosen.
    corr = X.iloc[:-test_days].corrwith(y.iloc[:-test_days])
    selected_features = corr[corr.abs() > corr_threshold].index.tolist()
    if not selected_features:
        raise ValueError(
            f"No feature exceeds |corr| > {corr_threshold} for period {period!r}.")
    X = X[selected_features]

    # Chronological split: the last `test_days` rows are the test window.
    X_train, X_test = X.iloc[:-test_days], X.iloc[-test_days:]
    y_train, y_test = y.iloc[:-test_days], y.iloc[-test_days:]

    # Scale to [0, 1]; the scaler is fitted on the training rows only.
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train_scaled = scaler.fit_transform(X_train)

    # Expand with polynomial terms up to degree 3.
    poly = PolynomialFeatures(degree=3, include_bias=True)
    X_train_poly = poly.fit_transform(X_train_scaled)

    # Grid-search the SVR hyper-parameters.
    # NOTE(review): cv=5 uses plain k-fold on time-ordered rows; consider
    # sklearn.model_selection.TimeSeriesSplit so validation folds always
    # follow their training folds.
    grid_search_svr = GridSearchCV(
        estimator=SVR(),
        param_grid=param_grid,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_squared_error'
    )

    # Fit the GridSearchCV object to the training data
    grid_search_svr.fit(X_train_poly, y_train)

    # Get the best model
    svr_model = grid_search_svr.best_estimator_

    # Evaluate on the held-out window taken from the frame already in memory.
    # The original re-downloaded these rows (with a hard-coded ticker) and
    # aligned them to `df` by row count only, which could pair predictions
    # with the wrong dates if the second download returned a different number
    # of rows.
    last_index = y_test.index[0]
    df_actual = df.loc[last_index:]

    X_pred = scaler.transform(X_test)
    X_pred_poly = poly.transform(X_pred)

    # Predict the held-out closes with the tuned SVR.
    y_svr_pred = svr_model.predict(X_pred_poly)

    # Actual rows and predictions side by side (handy for plotting).
    df_pred = pd.DataFrame({'SVR Prediction': y_svr_pred}, index=y_test.index)
    df_combined = pd.concat([df_actual, df_pred], axis=1)

    # Calculate evaluation metrics for the SVR model on the held-out window.
    r2_svr_actual = r2_score(
        df_combined['Close'], df_combined['SVR Prediction'])
    mse_svr_actual = mean_squared_error(
        df_combined['Close'], df_combined['SVR Prediction'])
    mae_svr_actual = mean_absolute_error(
        df_combined['Close'], df_combined['SVR Prediction'])

    r2_svr_pred_list.append(r2_svr_actual)
    mse_svr_pred_list.append(mse_svr_actual)
    mae_svr_pred_list.append(mae_svr_actual)

# One row per look-back period: the period label (truncated to at most four
# characters) followed by its R², MSE and MAE scores.
svr_table = [
    [label[:4], r2_svr_pred_list[pos], mse_svr_pred_list[pos], mae_svr_pred_list[pos]]
    for pos, label in enumerate(periods)
]

column_names = ["Year", "R² Score", "MSE Score", "MAE Score"]

print("SVR Model")
print(tabulate(svr_table, headers=column_names, tablefmt="fancy_grid"))

SVR Model
╒════════╤════════════╤═════════════╤═════════════╕
│ Year   │   R² Score │   MSE Score │   MAE Score │
╞════════╪════════════╪═════════════╪═════════════╡
│ 6mo    │   0.995691 │ 0.017282    │   0.110389  │
├────────┼────────────┼─────────────┼─────────────┤
│ 1y     │   0.998637 │ 0.00546861  │   0.0706672 │
├────────┼────────────┼─────────────┼─────────────┤
│ 2y     │   0.999816 │ 0.000739876 │   0.0220363 │
├────────┼────────────┼─────────────┼─────────────┤
│ 5y     │   0.999819 │ 0.000725776 │   0.0198492 │
├────────┼────────────┼─────────────┼─────────────┤
│ 8y     │   0.999731 │ 0.00107862  │   0.0283637 │
├────────┼────────────┼─────────────┼─────────────┤
│ 10y    │   0.999804 │ 0.000786217 │   0.0219084 │
├────────┼────────────┼─────────────┼─────────────┤
│ 12y    │   0.999769 │ 0.00092546  │   0.0245032 │
├────────┼────────────┼─────────────┼─────────────┤
│ 15y    │   0.999858 │ 0.000567996 │   0.0176014 │
╘════════╧════════════╧═════════════╧═════════════╛
