In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVR

# Stock Price Prediction using Linear Regression, Support Vector Machine (SVM), and Random Forest

In [2]:
def Standard_Scaling(df, features, feature_transform=None):
    """Standardize the selected feature columns of ``df``.

    A fresh ``StandardScaler`` is fit on ``df[features]`` and the scaled values
    are returned as a new DataFrame carrying the same column labels and the
    same row index as the input.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the columns named in ``features``.
    features : list-like of column labels
        Columns to scale; also used as the column labels of the result.
    feature_transform : ignored, optional
        Never read. Accepted only so that existing calls which pass a third
        positional argument keep working.

    Returns
    -------
    pandas.DataFrame
        Scaled values with ``columns=features`` and ``index=df.index``.
    """
    # NOTE(review): the scaler is fit on every row of ``df``. If the result is
    # split into train/test afterwards, the test rows have already influenced
    # the scaling statistics -- confirm this is acceptable for the analysis.
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df[features])
    return pd.DataFrame(data=scaled_values, columns=features, index=df.index)

In [3]:
def Train_Test_Split_BasicModel(df, feature_transform, target_col='Close', n_splits=10):
    """Chronological train/test split based on the last ``TimeSeriesSplit`` fold.

    Only the final fold produced by ``TimeSeriesSplit(n_splits)`` is used: the
    leading ``len(train_index)`` rows become the training set and the
    ``len(test_index)`` rows immediately after them become the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the target column.
    feature_transform : sliceable, same row order as ``df``
        Feature matrix; sliced positionally with ``[:k]`` / ``[k:m]``.
    target_col : column label, default ``'Close'``
        Column of ``df`` used as the prediction target.
    n_splits : int, default 10
        Number of folds requested from ``TimeSeriesSplit``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; the ``y_*`` entries are
        flattened 1-D NumPy arrays.
    """
    target = df[target_col].to_numpy()

    splitter = TimeSeriesSplit(n_splits=n_splits)
    # Earlier folds were computed and thrown away before; take the last one
    # directly instead of rebuilding the slices on every iteration.
    *_, (train_index, test_index) = splitter.split(feature_transform)

    n_train = len(train_index)
    stop = n_train + len(test_index)

    X_train, X_test = feature_transform[:n_train], feature_transform[n_train:stop]
    y_train, y_test = target[:n_train].ravel(), target[n_train:stop].ravel()

    return X_train, X_test, y_train, y_test

# Linear Regression Model

In [4]:
def Prediction_LR(Basic_X_train, Basic_X_test, Basic_y_train, Basic_y_test):
    """Fit a linear regression on the training data and score it on the test data.

    Parameters
    ----------
    Basic_X_train, Basic_y_train
        Features and targets the model is fit on.
    Basic_X_test, Basic_y_test
        Features to predict for and the true targets used for scoring.

    Returns
    -------
    tuple
        ``(linear_model, linear_y_pred, linear_mse, linear_mae, linear_rmse)``:
        the fitted estimator, its predictions for ``Basic_X_test``, and the
        mean squared error, mean absolute error and root mean squared error
        of those predictions against ``Basic_y_test``.
    """
    linear_model = LinearRegression()

    # Use the arguments that were passed in. The body used to read notebook
    # globals (X_train, y_train, ...) and silently ignored its parameters.
    linear_model.fit(Basic_X_train, Basic_y_train)
    linear_y_pred = linear_model.predict(Basic_X_test)

    # Evaluation metrics
    linear_mse = mean_squared_error(Basic_y_test, linear_y_pred)
    linear_mae = mean_absolute_error(Basic_y_test, linear_y_pred)
    linear_rmse = np.sqrt(linear_mse)

    return linear_model, linear_y_pred, linear_mse, linear_mae, linear_rmse

In [5]:
def Visualize_LR(linear_model, linear_y_pred, linear_mse, linear_mae, linear_rmse, y_test=None):
    """Print the linear-regression error metrics and plot predictions vs. truth.

    Parameters
    ----------
    linear_model
        Fitted estimator; only its string form is used, in the legend label.
    linear_y_pred
        Predicted values to plot.
    linear_mse, linear_mae, linear_rmse
        Metric values to print.
    y_test : optional
        True values to plot. When omitted, the notebook-level variable
        ``y_test`` is used so that older five-argument calls keep working.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward compatibility: this function used to read the global directly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError(
                "y_test was not passed and no notebook-level 'y_test' is defined"
            ) from None

    # MSE, MAE, RMSE
    print(f'LR Mean Squared Error (MSE): {linear_mse}')
    print(f'LR Mean Absolute Error (MAE): {linear_mae}')
    print(f'LR Root Mean Squared Error (RMSE): {linear_rmse}')

    # Plot the results
    plt.figure(figsize=(14, 7))
    plt.plot(y_test, label='True Value')
    plt.plot(linear_y_pred, label=f'{linear_model} Value')
    plt.title('Prediction by Linear Regression')
    plt.xlabel('Time Scale')
    plt.ylabel('Scaled USD')
    plt.legend()
    plt.grid(True)
    plt.show()

# SVM Model

In [6]:
def Prediction_SVM(Basic_X_train, Basic_X_test, Basic_y_train, Basic_y_test):
    """Fit an RBF-kernel SVR on the training data and score it on the test data.

    The regressor is created with ``kernel='rbf'``, ``C=100``, ``gamma=0.1``
    and ``epsilon=0.1``.

    Parameters
    ----------
    Basic_X_train, Basic_y_train
        Features and targets the model is fit on.
    Basic_X_test, Basic_y_test
        Features to predict for and the true targets used for scoring.

    Returns
    -------
    tuple
        ``(svm_model, svm_y_pred, svm_mse, svm_mae, svm_rmse)``: the fitted
        estimator, its predictions for ``Basic_X_test``, and the mean squared
        error, mean absolute error and root mean squared error of those
        predictions against ``Basic_y_test``.
    """
    svm_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)

    # Use the arguments that were passed in. The body used to read notebook
    # globals (X_train, y_train, ...) and silently ignored its parameters.
    svm_model.fit(Basic_X_train, Basic_y_train)
    svm_y_pred = svm_model.predict(Basic_X_test)

    # Evaluation metrics
    svm_mse = mean_squared_error(Basic_y_test, svm_y_pred)
    svm_mae = mean_absolute_error(Basic_y_test, svm_y_pred)
    svm_rmse = np.sqrt(svm_mse)

    return svm_model, svm_y_pred, svm_mse, svm_mae, svm_rmse

In [7]:
def Visualize_SVM(svm_model, svm_y_pred, svm_mse, svm_mae, svm_rmse, y_test=None):
    """Print the SVM error metrics and plot predictions vs. truth.

    Parameters
    ----------
    svm_model
        Fitted estimator; only its string form is used, in the legend label.
    svm_y_pred
        Predicted values to plot.
    svm_mse, svm_mae, svm_rmse
        Metric values to print.
    y_test : optional
        True values to plot. When omitted, the notebook-level variable
        ``y_test`` is used so that older five-argument calls keep working.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward compatibility: this function used to read the global directly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError(
                "y_test was not passed and no notebook-level 'y_test' is defined"
            ) from None

    # MSE, MAE, RMSE
    print(f'SVM Mean Squared Error (MSE): {svm_mse}')
    print(f'SVM Mean Absolute Error (MAE): {svm_mae}')
    print(f'SVM Root Mean Squared Error (RMSE): {svm_rmse}')

    # Plot the results
    plt.figure(figsize=(14, 7))
    plt.plot(y_test, label='True Value')
    plt.plot(svm_y_pred, label=f'{svm_model} Value')
    plt.title('Prediction by SVM')
    plt.xlabel('Time Scale')
    plt.ylabel('Scaled USD')
    plt.legend()
    plt.grid(True)
    plt.show()

# Random Forest Model

In [8]:
def Prediction_RF(Basic_X_train, Basic_X_test, Basic_y_train, Basic_y_test):
    """Fit a random-forest regressor on the training data and score it on the test data.

    The regressor is created with ``n_estimators=100`` and ``random_state=42``.

    Parameters
    ----------
    Basic_X_train, Basic_y_train
        Features and targets the model is fit on.
    Basic_X_test, Basic_y_test
        Features to predict for and the true targets used for scoring.

    Returns
    -------
    tuple
        ``(rf_model, rf_y_pred, rf_mse, rf_mae, rf_rmse)``: the fitted
        estimator, its predictions for ``Basic_X_test``, and the mean squared
        error, mean absolute error and root mean squared error of those
        predictions against ``Basic_y_test``.
    """
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

    # Use the arguments that were passed in. The body used to read notebook
    # globals (X_train, y_train, ...) and silently ignored its parameters.
    rf_model.fit(Basic_X_train, Basic_y_train)
    rf_y_pred = rf_model.predict(Basic_X_test)

    # Evaluation metrics
    rf_mse = mean_squared_error(Basic_y_test, rf_y_pred)
    rf_mae = mean_absolute_error(Basic_y_test, rf_y_pred)
    rf_rmse = np.sqrt(rf_mse)

    return rf_model, rf_y_pred, rf_mse, rf_mae, rf_rmse

In [9]:
def Visualize_RF(rf_model, rf_y_pred, rf_mse, rf_mae, rf_rmse, y_test=None):
    """Print the random-forest error metrics and plot predictions vs. truth.

    Parameters
    ----------
    rf_model
        Fitted estimator; only its string form is used, in the legend label.
    rf_y_pred
        Predicted values to plot.
    rf_mse, rf_mae, rf_rmse
        Metric values to print.
    y_test : optional
        True values to plot. When omitted, the notebook-level variable
        ``y_test`` is used so that older five-argument calls keep working.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no notebook-level ``y_test`` exists.
    """
    if y_test is None:
        # Backward compatibility: this function used to read the global directly.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError(
                "y_test was not passed and no notebook-level 'y_test' is defined"
            ) from None

    # MSE, MAE, RMSE
    print(f'RF Mean Squared Error (MSE): {rf_mse}')
    print(f'RF Mean Absolute Error (MAE): {rf_mae}')
    print(f'RF Root Mean Squared Error (RMSE): {rf_rmse}')

    # Plot the results
    plt.figure(figsize=(14, 7))
    plt.plot(y_test, label='True Value')
    plt.plot(rf_y_pred, label=f'{rf_model} Value')
    plt.title('Prediction by Random Forest')
    plt.xlabel('Time Scale')
    plt.ylabel('Scaled USD')
    plt.legend()
    plt.grid(True)
    plt.show()