In [1]:
# Use the same features defined for other models 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("archive/Stocks/aapl.us.txt")
df = df.iloc[:,:-1]

df['MA_Open_5'] = df['Open'].rolling(5).mean()
df['MA_Close_5'] = df['Close'].rolling(5).mean()
df['MA_Volume_5'] = df['Volume'].rolling(5).mean()

delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(14).mean()
avg_loss = loss.rolling(14).mean()
rs = avg_gain / avg_loss
df['RSI_14'] = 100 - (100 / (1 + rs))

tp = (df['High'] + df['Low'] + df['Close']) / 3
ma = tp.rolling(20).mean()
dev = (tp - ma).abs().rolling(20).mean()
cci = (tp - ma) / (0.015 * dev)
df['CCI_20'] = cci

df['EMA_Close_10'] = df['Close'].ewm(span=10, adjust=False).mean()

sma = df['Close'].rolling(20).mean()
std = df['Close'].rolling(20).std()
df['BB_upper_20'] = sma + 2 * std
df['BB_lower_20'] = sma - 2 * std

df[df.columns.difference(['Date'])] = df[df.columns.difference(['Date'])].apply(lambda x: x.fillna(x.mean()))

In [2]:
# Will implement Support Vector Machine through sklearn library
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
import datetime as dt

df['Date'] = pd.to_datetime(df['Date'])


cutoff_date = pd.to_datetime('2014-01-01')
train = df[df['Date'] < cutoff_date]
test = df[df['Date'] >= cutoff_date]

# Split the training and test sets into features and target
X_train = train.drop(['Date', 'Close'], axis=1)
y_train = train['Close']
X_test = test.drop(['Date', 'Close'], axis=1)
y_test = test['Close']

# Basic Model
svr = SVR(kernel='rbf', C=1e3, gamma=0.1)
# Train the model 
svr.fit(X_train, y_train)

SVR(C=1000.0, gamma=0.1)

In [3]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Evaluate the model on the test set
y_pred = svr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_pred, y_test)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

RMSE: 103.30840251379223


In [4]:
# Grid search to find better parameters
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.001, 0.01,0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 
  
grid = GridSearchCV(SVR(), param_grid, refit = True, verbose = 3)
  
# fitting the model for grid search
grid.fit(X_train, y_train)

# print best parameter after tuning
print(grid.best_params_)
  
# print how our model looks after hyper-parameter tuning
print(grid.best_estimator_)


grid_predictions = grid.predict(X_test)
  
mse = mean_squared_error(y_test, grid_predictions)
mae = mean_absolute_error(grid_predictions, y_test)
rmse = np.sqrt(mse)
print(f'Best RMSE: {rmse}')

Fitting 5 folds for each of 35 candidates, totalling 175 fits
[CV 1/5] END .....C=0.001, gamma=1, kernel=rbf;, score=-3.543 total time=   3.3s
[CV 2/5] END .....C=0.001, gamma=1, kernel=rbf;, score=-0.267 total time=   3.2s
[CV 3/5] END .....C=0.001, gamma=1, kernel=rbf;, score=-0.029 total time=   3.3s
[CV 4/5] END .....C=0.001, gamma=1, kernel=rbf;, score=-0.704 total time=   2.9s
[CV 5/5] END .....C=0.001, gamma=1, kernel=rbf;, score=-3.656 total time=   3.1s
[CV 1/5] END ...C=0.001, gamma=0.1, kernel=rbf;, score=-3.543 total time=   3.6s
[CV 2/5] END ...C=0.001, gamma=0.1, kernel=rbf;, score=-0.267 total time=   4.9s
[CV 3/5] END ...C=0.001, gamma=0.1, kernel=rbf;, score=-0.029 total time=   3.2s
[CV 4/5] END ...C=0.001, gamma=0.1, kernel=rbf;, score=-0.704 total time=   2.9s
[CV 5/5] END ...C=0.001, gamma=0.1, kernel=rbf;, score=-3.656 total time=   2.8s
[CV 1/5] END ..C=0.001, gamma=0.01, kernel=rbf;, score=-3.543 total time=   3.6s
[CV 2/5] END ..C=0.001, gamma=0.01, kernel=rbf;

[CV 2/5] END ......C=10, gamma=1, kernel=rbf;, score=-168.030 total time=   4.3s
[CV 3/5] END .......C=10, gamma=1, kernel=rbf;, score=-23.520 total time=   4.2s
[CV 4/5] END ........C=10, gamma=1, kernel=rbf;, score=-0.096 total time=   4.4s
[CV 5/5] END ........C=10, gamma=1, kernel=rbf;, score=-3.464 total time=   4.4s
[CV 1/5] END ....C=10, gamma=0.1, kernel=rbf;, score=-135.325 total time=   4.4s
[CV 2/5] END ....C=10, gamma=0.1, kernel=rbf;, score=-168.030 total time=   4.3s
[CV 3/5] END .....C=10, gamma=0.1, kernel=rbf;, score=-23.520 total time=   4.4s
[CV 4/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-0.096 total time=   4.6s
[CV 5/5] END ......C=10, gamma=0.1, kernel=rbf;, score=-3.464 total time=   4.5s
[CV 1/5] END ...C=10, gamma=0.01, kernel=rbf;, score=-135.325 total time=   4.4s
[CV 2/5] END ...C=10, gamma=0.01, kernel=rbf;, score=-168.030 total time=   4.3s
[CV 3/5] END ....C=10, gamma=0.01, kernel=rbf;, score=-23.520 total time=   4.4s
[CV 4/5] END .....C=10, gamm