In [1]:
import random
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import torch,torchvision
from torch.nn import *
from tqdm import tqdm
import cv2
from torch.optim import *
# Preproccessing
from sklearn.preprocessing import (
    StandardScaler,
    RobustScaler,
    MinMaxScaler,
    MaxAbsScaler,
    OneHotEncoder,
    Normalizer,
    Binarizer
)
# Models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor,AdaBoostRegressor,VotingRegressor,BaggingRegressor,RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from catboost import CatBoost,CatBoostRegressor
from xgboost import XGBRegressor,XGBRFRegressor
from flaml import AutoML
# Decomposition
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
# Feature Selection
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectFromModel
# Model Eval
from sklearn.compose import make_column_transformer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error
# Other
import pickle
import wandb

PROJECT_NAME = 'Apple-Stock-Price-Prediction-Sklearn'
device = 'cuda:0'
np.random.seed(21)
random.seed(21)
torch.manual_seed(21)

<torch._C.Generator at 0x7f34afaf9890>

In [2]:
def valid(model,X,y,valid=False):
    preds = model.predict(X)
    if valid is True:
        results = {
            'MAE':mean_absolute_error(y_pred=preds,y_true=y),
            'MSE':mean_squared_error(y_pred=preds,y_true=y)
        }
    else:
        results = {
            'Val MAE':mean_absolute_error(y_pred=preds,y_true=y),
            'Val MSE':mean_squared_error(y_pred=preds,y_true=y)
        }
    return results
def train(model,X_train,X_test,y_train,y_test,name):
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    X_train = np.array(X_train)
    X_test = np.array(X_test)
    wandb.init(project=PROJECT_NAME,name=name)
    model.fit(X_train.reshape(-1,1),y_train.reshape(-1,1))
    wandb.log(valid(model,X_train.reshape(-1,1),y_train.reshape(-1,1),True))
    wandb.log(valid(model,X_test.reshape(-1,1),y_test.reshape(-1,1),False))
    pickle.dump(model,open(f'./models/{name}.pkl','wb'))
    wandb.finish()
    return model

In [3]:
data = pd.read_csv('./data/data.csv')

In [4]:
X = data['Date']

In [5]:
y = data['Open']

In [6]:
preprocessings = (
#     StandardScaler,
#     RobustScaler,
#     MinMaxScaler,
#     MaxAbsScaler,
    Normalizer,
    Binarizer
)

In [7]:

decompositions = (
    PCA,
    KernelPCA
)

In [8]:
feature_selections = (
    VarianceThreshold,
    SelectKBest,
)

In [9]:
models = [
    ['KNeighborsRegressor',KNeighborsRegressor],
    ['LogisticRegression',LogisticRegression],
    ['LogisticRegressionCV',LogisticRegressionCV],
    ['DecisionTreeRegressor',DecisionTreeRegressor],
    ['GradientBoostingRegressor',GradientBoostingRegressor],
    ['AdaBoostRegressor',AdaBoostRegressor],
    ['RandomForestRegressor',RandomForestRegressor],
    ['BaggingRegressor',BaggingRegressor],
    ['GaussianNB',GaussianNB],
    ['ExtraTreesRegressor',ExtraTreesRegressor],
    ['CatBoost',CatBoost],
    ['CatBoostRegressor',CatBoostRegressor],
    ['XGBRegressor',XGBRegressor],
    ['XGBRFRegressor',XGBRFRegressor],
    ['ExtraTreesRegressor',ExtraTreesRegressor],
]

In [10]:
new_X = []
for X_batch in X:
    X_batch = X_batch.split('-')
    X_batch = f'{X_batch[0]}{X_batch[1]}{X_batch[2]}'
    X_batch = int(X_batch)
    new_X.append(X_batch)

In [11]:
X = pd.DataFrame(new_X,columns=['Date'])['Date']

In [12]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.125,shuffle=False)

In [13]:
X_train_old = X_train.copy()
X_test_old = X_test.copy()

In [14]:
# train(GradientBoostingRegressor(),X_train,X_test,y_train,y_test,name='baseline')

In [15]:
# for preprocessing in preprocessings:
#     X_train = X_train_old.copy()
#     X_test = X_test_old.copy()
#     X_train = np.array(X_train)
#     X_test = np.array(X_test)
#     preprocessing = preprocessing()
#     X_train = preprocessing.fit_transform(X_train.reshape(-1,1))
#     X_test = preprocessing.transform(X_test.reshape(-1,1))
#     train(GradientBoostingRegressor(),X_train,X_test,y_train,y_test,name=f'{preprocessing}-preprocessing')

In [16]:
# for decomposition in decompositions:
#     X_train = X_train_old.copy()
#     X_test = X_test_old.copy()
#     X_train = np.array(X_train)
#     X_test = np.array(X_test)
#     decomposition = decomposition()
#     X_train = decomposition.fit_transform(X_train.reshape(-1,1))
#     X_test = decomposition.transform(X_test.reshape(-1,1))
#     train(GradientBoostingRegressor(),X_train,X_test,y_train,y_test,name=f'{decomposition}-decomposition')

In [17]:
# for feature_selection in feature_selections:
#     X_train = X_train_old.copy()
#     X_test = X_test_old.copy()
#     X_train = np.array(X_train)
#     X_test = np.array(X_test)
#     y_train = np.array(y_train)
#     y_test = np.array(y_test)
#     feature_selection = feature_selection()
#     try:
#         X_train = feature_selection.fit_transform(X_train.reshape(-1,1))
#         X_test = feature_selection.transform(X_test.reshape(-1,1))
#     except:
#         X_train = feature_selection.fit_transform(X_train.reshape(-1,1),y_train.reshape(-1,1))
#         X_test = feature_selection.transform(X_test.reshape(-1,1),y_test.reshape(-1,1))
#     train(GradientBoostingRegressor(),X_train,X_test,y_train,y_test,name=f'{feature_selection}-feature_selection')

In [18]:
for model in models:
    train(model[1](),X_train,X_test,y_train,y_test,name=f'{model[0]}-model')

[34m[1mwandb[0m: Currently logged in as: [33mranuga-d[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
MAE,0.6112
MSE,1.16444
_runtime,15.0
_timestamp,1630051623.0
_step,1.0
Val MAE,7.38605
Val MSE,75.57354


0,1
MAE,▁
MSE,▁
_runtime,▁▁
_timestamp,▁▁
_step,▁█
Val MAE,▁
Val MSE,▁


[34m[1mwandb[0m: wandb version 0.12.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  return f(*args, **kwargs)


ValueError: Unknown label type: 'continuous'