In [393]:
import numpy as np 
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [394]:
def open_file(path: str) -> pd.DataFrame:
    return pd.DataFrame(pd.read_csv(path))

def trim_data(data_file: pd.DataFrame, field) -> pd.DataFrame:
    return data_file[field]

In [395]:
    
def set_prediction(data_file: pd.DataFrame, shift_period: int):
    data_file["pred"] = data_file["close"].shift(-shift_period)
    
def create_independent_numpy(data_file: pd.DataFrame, shift_period, drop=1):   
    data = data_file['pred']
    if drop:
        return np.array(data.drop(0)[:-shift_period])
    else:
        return np.array(data[:-shift_period-1])

def split_data(x: np.array, y: np.array, proportion: float):
    return train_test_split(x, y, test_size=proportion)

def prepare_forecast(data_file, shift_period) -> np.array:
    data_file = data_file['pred']
    print(data_file)
    return np.array(data_file.drop(0))[-2*shift_period:-shift_period]

In [396]:
def organiser(path: str, shift_period: int):
    df = open_file(path)
    df = trim_data(df, ["close", "date"])
    set_prediction(df, shift_period)
    x = create_independent_numpy(df, shift_period).reshape(-1, 1)
    y = create_independent_numpy(df, shift_period, drop=0).reshape(-1, 1)
    x_train, x_data, y_train, y_data = split_data(x, y, 0.2)
    svr = create_and_train_svm(x_train, y_train)
    print("Cofidence for svr: ", confidence(svr, [x_data, y_data]))
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    print("Cofidence for lr: ", confidence(lr, [x_data, y_data]))
    x_forecast = prepare_forecast(df, shift_period).reshape(-1, 1)
    lr_prediction = lr.predict(x_forecast)
    print(lr_prediction)
    svm_prediction = svr.predict(x_forecast)
    print(svm_prediction)

In [397]:
def create_and_train_svm(x_train, y_train):
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_rbf = svr_rbf.fit(x_train, y_train)
    return svr_rbf

def confidence(model, data):
    return model.score(data[0], data[1])

In [398]:

organiser("data/APEX.csv", 5)

  y = column_or_1d(y, warn=True)


Cofidence for svr:  0.998245521841267
Cofidence for lr:  0.9982111093260644
0       7.375
1       7.375
2       7.375
3       7.250
4       7.250
        ...  
5027      NaN
5028      NaN
5029      NaN
5030      NaN
5031      NaN
Name: pred, Length: 5032, dtype: float64
[[0.75841633]
 [0.73993644]
 [0.73094622]
 [0.7473284 ]
 [0.76730666]]
[0.74990205 0.73311946 0.72500618 0.73981573 0.7580248 ]
