In [39]:
import xgboost as xgb
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, make_scorer, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV

from prophet.diagnostics import cross_validation, performance_metrics
from prophet.serialize import model_to_json, model_from_json

In [40]:
def get_pred_from_prophet(dates, model_path='models/prophet.json'):
    """Forecast non-negative integer values for ``dates`` with a saved Prophet model.

    Parameters
    ----------
    dates : list-like
        Values placed in the ``ds`` column of the frame handed to
        ``model.predict``.
    model_path : str, optional
        Path of a JSON file readable by ``prophet.serialize.model_from_json``.
        Defaults to the location the notebook has always used.

    Returns
    -------
    list of int
        One entry per forecast row: ``yhat`` converted with ``int()``
        (truncation toward zero) and then clamped so it is never below 0.
    """
    with open(model_path, 'r') as fin:
        model = model_from_json(fin.read())

    future = pd.DataFrame({'ds': dates})
    # Constant bounds attached to every row.
    # NOTE(review): presumably these must match the floor/cap the model was
    # fitted with — confirm against the training notebook.
    future['floor'] = 0
    future['cap'] = 2500

    forecast = model.predict(future)

    return [max(0, int(value)) for value in forecast['yhat']]

In [41]:
def get_model(model_name):
    """Return a fresh, default-configured estimator for ``model_name``.

    Parameters
    ----------
    model_name : str
        One of ``'logistic_regression'``, ``'svm'``, ``'random_forest'``,
        ``'xgboost'`` or ``'naive_bayes'``.

    Returns
    -------
    estimator
        A new unfitted instance. Note that ``'xgboost'`` yields an
        ``XGBRegressor`` while every other option yields a classifier.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported keys.
    """
    if model_name == 'logistic_regression':
        return LogisticRegression()

    if model_name == 'svm':
        return SVC()

    if model_name == 'random_forest':
        return RandomForestClassifier()

    if model_name == 'xgboost':
        return xgb.XGBRegressor()

    if model_name == 'naive_bayes':
        return GaussianNB()

    # Report the argument that was actually passed in; the previous message
    # interpolated the notebook-level global `name` instead.
    raise ValueError(f'Unknown model name: {model_name}')

In [42]:
def handle_dates(df, keep_date=False):
    """Remove the ten date columns from ``df`` in place and return ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '2', '31', '60', ..., '263' (every 29th
        label starting at '2'). The frame is modified in place.
    keep_date : bool, optional
        When true, each date column is parsed with ``pd.to_datetime`` and its
        month is written to ``df['month']`` before the column is dropped.
        The same ``'month'`` column is overwritten on every pass, so the
        value that survives is the month of the last date column ('263').

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    date_columns = [str(label) for label in range(2, 264, 29)]

    for date_col in date_columns:
        if keep_date:
            parsed = pd.to_datetime(df[date_col])
            df['month'] = parsed.dt.month
        df.drop(columns=[date_col], inplace=True)

    return df

In [43]:
# Notebook-wide settings: `name` is the key later passed to get_model(),
# `dataset_path` is the folder the training CSVs are read from.
name = "xgboost"
dataset_path = "data/"

# Read the feature table and the target table.
X = pd.read_csv(dataset_path + "bg.csv")
y = pd.read_csv(dataset_path + "bg_target.csv")

# Features: discard 'Unnamed: 0', '0' and '1' (the original author's note says
# the dates were removed at this step), then let handle_dates() replace the
# remaining date columns with a single 'month' column.
X = X.drop(columns=['Unnamed: 0', '0', '1'])
X = handle_dates(X, keep_date=True)

# Targets: discard 'Unnamed: 0' (presumably a saved index column — confirm).
y = y.drop(columns=['Unnamed: 0'])

In [44]:
def predict_three_days(X, X_test, y):
    """Fit three chained models and return their clipped predictions for ``X_test``.

    A fresh ``get_model(name)`` estimator is fitted per target column
    (``y['0']``, ``y['1']``, ``y['2']``). Between steps the first 29 columns
    are discarded and the previous step's prediction is appended as a new
    feature (``'day11'``, then ``'day12'``).

    The training and test frames are now transformed identically. Previously
    only ``X`` was sliced and extended, and it was extended with predictions
    made for ``X_test``; that fails when the two frames have different row
    counts and leaves the later models predicting on a feature layout they
    were not fitted on.

    Parameters
    ----------
    X : pandas.DataFrame
        Training features. Not modified; later steps work on copies.
    X_test : pandas.DataFrame
        Features to predict for. Must have the same columns as ``X``.
        Not modified.
    y : pandas.DataFrame
        Training targets with columns ``'0'``, ``'1'`` and ``'2'``.

    Returns
    -------
    tuple
        ``(yhat1, yhat2, yhat3)`` — the predictions for ``X_test`` from each
        step, with negative values replaced by 0.
    """
    shift = 29  # leading columns discarded before each later step
    steps = (('0', 'day11'), ('1', 'day12'), ('2', None))

    train_feats = X
    test_feats = X_test
    forecasts = []

    for target_col, carried_feature in steps:
        model = get_model(name)
        model.fit(train_feats, y[target_col])

        test_pred = model.predict(test_feats)
        test_pred[test_pred < 0] = 0
        forecasts.append(test_pred)

        if carried_feature is None:
            continue

        # The training rows get the model's own (clipped) prediction so the
        # carried feature is produced the same way for both frames.
        # NOTE(review): feeding the true target y[target_col] here instead is
        # the other common choice — confirm which one is intended.
        train_pred = model.predict(train_feats)
        train_pred[train_pred < 0] = 0

        # .copy() so the new column is not written into a slice of the
        # caller's frame.
        train_feats = train_feats.iloc[:, shift:].copy()
        train_feats[carried_feature] = train_pred
        test_feats = test_feats.iloc[:, shift:].copy()
        test_feats[carried_feature] = test_pred

    return tuple(forecasts)

In [48]:
# Folder whose entries are read as test CSV files below.
dataset_test_path = "data/test/"
# NOTE(review): this import sits mid-notebook; the other imports are grouped
# in the first cell.
import os

# Read every entry of the test folder with pandas. X_test is rebound on each
# pass, so in the code visible here only the last file read is left in X_test
# once the loop ends.
# NOTE(review): os.listdir returns entries in arbitrary order and lists every
# entry, not just CSV files — confirm the folder holds only the expected files.
# NOTE(review): the recorded output of this cell is "KeyError: '2'", which
# nothing shown here can raise; handle_dates() indexes column '2', so the loop
# body presumably continued beyond these lines when it was run — confirm.
for file in os.listdir(dataset_test_path):
    X_test = pd.read_csv(dataset_test_path + file)

KeyError: '2'