In [3]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import graph_objs as go
from scipy.stats import pearsonr
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import timedelta
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, Dropout, Activation
from catboost import CatBoostRegressor

from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.FileHandler('multi_boosting.log')
handler.setLevel(logging.INFO)
logger.addHandler(handler)

%matplotlib inline
init_notebook_mode(connected = True)

In [28]:
class PPPredictor:
    
    def __init__(self):
        self.model = None
        self.week_shift = 13
    
    def prepare_data(self, data, actual_date):
        df = data.copy()
        df.index = pd.to_datetime(df['Date'])
        df.drop(['Date'], axis=1, inplace=True)
        for col in df.columns:
            df[col].interpolate(method='time', inplace=True)
            df[col].fillna(method='bfill', inplace=True)
        df.index = pd.to_datetime(df.index)
        return df[df.index <= pd.to_datetime(actual_date)]

    def fit(self, prepared_data, use_text_model=False):
        model = CatBoostRegressor()
        
        df = prepared_data.drop(['PPSpotAvgPrice'], axis=1)
        df['Date'] = pd.to_numeric(df.index)
        
        self.scaler = MinMaxScaler()
        X_train = self.scaler.fit_transform(df)
        self.X_train = X_train
        
        X_train = X_train[:-self.week_shift]
        
        self.scaler_y = MinMaxScaler()
        y_train = np.reshape(np.array(prepared_data['PPSpotAvgPrice']), (-1, 1))
        y_train = self.scaler_y.fit_transform(y_train)
        y_train = y_train[self.week_shift:, 0]
        
        #X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
        #y_train = np.reshape(y_train, (y_train.shape[0], 1, 1))
        #print(X_train.shape, y_train.shape)
        
        print(X_train)
        model.fit(X_train, y_train)
        
        self.model = model
        return model

    
    def predict(self, date):
        X_test = np.reshape(self.X_train[-1], (1, -1))
        print(X_test)
        
        forecast = self.model.predict(X_test)
        print(forecast)
        return self.scaler_y.inverse_transform(forecast)

In [29]:
 def get_next_monday(df, date):
    first = True
    while len(df[df.index == str(date).split()[0]]) == 0:
        if first:
            date += timedelta(days=((7 - date.weekday()) % 7))
            first = False
        else:
            date += timedelta(7)
    return date

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def test_sol(date):
    df = pd.read_csv('./data/retrieved_data.csv')
    ppp = PPPredictor()
    prepared = ppp.prepare_data(df, date)
    ppp.fit(prepared)
    
    df.index = pd.to_datetime(df['Date'])
    df.drop(['Date'], axis=1, inplace=True)
    for col in df.columns:
        df[col].interpolate(method='time', inplace=True)
        df[col].fillna(method='bfill', inplace=True)
    
    dt = pd.to_datetime(date) + timedelta(days=84)
    dt = get_next_monday(df, dt)
    pred = []
    actual = []
    pred.append(ppp.predict(str(dt).split()[0]))
    right_val = df[df.index == str(dt).split()[0]]['PPSpotAvgPrice'].iloc[0]
    actual.append(right_val)
    logger.info('MAPE for {}: {:.2f}%'.format(date, mean_absolute_percentage_error(pred, actual)))

for dt in pd.date_range(start=pd.to_datetime('2015-12-01'), 
                        end=pd.to_datetime('2018-07-01'), freq='M'):
    test_sol(str(dt).split()[0])
logger.info('----------------------------')

[[0.9804878  0.79121654 1.         0.09012086 0.78453591 0.90445551
  0.        ]
 [0.97926829 0.79121654 1.         0.09774041 0.78453591 0.90688357
  0.00909091]
 [0.98170732 0.83297324 0.98248229 0.08328954 0.78453591 0.92375865
  0.01818182]
 [0.98780488 0.87472993 0.96496458 0.05885444 0.78453591 0.92958601
  0.02727273]
 [0.98902439 0.91648662 0.94744687 0.03678403 0.78453591 0.89996358
  0.03636364]
 [0.99634146 0.95824331 0.92992916 0.04939569 0.78453591 0.93711303
  0.04545455]
 [1.         1.         0.91241145 0.01103521 0.78453591 0.9433046
  0.05454545]
 [0.99878049 0.83802763 0.82126855 0.00814503 0.78453591 0.91453199
  0.06363636]
 [0.99878049 0.67605527 0.73012564 0.         0.78453591 0.92655093
  0.07272727]
 [0.99756098 0.5140829  0.63898274 0.00867052 0.78453591 0.92703654
  0.08181818]
 [0.99146341 0.35211053 0.54783983 0.00709406 0.78453591 0.93832706
  0.09090909]
 [0.9902439  0.28088031 0.52256369 0.01103521 0.78453591 0.90591235
  0.1       ]
 [0.9897561  0.20

123:	learn: 0.0538402	total: 174ms	remaining: 1.23s
124:	learn: 0.0530554	total: 185ms	remaining: 1.3s
125:	learn: 0.0526728	total: 189ms	remaining: 1.31s
126:	learn: 0.0523429	total: 191ms	remaining: 1.31s
127:	learn: 0.0518655	total: 194ms	remaining: 1.32s
128:	learn: 0.0515172	total: 198ms	remaining: 1.34s
129:	learn: 0.0511859	total: 201ms	remaining: 1.34s
130:	learn: 0.0508879	total: 205ms	remaining: 1.36s
131:	learn: 0.0503840	total: 209ms	remaining: 1.37s
132:	learn: 0.0498437	total: 212ms	remaining: 1.38s
133:	learn: 0.0493000	total: 215ms	remaining: 1.39s
134:	learn: 0.0488611	total: 218ms	remaining: 1.4s
135:	learn: 0.0484630	total: 222ms	remaining: 1.41s
136:	learn: 0.0481252	total: 225ms	remaining: 1.41s
137:	learn: 0.0478517	total: 229ms	remaining: 1.43s
138:	learn: 0.0475838	total: 231ms	remaining: 1.43s
139:	learn: 0.0473237	total: 236ms	remaining: 1.45s
140:	learn: 0.0470910	total: 246ms	remaining: 1.5s
141:	learn: 0.0468474	total: 252ms	remaining: 1.52s
142:	learn: 0.0

345:	learn: 0.0354603	total: 707ms	remaining: 1.33s
346:	learn: 0.0354460	total: 711ms	remaining: 1.34s
347:	learn: 0.0354407	total: 714ms	remaining: 1.34s
348:	learn: 0.0354376	total: 717ms	remaining: 1.34s
349:	learn: 0.0354293	total: 719ms	remaining: 1.33s
350:	learn: 0.0354247	total: 724ms	remaining: 1.34s
351:	learn: 0.0354237	total: 727ms	remaining: 1.34s
352:	learn: 0.0354092	total: 730ms	remaining: 1.34s
353:	learn: 0.0354071	total: 732ms	remaining: 1.34s
354:	learn: 0.0353869	total: 736ms	remaining: 1.34s
355:	learn: 0.0353654	total: 738ms	remaining: 1.33s
356:	learn: 0.0353522	total: 745ms	remaining: 1.34s
357:	learn: 0.0353324	total: 747ms	remaining: 1.34s
358:	learn: 0.0353293	total: 749ms	remaining: 1.34s
359:	learn: 0.0353242	total: 757ms	remaining: 1.34s
360:	learn: 0.0353159	total: 767ms	remaining: 1.36s
361:	learn: 0.0353023	total: 775ms	remaining: 1.36s
362:	learn: 0.0352884	total: 776ms	remaining: 1.36s
363:	learn: 0.0352839	total: 777ms	remaining: 1.36s
364:	learn: 

542:	learn: 0.0339390	total: 1.24s	remaining: 1.04s
543:	learn: 0.0339372	total: 1.24s	remaining: 1.04s
544:	learn: 0.0339359	total: 1.24s	remaining: 1.04s
545:	learn: 0.0339348	total: 1.25s	remaining: 1.04s
546:	learn: 0.0339342	total: 1.25s	remaining: 1.03s
547:	learn: 0.0339337	total: 1.25s	remaining: 1.03s
548:	learn: 0.0339216	total: 1.26s	remaining: 1.03s
549:	learn: 0.0339158	total: 1.26s	remaining: 1.03s
550:	learn: 0.0339052	total: 1.26s	remaining: 1.03s
551:	learn: 0.0339044	total: 1.26s	remaining: 1.03s
552:	learn: 0.0339041	total: 1.26s	remaining: 1.02s
553:	learn: 0.0339020	total: 1.27s	remaining: 1.02s
554:	learn: 0.0338926	total: 1.27s	remaining: 1.02s
555:	learn: 0.0338914	total: 1.27s	remaining: 1.02s
556:	learn: 0.0338903	total: 1.28s	remaining: 1.01s
557:	learn: 0.0338884	total: 1.28s	remaining: 1.01s
558:	learn: 0.0338881	total: 1.28s	remaining: 1.01s
559:	learn: 0.0338877	total: 1.28s	remaining: 1.01s
560:	learn: 0.0338863	total: 1.28s	remaining: 1.01s
561:	learn: 

781:	learn: 0.0307984	total: 1.72s	remaining: 480ms
782:	learn: 0.0304770	total: 1.72s	remaining: 478ms
783:	learn: 0.0304741	total: 1.73s	remaining: 475ms
784:	learn: 0.0304736	total: 1.73s	remaining: 473ms
785:	learn: 0.0304719	total: 1.73s	remaining: 471ms
786:	learn: 0.0304283	total: 1.73s	remaining: 468ms
787:	learn: 0.0304266	total: 1.73s	remaining: 466ms
788:	learn: 0.0303143	total: 1.73s	remaining: 463ms
789:	learn: 0.0303127	total: 1.73s	remaining: 461ms
790:	learn: 0.0302445	total: 1.74s	remaining: 459ms
791:	learn: 0.0302427	total: 1.74s	remaining: 456ms
792:	learn: 0.0302418	total: 1.74s	remaining: 454ms
793:	learn: 0.0300948	total: 1.74s	remaining: 452ms
794:	learn: 0.0300940	total: 1.74s	remaining: 450ms
795:	learn: 0.0300930	total: 1.75s	remaining: 448ms
796:	learn: 0.0300865	total: 1.75s	remaining: 445ms
797:	learn: 0.0299812	total: 1.75s	remaining: 443ms
798:	learn: 0.0297389	total: 1.75s	remaining: 441ms
799:	learn: 0.0295543	total: 1.75s	remaining: 439ms
800:	learn: 

[[0.         0.04287778 0.30585367 0.97346295 0.01683053 0.01347578
  1.        ]]
[0.00760753]


ValueError: Expected 2D array, got 1D array instead:
array=[0.00760753].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.