In [1]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import graph_objs as go
from scipy.stats import pearsonr
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from datetime import timedelta
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten, Dropout, Activation
from catboost import CatBoostRegressor

from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.FileHandler('catboosting.log')
handler.setLevel(logging.INFO)
logger.addHandler(handler)

%matplotlib inline
init_notebook_mode(connected = True)

Using TensorFlow backend.

inspect.getargspec() is deprecated, use inspect.signature() or inspect.getfullargspec()



In [4]:
class PPPredictor:
    
    def __init__(self):
        self.model = None
        self.week_shift = 13
    
    def prepare_data(self, data, actual_date):
        df = data.copy()
        df.index = pd.to_datetime(df['Date'])
        df.drop(['Date'], axis=1, inplace=True)
        for col in df.columns:
            df[col].interpolate(method='time', inplace=True)
            df[col].fillna(method='bfill', inplace=True)
        df.index = pd.to_datetime(df.index)
        return df[df.index <= pd.to_datetime(actual_date)]

    def fit(self, prepared_data, use_text_model=False):
        model = CatBoostRegressor(depth=10)
        
        df = pd.DataFrame({'ds': prepared_data.index, 'y': prepared_data['PPSpotAvgPrice']}).reset_index().drop(['Date'], axis=1)
        df['ds'] = pd.to_numeric(df['ds'])
        
        self.scaler_X = MinMaxScaler()
        X_train = np.reshape(np.array(df['ds']), (-1, 1))
        X_train = self.scaler_X.fit_transform(X_train)
        X_train = X_train[:-self.week_shift, 0]
        
        self.scaler_y = MinMaxScaler()
        y_train = np.reshape(np.array(df['y']), (-1, 1))
        y_train = self.scaler_y.fit_transform(y_train)
        y_train = y_train[self.week_shift:, 0]
        
        #X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
        #y_train = np.reshape(y_train, (y_train.shape[0], 1, 1))
        #print(X_train.shape, y_train.shape)
        model.fit(X_train, y_train)
        
        self.model = model
        return model

    
    def predict(self, date):
        date = pd.to_datetime(date)
        date -= timedelta(self.week_shift * 7)
        X_test = np.array(pd.to_numeric(pd.Series(date)))
        X_test = np.reshape(X_test, (-1, 1))
        X_test = self.scaler_X.transform(X_test)
        X_test = X_test[:, 0]
        forecast = self.model.predict(X_test).reshape((-1, 1))
        return self.scaler_y.inverse_transform(forecast)

In [5]:
 def get_next_monday(df, date):
    first = True
    while len(df[df.index == str(date).split()[0]]) == 0:
        if first:
            date += timedelta(days=((7 - date.weekday()) % 7))
            first = False
        else:
            date += timedelta(7)
    return date

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def test_sol(date):
    df = pd.read_csv('./data/retrieved_data.csv')
    ppp = PPPredictor()
    prepared = ppp.prepare_data(df, date)
    ppp.fit(prepared)
    
    df.index = pd.to_datetime(df['Date'])
    df.drop(['Date'], axis=1, inplace=True)
    for col in df.columns:
        df[col].interpolate(method='time', inplace=True)
        df[col].fillna(method='bfill', inplace=True)
    
    dt = pd.to_datetime(date) + timedelta(days=84)
    dt = get_next_monday(df, dt)
    pred = []
    actual = []
    pred.append(ppp.predict(str(dt).split()[0]))
    right_val = df[df.index == str(dt).split()[0]]['PPSpotAvgPrice'].iloc[0]
    actual.append(right_val)
    logger.info('MAPE for {}: {:.2f}%'.format(date, mean_absolute_percentage_error(pred, actual)))

for dt in pd.date_range(start=pd.to_datetime('2015-12-01'), 
                        end=pd.to_datetime('2018-07-01'), freq='M'):
    test_sol(str(dt).split()[0])
logger.info('----------------------------')

0:	learn: 0.5644750	total: 8.15ms	remaining: 8.15s
1:	learn: 0.5517052	total: 15.7ms	remaining: 7.84s
2:	learn: 0.5388354	total: 23.1ms	remaining: 7.69s
3:	learn: 0.5263686	total: 30.6ms	remaining: 7.61s
4:	learn: 0.5132803	total: 33.6ms	remaining: 6.69s
5:	learn: 0.5010033	total: 40.8ms	remaining: 6.76s
6:	learn: 0.4891943	total: 47.7ms	remaining: 6.77s
7:	learn: 0.4761636	total: 48.6ms	remaining: 6.02s
8:	learn: 0.4652348	total: 55.4ms	remaining: 6.1s
9:	learn: 0.4548503	total: 62.2ms	remaining: 6.16s
10:	learn: 0.4431349	total: 63.5ms	remaining: 5.71s
11:	learn: 0.4323759	total: 70.2ms	remaining: 5.78s
12:	learn: 0.4222699	total: 77.1ms	remaining: 5.85s
13:	learn: 0.4121690	total: 77.9ms	remaining: 5.49s
14:	learn: 0.4018164	total: 79.5ms	remaining: 5.22s
15:	learn: 0.3923699	total: 86.1ms	remaining: 5.3s
16:	learn: 0.3823636	total: 87ms	remaining: 5.03s
17:	learn: 0.3726437	total: 87.5ms	remaining: 4.77s
18:	learn: 0.3630083	total: 88ms	remaining: 4.54s
19:	learn: 0.3542356	total: 

194:	learn: 0.0318847	total: 1.15s	remaining: 4.75s
195:	learn: 0.0318614	total: 1.15s	remaining: 4.73s
196:	learn: 0.0318465	total: 1.16s	remaining: 4.71s
197:	learn: 0.0318040	total: 1.16s	remaining: 4.69s
198:	learn: 0.0317459	total: 1.17s	remaining: 4.71s
199:	learn: 0.0317232	total: 1.17s	remaining: 4.69s
200:	learn: 0.0317096	total: 1.17s	remaining: 4.67s
201:	learn: 0.0316825	total: 1.18s	remaining: 4.67s
202:	learn: 0.0316603	total: 1.19s	remaining: 4.66s
203:	learn: 0.0316381	total: 1.19s	remaining: 4.63s
204:	learn: 0.0316300	total: 1.19s	remaining: 4.61s
205:	learn: 0.0315834	total: 1.2s	remaining: 4.62s
206:	learn: 0.0315740	total: 1.2s	remaining: 4.59s
207:	learn: 0.0315507	total: 1.2s	remaining: 4.57s
208:	learn: 0.0315411	total: 1.2s	remaining: 4.55s
209:	learn: 0.0315278	total: 1.2s	remaining: 4.53s
210:	learn: 0.0314929	total: 1.21s	remaining: 4.53s
211:	learn: 0.0314845	total: 1.21s	remaining: 4.51s
212:	learn: 0.0314212	total: 1.22s	remaining: 4.51s
213:	learn: 0.031

353:	learn: 0.0304449	total: 1.46s	remaining: 2.66s
354:	learn: 0.0304379	total: 1.46s	remaining: 2.65s
355:	learn: 0.0304358	total: 1.46s	remaining: 2.64s
356:	learn: 0.0304346	total: 1.46s	remaining: 2.63s
357:	learn: 0.0304185	total: 1.46s	remaining: 2.62s
358:	learn: 0.0304163	total: 1.46s	remaining: 2.61s
359:	learn: 0.0304004	total: 1.46s	remaining: 2.6s
360:	learn: 0.0303953	total: 1.46s	remaining: 2.59s
361:	learn: 0.0303803	total: 1.47s	remaining: 2.59s
362:	learn: 0.0303799	total: 1.47s	remaining: 2.58s
363:	learn: 0.0303798	total: 1.47s	remaining: 2.57s
364:	learn: 0.0303798	total: 1.47s	remaining: 2.56s
365:	learn: 0.0303798	total: 1.47s	remaining: 2.55s
366:	learn: 0.0303790	total: 1.47s	remaining: 2.54s
367:	learn: 0.0303781	total: 1.48s	remaining: 2.54s
368:	learn: 0.0303773	total: 1.48s	remaining: 2.53s
369:	learn: 0.0303764	total: 1.48s	remaining: 2.52s
370:	learn: 0.0303762	total: 1.48s	remaining: 2.51s
371:	learn: 0.0303753	total: 1.48s	remaining: 2.5s
372:	learn: 0.

575:	learn: 0.0294277	total: 1.73s	remaining: 1.27s
576:	learn: 0.0294276	total: 1.73s	remaining: 1.27s
577:	learn: 0.0294274	total: 1.73s	remaining: 1.27s
578:	learn: 0.0294274	total: 1.74s	remaining: 1.26s
579:	learn: 0.0294274	total: 1.74s	remaining: 1.26s
580:	learn: 0.0294273	total: 1.74s	remaining: 1.25s
581:	learn: 0.0294206	total: 1.74s	remaining: 1.25s
582:	learn: 0.0294203	total: 1.74s	remaining: 1.24s
583:	learn: 0.0294201	total: 1.74s	remaining: 1.24s
584:	learn: 0.0294200	total: 1.74s	remaining: 1.23s
585:	learn: 0.0294200	total: 1.74s	remaining: 1.23s
586:	learn: 0.0294200	total: 1.74s	remaining: 1.23s
587:	learn: 0.0294197	total: 1.74s	remaining: 1.22s
588:	learn: 0.0294195	total: 1.74s	remaining: 1.22s
589:	learn: 0.0293549	total: 1.75s	remaining: 1.22s
590:	learn: 0.0293547	total: 1.75s	remaining: 1.21s
591:	learn: 0.0293494	total: 1.75s	remaining: 1.21s
592:	learn: 0.0293344	total: 1.75s	remaining: 1.2s
593:	learn: 0.0293323	total: 1.75s	remaining: 1.2s
594:	learn: 0.

754:	learn: 0.0258190	total: 2.42s	remaining: 785ms
755:	learn: 0.0258189	total: 2.42s	remaining: 781ms
756:	learn: 0.0258181	total: 2.42s	remaining: 777ms
757:	learn: 0.0258172	total: 2.42s	remaining: 774ms
758:	learn: 0.0258078	total: 2.42s	remaining: 770ms
759:	learn: 0.0258077	total: 2.42s	remaining: 766ms
760:	learn: 0.0257258	total: 2.44s	remaining: 766ms
761:	learn: 0.0257100	total: 2.44s	remaining: 762ms
762:	learn: 0.0256167	total: 2.45s	remaining: 761ms
763:	learn: 0.0255080	total: 2.46s	remaining: 760ms
764:	learn: 0.0254935	total: 2.46s	remaining: 756ms
765:	learn: 0.0254925	total: 2.47s	remaining: 753ms
766:	learn: 0.0254804	total: 2.48s	remaining: 752ms
767:	learn: 0.0254804	total: 2.48s	remaining: 748ms
768:	learn: 0.0254319	total: 2.48s	remaining: 745ms
769:	learn: 0.0254306	total: 2.48s	remaining: 741ms
770:	learn: 0.0254213	total: 2.48s	remaining: 738ms
771:	learn: 0.0254077	total: 2.49s	remaining: 735ms
772:	learn: 0.0254069	total: 2.49s	remaining: 731ms
773:	learn: 

942:	learn: 0.0228502	total: 3.23s	remaining: 195ms
943:	learn: 0.0228487	total: 3.23s	remaining: 192ms
944:	learn: 0.0228161	total: 3.23s	remaining: 188ms
945:	learn: 0.0228110	total: 3.23s	remaining: 185ms
946:	learn: 0.0228059	total: 3.24s	remaining: 181ms
947:	learn: 0.0228015	total: 3.25s	remaining: 178ms
948:	learn: 0.0228008	total: 3.26s	remaining: 175ms
949:	learn: 0.0227730	total: 3.27s	remaining: 172ms
950:	learn: 0.0227615	total: 3.27s	remaining: 169ms
951:	learn: 0.0227568	total: 3.27s	remaining: 165ms
952:	learn: 0.0227524	total: 3.28s	remaining: 162ms
953:	learn: 0.0227498	total: 3.28s	remaining: 158ms
954:	learn: 0.0227375	total: 3.28s	remaining: 155ms
955:	learn: 0.0227304	total: 3.28s	remaining: 151ms
956:	learn: 0.0226786	total: 3.29s	remaining: 148ms
957:	learn: 0.0226741	total: 3.29s	remaining: 144ms
958:	learn: 0.0225991	total: 3.3s	remaining: 141ms
959:	learn: 0.0225984	total: 3.3s	remaining: 138ms
960:	learn: 0.0225965	total: 3.3s	remaining: 134ms
961:	learn: 0.0

INFO:__main__:MAPE for 2015-12-31: 14.55%


992:	learn: 0.0221974	total: 3.39s	remaining: 23.9ms
993:	learn: 0.0221965	total: 3.39s	remaining: 20.5ms
994:	learn: 0.0221955	total: 3.39s	remaining: 17ms
995:	learn: 0.0221849	total: 3.4s	remaining: 13.7ms
996:	learn: 0.0221809	total: 3.4s	remaining: 10.2ms
997:	learn: 0.0221659	total: 3.42s	remaining: 6.84ms
998:	learn: 0.0221625	total: 3.42s	remaining: 3.42ms
999:	learn: 0.0221587	total: 3.42s	remaining: 0us
0:	learn: 0.5624081	total: 11.6ms	remaining: 11.6s
1:	learn: 0.5487939	total: 22.9ms	remaining: 11.4s
2:	learn: 0.5360409	total: 33.9ms	remaining: 11.3s
3:	learn: 0.5244563	total: 44ms	remaining: 11s
4:	learn: 0.5119702	total: 58.4ms	remaining: 11.6s
5:	learn: 0.4994538	total: 62ms	remaining: 10.3s
6:	learn: 0.4870035	total: 74.4ms	remaining: 10.5s
7:	learn: 0.4749918	total: 88ms	remaining: 10.9s
8:	learn: 0.4625193	total: 89.2ms	remaining: 9.82s
9:	learn: 0.4516918	total: 95.2ms	remaining: 9.43s
10:	learn: 0.4407575	total: 96.8ms	remaining: 8.7s
11:	learn: 0.4301399	total: 10

161:	learn: 0.0342820	total: 1.02s	remaining: 5.29s
162:	learn: 0.0342238	total: 1.02s	remaining: 5.25s
163:	learn: 0.0341329	total: 1.02s	remaining: 5.22s
164:	learn: 0.0340645	total: 1.04s	remaining: 5.25s
165:	learn: 0.0339788	total: 1.04s	remaining: 5.22s
166:	learn: 0.0338110	total: 1.05s	remaining: 5.24s
167:	learn: 0.0337573	total: 1.05s	remaining: 5.21s
168:	learn: 0.0336997	total: 1.06s	remaining: 5.22s
169:	learn: 0.0336342	total: 1.07s	remaining: 5.21s
170:	learn: 0.0335122	total: 1.07s	remaining: 5.19s
171:	learn: 0.0334502	total: 1.07s	remaining: 5.17s
172:	learn: 0.0334183	total: 1.07s	remaining: 5.13s
173:	learn: 0.0332991	total: 1.08s	remaining: 5.15s
174:	learn: 0.0331013	total: 1.09s	remaining: 5.12s
175:	learn: 0.0330708	total: 1.09s	remaining: 5.1s
176:	learn: 0.0330430	total: 1.09s	remaining: 5.07s
177:	learn: 0.0329637	total: 1.1s	remaining: 5.08s
178:	learn: 0.0328064	total: 1.1s	remaining: 5.05s
179:	learn: 0.0327791	total: 1.1s	remaining: 5.02s
180:	learn: 0.03

396:	learn: 0.0293300	total: 1.49s	remaining: 2.26s
397:	learn: 0.0293297	total: 1.49s	remaining: 2.25s
398:	learn: 0.0293235	total: 1.49s	remaining: 2.25s
399:	learn: 0.0293233	total: 1.5s	remaining: 2.24s
400:	learn: 0.0293060	total: 1.5s	remaining: 2.24s
401:	learn: 0.0293052	total: 1.5s	remaining: 2.23s
402:	learn: 0.0293045	total: 1.5s	remaining: 2.22s
403:	learn: 0.0293042	total: 1.5s	remaining: 2.21s
404:	learn: 0.0293039	total: 1.5s	remaining: 2.2s
405:	learn: 0.0293028	total: 1.5s	remaining: 2.2s
406:	learn: 0.0292964	total: 1.5s	remaining: 2.19s
407:	learn: 0.0292841	total: 1.5s	remaining: 2.18s
408:	learn: 0.0292839	total: 1.5s	remaining: 2.17s
409:	learn: 0.0292837	total: 1.5s	remaining: 2.16s
410:	learn: 0.0292829	total: 1.5s	remaining: 2.16s
411:	learn: 0.0292720	total: 1.51s	remaining: 2.15s
412:	learn: 0.0291622	total: 1.51s	remaining: 2.14s
413:	learn: 0.0291620	total: 1.51s	remaining: 2.13s
414:	learn: 0.0291608	total: 1.51s	remaining: 2.13s
415:	learn: 0.0291606	tota

590:	learn: 0.0277730	total: 1.77s	remaining: 1.22s
591:	learn: 0.0277720	total: 1.77s	remaining: 1.22s
592:	learn: 0.0277026	total: 1.77s	remaining: 1.22s
593:	learn: 0.0276948	total: 1.78s	remaining: 1.21s
594:	learn: 0.0276869	total: 1.78s	remaining: 1.21s
595:	learn: 0.0276825	total: 1.78s	remaining: 1.21s
596:	learn: 0.0276822	total: 1.78s	remaining: 1.2s
597:	learn: 0.0276359	total: 1.79s	remaining: 1.2s
598:	learn: 0.0276351	total: 1.79s	remaining: 1.2s
599:	learn: 0.0276170	total: 1.79s	remaining: 1.2s
600:	learn: 0.0276141	total: 1.79s	remaining: 1.19s
601:	learn: 0.0276138	total: 1.79s	remaining: 1.19s
602:	learn: 0.0275349	total: 1.81s	remaining: 1.19s
603:	learn: 0.0275245	total: 1.81s	remaining: 1.19s
604:	learn: 0.0274464	total: 1.82s	remaining: 1.19s
605:	learn: 0.0274457	total: 1.82s	remaining: 1.19s
606:	learn: 0.0274398	total: 1.82s	remaining: 1.18s
607:	learn: 0.0273632	total: 1.83s	remaining: 1.18s
608:	learn: 0.0272935	total: 1.84s	remaining: 1.18s
609:	learn: 0.02

KeyboardInterrupt: 