In [1]:
# -*- coding: utf-8 -*-
"""
Created on Mon Dec  19 14:44:06 2019

@author: Mohamed.Imran
"""

import os
import pandas as pd
import numpy as np
import datetime as dt
from sklearn import linear_model
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from fbprophet import Prophet
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')


data = pd.read_csv(r"C:\Users\jayac\Downloads\Data science\Day 17\TS_Decomposition_Data_Workingfile.csv") #Read your decomposition file here

#Regression model
def Regression(data):
    data["Date"] = pd.to_datetime(data["Date"])
    data=data.sort_values(["Date"])
    data["Quarter"] = data["Date"].dt.quarter
    data["Year"] = data["Date"].dt.year
    data["Month"] = data["Date"].dt.month
    
    Train=data[(data.Year>=2015)&(data.Year<2019)] #modify date according to your dataset; Train : 2017-2018
    Test=data[(data.Year==2019)]  #modify date according to your dataset; Test : 2019
    
    
    Train["SI_Y"]=Train["Volume"]/Train.groupby("Year")["Volume"].transform(np.mean)
    Train["F_SI"]=Train.groupby("Month")["SI_Y"].transform(np.mean)
    Train["D_Seasonalised_trend"] = Train["Volume"]/Train["F_SI"]    
    Train["Level_index1"]=np.mean(Train[(Train.Year==2018)&(Train.Quarter==1)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2017)&(Train.Quarter==4)]["D_Seasonalised_trend"])
    
    numer1=np.mean(Train[(Train.Year==2018)&(Train.Quarter==3)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2018)&(Train.Quarter==2)]["D_Seasonalised_trend"])
    numer2=np.mean(Train[(Train.Year==2018)&(Train.Quarter==4)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2018)&(Train.Quarter==3)]["D_Seasonalised_trend"])
    
    
    Train["Level_index2"]=np.mean([numer1,numer2])
    Train=Train.sort_values(["Date"])
    Train.index=range(len(Train))
    Train["ID"]=range(1,(len(Train)+1))
    
    Train["Deleveled_series"]=np.where(Train.Year==2017, Train["D_Seasonalised_trend"]*Train["Level_index1"],Train["D_Seasonalised_trend"])
    
    lm = linear_model.LinearRegression()
    X = np.array(Train[["ID", "Variable_1"]]) # In case of no extra variable in the dataset, remove the extra variable name from the list, then append the line with ".reshape(-1, 1)"
    Y = np.array(Train["Deleveled_series"]).reshape(-1,1)
    
    model = lm.fit(X,Y)
    
    Test["ID"]=range(len(Test))
    Test["ID"]=Test["ID"]+max(Train["ID"])
    X_test=np.array(Test[["ID", "Variable_1"]]) # In case of no extra variable in the dataset, remove the extra variable name from the list, then append the line with ".reshape(-1, 1)"
    Y_test=model.predict(X_test)
    
    Pred1 = Y_test*Train.iloc[0]["Level_index2"]*np.array(Train.iloc[0:len(Y_test)]["F_SI"]).reshape(-1,1)
    Test["Predictions"]=Pred1
    
    return(Test['Predictions'])

#Arima model
def Arima(data): 
    X = data['Volume'].values
    size = np.sum(data['Date']<='12/31/2018')
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()  
        
    for t in range(len(test)):
    	model = ARIMA(history, order=(1,1,0))
    	model_fit = model.fit(disp=0)
    	output = model_fit.forecast()
    	yhat = output[0]
    	predictions.append(yhat)
    	obs = test[t]
    	history.append(obs)
    return predictions   

#Holts-Winter model
def Holts_winter(data):
    inter_df = data[['Volume']]
    size = np.sum(data['Date']<='12/31/2018')
    train, test = inter_df.iloc[:size, 0], inter_df.iloc[size:, 0]
    model = ExponentialSmoothing(train, seasonal='mul', seasonal_periods=12).fit()
    pred = model.predict(start=test.index[0], end=test.index[-1])
    return pred

#Fbprophet
def Fbprophet(data):
    size = np.sum(data['ds']<='12/31/2018')
    inter_df = data.iloc[:size, :]
    m = Prophet(weekly_seasonality=False, daily_seasonality=False)
    m.fit(inter_df)
    future = m.make_future_dataframe(periods=12, freq='M')
    forecast = m.predict(future)
    fcst = forecast['yhat'].tail(12)
    return fcst

#Simple Exponential Smoothing model
def Ses(data):
    inter_df = data[['Volume']]
    size = np.sum(data['Date']<='12/31/2018')
    train, test = inter_df.iloc[:size, 0], inter_df.iloc[size:, 0]
    model = SimpleExpSmoothing(train).fit()
    pred = model.predict(start=test.index[0], end=test.index[-1])
    return pred



def Regression_2lag(data):
    data["Variable_1"] = data["Variable_1"].shift(2)
    data = data.loc[2:, :]
    return Regression(data)


required_cols = [col for col in data.columns if col not in ['Date', 'Variable_1']]


Result=pd.DataFrame()

for model in [Regression, Arima, Holts_winter, Ses, Fbprophet, Regression_2lag]:
    for i in required_cols:
        data['Date'] = pd.to_datetime(data['Date'])
        to_func = data[["Date", "Variable_1", i]]
        to_func.columns=["Date","Variable_1", "Volume"]
        if model == Fbprophet:
            to_func.columns=["ds","Variable_1", "y"]
            Result_inter = model(to_func[['ds', 'y']])
            Result_inter.name = model.__name__ + "_" +  i
            Result_inter.index = range(len(Result_inter))
            Result = pd.concat([Result, Result_inter], axis = 1)
        elif model == Arima:
            Result_inter = model(to_func)
            Result_inter = pd.DataFrame(Result_inter, columns = ["ARIMA_" + i])
            Result_inter.index=range(len(Result_inter))
            Result = pd.concat([Result, Result_inter], axis = 1)
        else:
            Result_inter = model(to_func)
            Result_inter.name = model.__name__ + "_" +  i
            Result_inter.index=range(len(Result_inter))
            Result = pd.concat([Result, Result_inter], axis = 1)



Result.to_csv('Forecast.csv')



ModuleNotFoundError: No module named 'fbprophet'

In [2]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from sklearn import linear_model
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [44]:
data = pd.read_csv(r"C:\Users\jayac\Downloads\Data science\Day 17\decom2 example.csv") #Read your decomposition file here


In [48]:
data.tail()

Unnamed: 0,Date,Marketing spends,Quarter,Year,Month
19,2018-08-01,1508.25,3,2018,8
20,2018-09-01,1956.76,3,2018,9
21,2018-10-01,2133.19,4,2018,10
22,2018-11-01,1813.04,4,2018,11
23,2018-12-01,1203.39,4,2018,12


In [47]:
data["Date"] = pd.to_datetime(data["Date"],format='%d-%m-%Y')
data=data.sort_values(["Date"])
data["Quarter"] = data["Date"].dt.quarter
data["Year"] = data["Date"].dt.year
data["Month"] = data["Date"].dt.month

In [49]:
data.rename(columns={'Marketing spends':'Volume'},inplace=True)

In [50]:
 
Train=data[(data.Year>=2017)&(data.Year<=2018)] #modify date according to your dataset; Train : 2017-2018
Test=data[(data.Year==2019)]  #modify date according to your dataset; Test : 2019

In [51]:
Train["SI_Y"]=Train["Volume"]/Train.groupby("Year")["Volume"].transform(np.mean)


In [52]:
Train["F_SI"]=Train.groupby("Month")["SI_Y"].transform(np.mean)

In [54]:
Train["F_SI"]=Train.groupby("Month")["SI_Y"].transform(np.mean)
Train["D_Seasonalised_trend"] = Train["Volume"]/Train["F_SI"]    


In [57]:
Train["Level_index1"]=np.mean(Train[(Train.Year==2018)&(Train.Quarter==1)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2017)&(Train.Quarter==4)]["D_Seasonalised_trend"])

In [59]:
numer1=np.mean(Train[(Train.Year==2018)&(Train.Quarter==3)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2018)&(Train.Quarter==2)]["D_Seasonalised_trend"])
numer2=np.mean(Train[(Train.Year==2018)&(Train.Quarter==4)]["D_Seasonalised_trend"])/np.mean(Train[(Train.Year==2018)&(Train.Quarter==3)]["D_Seasonalised_trend"])

Train["Level_index2"]=np.mean([numer1,numer2])
Train=Train.sort_values(["Date"])


In [60]:
Train

Unnamed: 0,Date,Volume,Quarter,Year,Month,SI_Y,F_SI,D_Seasonalised_trend,Level_index1,Level_index2
0,2017-01-01,1942.21,1,2017,1,0.911636,0.997334,1947.402409,0.841589,1.003384
1,2017-02-01,1749.93,1,2017,2,0.821383,0.934584,1872.416839,0.841589,1.003384
2,2017-03-01,2399.42,1,2017,3,1.126241,1.070109,2242.220419,0.841589,1.003384
3,2017-04-01,2126.85,2,2017,4,0.998302,1.005148,2115.957015,0.841589,1.003384
4,2017-05-01,2242.5,2,2017,5,1.052586,1.056178,2123.221263,0.841589,1.003384
5,2017-06-01,2436.44,2,2017,6,1.143618,1.013425,2404.164082,0.841589,1.003384
6,2017-07-01,2016.88,3,2017,7,0.946684,0.990385,2036.460337,0.841589,1.003384
7,2017-08-01,1755.23,3,2017,8,0.823871,0.837916,2094.756233,0.841589,1.003384
8,2017-09-01,2149.94,3,2017,9,1.00914,1.057225,2033.568875,0.841589,1.003384
9,2017-10-01,2905.47,4,2017,10,1.363771,1.28437,2262.174524,0.841589,1.003384


In [62]:
Train.index=range(len(Train))
Train["ID"]=range(1,(len(Train)+1))

In [64]:
Train["Deleveled_series"]=Train["D_Seasonalised_trend"]*Train["Level_index1"]


In [66]:
lm = linear_model.LinearRegression()
X =Train[["ID"]] # In case of no extra variable in the dataset, remove the extra variable name from the list, then append the line with ".reshape(-1, 1)"
Y = np.array(Train["Deleveled_series"]).reshape(-1,1)

model = lm.fit(X,Y)

In [70]:
Test["ID"]=range(len(Train)+1,len(Train)+10)

In [71]:
Test['ID']

0    25
1    26
2    27
3    28
4    29
5    30
6    31
7    32
8    33
Name: ID, dtype: int32

In [72]:
X_test=np.array(Test[["ID"]]) # In case of no extra variable in the dataset, remove the extra variable name from the list, then append the line with ".reshape(-1, 1)"
Y_test=model.predict(X_test)

In [82]:
Pred1 = Y_test*Train.iloc[0]["Level_index2"]*np.array(Train.iloc[0:len(Y_test)]["F_SI"]).reshape(-1,1)
Test["Predictions"]=Pred1

In [81]:
np.array(Train.iloc[0:len(Y_test)]["F_SI"]).reshape(-1,1)

array([[0.99733367],
       [0.93458356],
       [1.07010889],
       [1.00514802],
       [1.05617819],
       [1.01342501],
       [0.99038511],
       [0.83791611],
       [1.05722507]])

In [83]:
Test["Predictions"]

0    1414.504685
1    1308.434303
2    1478.623767
3    1370.502221
4    1420.786978
5    1344.761723
6    1296.096867
7    1081.256926
8    1344.942673
Name: Predictions, dtype: float64