# Forecasting

In [19]:
import warnings
warnings.filterwarnings('ignore')
import pickle

#Preparing, cleaning, analyzing
import pandas as pd
import numpy as np


## Data Preparation

In [3]:
# Load the monthly series. The 'date' column becomes the index and is parsed
# into a DatetimeIndex at load time, so later date comparisons work directly
# on `df` without having to convert (and mutate) the index afterwards.
df = pd.read_csv('./data/Alko_Insg.csv', index_col='date', parse_dates=True)

In [4]:
# Preview the first rows (columns: year, month, value) to sanity-check the load.
df.head()

Unnamed: 0_level_0,year,month,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,2000,1,78
2000-02-01,2000,2,53
2000-03-01,2000,3,73
2000-04-01,2000,4,78
2000-05-01,2000,5,96


In [11]:
# Model loading: restore the previously saved Keras model from 'best_model.h5'.
# The path is relative to the working directory the notebook is run from.
from tensorflow.keras.models import load_model
model = load_model('best_model.h5')


In [20]:
# Load the pickled scalers: `f_transformer` is applied to the feature columns
# (year, month) and `cnt_transformer` to the target `value` column.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load scaler files that come from a trusted source.

# Open the file in binary mode
with open('./scalers/f_transformer.pkl', 'rb') as file: 
    # Call the load method to deserialize the feature scaler
    f_transformer = pickle.load(file)

# Open the file in binary mode
with open('./scalers/cnt_transformer.pkl', 'rb') as file: 
    # Call the load method to deserialize the target-value scaler
    cnt_transformer = pickle.load(file)    
    

## Inference

In [12]:
from pydantic import BaseModel, validator

class input_item(BaseModel):
    """Forecast request: the year and month to predict.

    Validation rules, enforced when an instance is created:

    * ``year`` must lie between 2000 and 2021 (inclusive);
    * ``month`` must lie between 1 and 12 (inclusive);
    * for ``year == 2021`` only ``month == 1`` is accepted.

    Fields are validated in declaration order, so ``year`` is already
    validated (and available through ``values``) when ``month`` is checked.
    """
    year: int
    month: int

    @validator('year')
    def year_must_be_2010_or_less(cls, year):
        """Reject years outside the supported 2000-2021 range.

        year: is the intended year to forecast in
        """
        if not 2000 <= year <= 2021:
            raise ValueError(f"We expect the year from 2021 to 2000, but you entered {year}")
        return year

    @validator('month')
    def year_must_be_from1_to12(cls, month, values):
        """Reject months outside 1-12 and any month after January in 2021.

        month: is the intended month to forecast in
        values: the fields validated so far; 'year' is missing from it when
            the year itself failed validation, in which case only the 1-12
            range check is applied here.
        """
        if not 1 <= month <= 12:
            raise ValueError(f"We expect the month from 1 to 12, but you entered {month}")

        # The cross-field rule needs the already-validated year.
        if values.get('year') == 2021 and month != 1:
            raise ValueError(f"We can't expect after that month 1 in year 2021, but you entered {month}")

        return month


In [13]:
# Display the loaded frame; the notebook output abbreviates the middle rows.
df

Unnamed: 0_level_0,year,month,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,2000,1,78
2000-02-01,2000,2,53
2000-03-01,2000,3,73
2000-04-01,2000,4,78
2000-05-01,2000,5,96
...,...,...,...
2020-08-01,2020,8,46
2020-09-01,2020,9,46
2020-10-01,2020,10,34
2020-11-01,2020,11,23


In [14]:
def creat_ds(x, y, time_step=1):
    """Build sliding-window sequences and their next-step targets.

    For every start position ``s`` in ``range(len(x) - time_step)`` the rows
    ``x.iloc[s : s + time_step]`` form one input window, and
    ``y.iloc[s + time_step]`` (the element right after the window) is its
    target.

    x: object supporting ``len`` and ``.iloc`` slicing whose slices expose
       ``.to_numpy()`` (e.g. a pandas DataFrame)
    y: object supporting ``.iloc`` integer access (e.g. a pandas Series)
    time_step: number of consecutive rows in each window

    Returns a tuple ``(windows, targets)`` of numpy arrays; both are empty
    when ``x`` has no more than ``time_step`` rows.
    """
    n_windows = len(x) - time_step

    # One array per window start position
    windows = [
        x.iloc[start:start + time_step].to_numpy()
        for start in range(n_windows)
    ]
    # The value immediately following each window
    targets = [y.iloc[start + time_step] for start in range(n_windows)]

    return np.array(windows), np.array(targets)


In [120]:
# Window length: number of consecutive rows that make up one model input
# sequence (passed to creat_ds as `time_step` inside predict()).
Time_Step = 12
import datetime


def predict(year=2020, month=5):
    """Forecast the value for one month and print it.

    The ``Time_Step`` rows of ``df`` that precede the requested month are
    scaled with ``f_transformer`` / ``cnt_transformer``, turned into a single
    input window with ``creat_ds`` and passed to ``model``. The prediction is
    inverse-scaled, rounded up and printed; if ``df`` contains a row for the
    requested month, its actual value is printed as well.

    year: year of the month to forecast (defaults to the 2020 used originally)
    month: month (1-12) to forecast (defaults to the 5 used originally)

    Returns None; results are printed.

    Raises ValueError if ``year``/``month`` do not form a valid date or if
    ``df`` holds fewer than ``Time_Step`` rows before the requested month.
    """
    f_columns = ['year', 'month']
    input_date = pd.Timestamp(datetime.datetime(year, month, 1))

    # Work on a copy with a DatetimeIndex so the module-level `df` is never
    # modified by a call to this function.
    history = df.copy()
    history.index = pd.to_datetime(history.index)

    window = history[history.index < input_date].tail(Time_Step)
    if len(window) < Time_Step:
        raise ValueError(
            f"Need {Time_Step} months of history before {year}-{month:02d}, "
            f"but only {len(window)} are available"
        )

    # Append a placeholder row for the target month (value 0, in the same
    # column order as the original positional assignment). creat_ds needs
    # time_step + 1 rows to emit exactly one window, and that window covers
    # only the Time_Step history rows.
    target_row = pd.DataFrame(
        [[year, month, 0]], columns=window.columns, index=[input_date]
    )
    input_df = pd.concat([window, target_row])

    #Scaling features
    # NOTE(review): assumes both transformers are fitted scalers exposing
    # transform / inverse_transform -- confirm against the training notebook.
    input_df[f_columns] = f_transformer.transform(input_df[f_columns].to_numpy())
    input_df['value'] = cnt_transformer.transform(input_df[['value']])

    xs_inf, _ = creat_ds(input_df, input_df.value, time_step=Time_Step)
    y_predict_inf = model.predict(xs_inf)
    y_pred_inv = cnt_transformer.inverse_transform(y_predict_inf)
    print('Pridicted Value ', np.ceil(y_pred_inv[0][0]))

    # Show the ground truth when the requested month is part of the dataset.
    actual = history.loc[history.index == input_date, 'value']
    if not actual.empty:
        print('Real value ', actual.iloc[0])

In [121]:
# Show the dataset again for reference before running the forecast.
df

Unnamed: 0_level_0,year,month,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,2000,1,78
2000-02-01,2000,2,53
2000-03-01,2000,3,73
2000-04-01,2000,4,78
2000-05-01,2000,5,96
...,...,...,...
2020-08-01,2020,8,46
2020-09-01,2020,9,46
2020-10-01,2020,10,34
2020-11-01,2020,11,23


In [122]:
# Run the forecast for year 2020, month 5; prints the predicted value and,
# when that month exists in df, the real value for comparison.
predict()

Pridicted Value  33.0
Real value  40


## References

- https://curiousily.com/posts/demand-prediction-with-lstms-using-tensorflow-2-and-keras-in-python/
- https://www.youtube.com/watch?v=xaIA83x5Icg
- https://www.youtube.com/watch?v=uw6zYLbCGkY