# COURSE: MODEL ENGINEERING DLMDSME01

## CASE STUDY: AUTOMATION OF STANDBY DUTY PLANNING 
## FOR RESCUE DRIVERS VIA A FORECASTING MODEL
## Testing the model with randomly generated data

## Author: Husam Romman

### Import Libraries

In [2]:
from warnings import filterwarnings
filterwarnings('ignore')

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from pathlib import Path  
import datetime 

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import plot_importance, plot_tree


from sklearn.model_selection import GridSearchCV
import xgboost as xgb

# Global plot styling for the figures drawn in this notebook.
sns.set_style("whitegrid")
plt.rcParams["figure.figsize"]=(20,10) # default figure size for graphs
plt.style.use("tableau-colorblind10") # matplotlib style sheet used for graph styling
%matplotlib inline

### Create random data for n_sick, calls and actual_duty


In [8]:
# Load the historical data set; the min/max of its columns bound the random
# values generated for the synthetic test data.
data = pd.read_csv('Data/sickness_table_addedFeatures_final.csv')

In [89]:
import random
from datetime import date,timedelta
# Number of synthetic days to generate. It has to equal the number of dates
# returned by create_future_dates() (2019-05-28 .. 2019-08-27 inclusive),
# because the lists built here and that date list become columns of one frame.
n_days = 92

# Observed value range of each historical column, computed once up front
# instead of rescanning the column on every loop iteration.
calls_lo, calls_hi = int(data['calls'].min()), int(data['calls'].max())
sick_lo, sick_hi = int(data['n_sick'].min()), int(data['n_sick'].max())
duty_lo, duty_hi = int(data['actual_duty'].min()), int(data['actual_duty'].max())
sby_lo, sby_hi = int(data['sby_need'].min()), int(data['sby_need'].max())

randCalls = []
randN_sick = []
randActualDuty = []
randStb_need = []

for _ in range(n_days):
    # randrange(lo, hi) draws uniformly from lo .. hi - 1, so the observed
    # maximum itself is never produced (same half-open range as
    # random.choice(range(lo, hi))).
    randCalls.append(random.randrange(calls_lo, calls_hi))
    randN_sick.append(random.randrange(sick_lo, sick_hi))
    randActualDuty.append(random.randrange(duty_lo, duty_hi))
    randStb_need.append(random.randrange(sby_lo, sby_hi))

# The holiday flag is 0 for every simulated day.
hol = [0] * n_days


def create_future_dates(start_dt=date(2019, 5, 28), end_dt=date(2019, 8, 27)):
    """Return every calendar day from start_dt to end_dt as ISO strings.

    Args:
        start_dt: First day of the range (inclusive). Defaults to 2019-05-28.
        end_dt: Last day of the range (inclusive). Defaults to 2019-08-27.

    Returns:
        A list of 'YYYY-MM-DD' strings in ascending order. The list is empty
        when start_dt lies after end_dt.
    """
    # +1 because both end points belong to the range.
    n_days = (end_dt - start_dt).days + 1
    return [
        (start_dt + timedelta(days=offset)).isoformat()
        for offset in range(n_days)
    ]
    
# Assemble the synthetic observations into one frame, one row per future day.
# The dict is written in the final column order, so no reordering is needed.
rand_df = pd.DataFrame({
    'date': create_future_dates(),
    'n_sick': randN_sick,
    'calls': randCalls,
    'actual_duty': randActualDuty,
    'holidays_de': hol,
    'sby_need': randStb_need,
})

# Turn the ISO date strings into timestamps and use them as the index.
rand_df['date'] = rand_df['date'].map(pd.Timestamp)
rand_df = rand_df.set_index('date')
rand_df.head()

Unnamed: 0_level_0,n_sick,calls,actual_duty,holidays_de,sby_need
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-05-28,109,6647,2131,0,39
2019-05-29,105,9039,2125,0,282
2019-05-30,68,8989,1820,0,389
2019-05-31,74,8438,2080,0,325
2019-06-01,96,4092,2210,0,100


In [90]:
def create_features2(data, label=None):
    """Build the model's feature matrix from a date-indexed frame.

    Calendar features are derived from the index of ``data`` and written
    into ``data`` in place, together with the ``percantage`` column.

    Args:
        data: DataFrame indexed by date that contains the columns
            ``n_sick``, ``calls``, ``actual_duty``, ``holidays_de`` and
            ``sby_need``. It is modified in place.
        label: Optional name of the target column to return with the features.

    Returns:
        The feature frame ``X`` when ``label`` is falsy, otherwise the tuple
        ``(X, y)`` where ``y`` is ``data[label]``.
    """
    # Work on the frame that was passed in rather than on a module-level global.
    data['date'] = pd.to_datetime(data.index)
    dates = data['date'].dt

    data['year'] = dates.year
    data['month'] = dates.month
    # Series.dt.week was removed in pandas 2.0; isocalendar().week is its
    # replacement. It yields a nullable UInt32 column, hence the cast to int.
    data['week'] = dates.isocalendar().week.astype(int)
    data['day'] = dates.day
    data['NumDayOfWeek'] = dates.dayofweek
    # The misspelled column names ('qaurter', 'percantage') are kept as they
    # are -- presumably the saved model was trained with them; verify before
    # renaming.
    data['qaurter'] = dates.quarter.astype(int)
    # NOTE(review): this feature is a linear function of sby_need. When
    # sby_need is also requested as the label, the target leaks into X.
    data['percantage'] = (data.sby_need / 90) * 100  # 90 was taken from the data df

    X = data[['n_sick', 'calls', 'actual_duty', 'year', 'month', 'week',
              'day', 'NumDayOfWeek', 'qaurter', 'percantage', 'holidays_de']]

    if label:
        y = data[label]
        return X, y
    return X

In [91]:
# Persist the synthetic data set, creating the target directory on demand.
filepath = Path('Data/sickness_random_future_final.csv')
filepath.parent.mkdir(exist_ok=True, parents=True)
rand_df.to_csv(path_or_buf=filepath)

In [92]:
# Read the saved synthetic data back, parsing the first column as dates and
# using it as the index.
test = pd.read_csv('Data/sickness_random_future_final.csv', parse_dates=[0], index_col=[0])
# NOTE(review): the features and label are built from the in-memory rand_df,
# not from the `test` frame that was just read back -- confirm this is intended.
pred_x, pred_y = create_features2(rand_df, label='sby_need')

In [93]:
# Restore the regressor from its saved JSON model file.
reg_new = XGBRegressor()
reg_new.load_model('json/model_final.json')

In [94]:
# Attach the model's predictions to the frame read back from disk.
test['xgb_Prediction'] = reg_new.predict(pred_x)

# Drop the input columns so that only sby_need and its prediction remain.
test = test.drop(columns=['n_sick', 'calls', 'actual_duty', 'holidays_de'])

test.head(30)

Unnamed: 0_level_0,sby_need,xgb_Prediction
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-05-28,39,40.551697
2019-05-29,282,283.728638
2019-05-30,389,358.479248
2019-05-31,325,326.346008
2019-06-01,100,102.785347
2019-06-02,331,328.101562
2019-06-03,501,441.09613
2019-06-04,427,438.483551
2019-06-05,304,307.699677
2019-06-06,4,3.776296


In [95]:
# Run the model once and reuse the predictions for all three metrics
# (R^2, MAE, RMSE) instead of predicting again for each one.
predictions = reg_new.predict(pred_x)
print(r2_score(rand_df.sby_need, predictions))
print(mean_absolute_error(rand_df.sby_need, predictions))
print(np.sqrt(mean_squared_error(rand_df.sby_need, predictions)))

0.9814346813727969
12.075854279531125
21.60717892440835


In [96]:
# Write the actual-vs-predicted table to disk, creating the directory on demand.
filepath = Path('Data/future_predictions_final.csv')
filepath.parent.mkdir(exist_ok=True, parents=True)
test.to_csv(path_or_buf=filepath)