In [1]:
import pickle
import os
import pandas as pd
import numpy as np

In [3]:
# Absolute, machine-specific location of the pickled regressor (the file name
# suggests an XGBoost model).
# NOTE(review): consider a path relative to a configurable models directory so
# the notebook runs on other machines.
model_path = r"E:\Seoul Bike Sharing Demand Prediction\models\xgboost_regressor_r2_0_949_v1.pkl"

# Open through a context manager so the file handle is closed as soon as the
# model is deserialized (the bare `open(...)` form leaves it to the GC).
# SECURITY: pickle.load executes arbitrary code embedded in the file -- only
# load artifacts that come from a trusted source.
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

In [4]:
# Smoke test for the loaded model: two hand-entered rows of 22 feature values
# each. The values look like already-scaled features -- TODO confirm they were
# copied from the scaled training/test matrix.
data = [
    [ 0.94263713, -1.48002356, -1.13608572,  1.88683081,  0.93525012,
       -0.6164224 , -0.13595994, -0.17477199, -0.22720178,  0.18503645,
        1.39481516, -1.59277468,  0.30766527, -0.57998659, -0.57888812,
        1.72613935, -0.4075004 , -0.41106465,  2.4713367 , -0.40940298,
       -0.41272349, -0.40511677],
    
    [ 1.23308102,  1.0094194 ,  0.57525368, -0.51223995, -0.08720322,
       -0.66267834, -0.13595994, -0.17477199, -0.22720178,  0.18503645,
        1.62255069,  0.42998484,  0.30766527, -0.57998659,  1.72744953,
       -0.5793275 , -0.4075004 , -0.41106465, -0.40463932,  2.44258117,
       -0.41272349, -0.40511677]
]

# One prediction per input row; wrapped in list() so the cell shows the raw values.
prediction = model.predict(data)
list(prediction)

[38.903114, 1882.7937]

## Load the Fitted Scaler (`sc`)

In [6]:
# Absolute, machine-specific location of the pickled scaler.
# NOTE(review): consider a path relative to a configurable models directory so
# the notebook runs on other machines.
sc_dump_path = r"E:\Seoul Bike Sharing Demand Prediction\models\sc.pkl"

# Open through a context manager so the file handle is closed right after the
# scaler is deserialized instead of lingering until garbage collection.
# SECURITY: pickle.load executes arbitrary code embedded in the file -- only
# load artifacts that come from a trusted source.
with open(sc_dump_path, "rb") as sc_file:
    sc = pickle.load(sc_file)

In [7]:
# Echo the unpickled scaler so its repr shows what was loaded.
sc

## User Input

In [8]:
# Raw user input for a single prediction. Units follow the column labels used
# when the feature row is assembled further down in this notebook.
Date = "20/05/2024"        # day/month/year; parsed later with "%d/%m/%Y"
Hour = 10
Temperature = 24           # °C
Humidity = 50              # %
Wind_speed = 68            # m/s -- NOTE(review): transforms to ~63.6 after scaling (see output below); confirm value/units
Visibility = 42            # in units of 10 m
Solar_Radiation = 100      # MJ/m2 -- NOTE(review): transforms to ~115 after scaling (see output below); confirm value/units
Rainfall = 0.0             # mm
Snowfall = 0.2             # cm
Seasons = "Spring"         # compared against the season columns 'Spring', 'Summer', 'Winter'
Holiday = "Holiday"        # must be a key of holiday_dic: "No Holiday" or "Holiday"
Functioning_Day = "Yes"    # must be a key of functioning_day: "No" or "Yes"

## Converting User Input into a Model-Consumable Format

In [9]:
from datetime import datetime

# Integer codes for the two binary categorical inputs; looked up by the raw
# user strings when the feature row is built.
# NOTE(review): presumably mirrors the encoding used at training time -- confirm.
holiday_dic = {
    "No Holiday": 0,
    "Holiday": 1,
}
functioning_day = {
    "No": 0,
    "Yes": 1,
}

def get_string_to_datetime(date):
    """Break a ``DD/MM/YYYY`` date string into its calendar components.

    Parameters
    ----------
    date : str
        Date written as day/month/year, e.g. ``"20/05/2024"``.

    Returns
    -------
    dict
        ``"day"``, ``"month"`` and ``"year"`` as integers, plus ``"week_day"``
        holding the full weekday name produced by ``strftime("%A")``.

    Raises
    ------
    ValueError
        Propagated from ``datetime.strptime`` when ``date`` does not match
        the ``"%d/%m/%Y"`` format.
    """
    parsed = datetime.strptime(date, "%d/%m/%Y")
    # Pull the numeric fields straight off the parsed datetime, in output order.
    components = {field: getattr(parsed, field) for field in ("day", "month", "year")}
    components["week_day"] = parsed.strftime("%A")
    return components


# Parse the user-supplied Date once; later cells read the day/month/year
# numbers and the weekday name from this dict.
str_to_date = get_string_to_datetime(Date)

str_to_date

{'day': 20, 'month': 5, 'year': 2024, 'week_day': 'Monday'}

In [10]:
# Column labels for the numeric / already-encoded part of the feature row.
# The order here must line up position-by-position with u_input_list below.
features_name = [
    'Hour',
    'Temperature(°C)',
    'Humidity(%)',
    'Wind speed (m/s)',
    'Visibility (10m)',
    'Solar Radiation (MJ/m2)',
    'Rainfall(mm)',
    'Snowfall (cm)',
    'Holiday',
    'Functioning Day',
    'Day',
    'Month',
    'Year',
]

# Values in the same order as features_name: raw weather inputs, then the
# integer-encoded flags, then the calendar parts extracted from Date.
u_input_list = [
    Hour,
    Temperature,
    Humidity,
    Wind_speed,
    Visibility,
    Solar_Radiation,
    Rainfall,
    Snowfall,
    holiday_dic[Holiday],
    functioning_day[Functioning_Day],
    str_to_date["day"],
    str_to_date["month"],
    str_to_date["year"],
]

# Single-row frame: one record, labelled with the feature names.
df_u_input = pd.DataFrame(data=[u_input_list], columns=features_name)
df_u_input

Unnamed: 0,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Holiday,Functioning Day,Day,Month,Year
0,10,24,50,68,42,100,0.0,0.2,1,1,20,5,2024


In [11]:
# Debug view of the raw feature values, in column order.
u_input_list

[10, 24, 50, 68, 42, 100, 0.0, 0.2, 1, 1, 20, 5, 2024]

In [12]:
# Debug view of the column labels paired with u_input_list.
features_name

['Hour',
 'Temperature(°C)',
 'Humidity(%)',
 'Wind speed (m/s)',
 'Visibility (10m)',
 'Solar Radiation (MJ/m2)',
 'Rainfall(mm)',
 'Snowfall (cm)',
 'Holiday',
 'Functioning Day',
 'Day',
 'Month',
 'Year']

In [13]:
def season_to_df(Season):
    """One-hot encode a season label as a single-row DataFrame.

    Parameters
    ----------
    Season : str
        Season label. Only 'Spring', 'Summer' and 'Winter' have a column.

    Returns
    -------
    pandas.DataFrame
        One row with integer columns 'Spring', 'Summer', 'Winter'. The column
        matching ``Season`` is set to 1; any other label leaves the row all
        zeros (presumably the dropped baseline category -- confirm against
        the training encoding).
    """
    seasons_cols = ['Spring', 'Summer', 'Winter']
    seasons_data = np.zeros((1, len(seasons_cols)), dtype = "int")

    df_seasons = pd.DataFrame(seasons_data, columns = seasons_cols)
    # Use the function argument here. The previous code read the notebook-level
    # `Seasons` variable, so the parameter was silently ignored.
    if Season in seasons_cols:
        df_seasons[Season] = 1
    return df_seasons

# Encode the user-selected season as a one-row indicator frame.
df_seasons = season_to_df(Seasons)
df_seasons

Unnamed: 0,Spring,Summer,Winter
0,1,0,0


In [14]:
def days_df(week_day):
    """One-hot encode a weekday name as a single-row DataFrame.

    Parameters
    ----------
    week_day : str
        Full weekday name such as ``"Monday"``.

    Returns
    -------
    pandas.DataFrame
        One row with integer columns 'Monday', 'Saturday', 'Sunday',
        'Thursday', 'Tuesday', 'Wednesday'. The column matching ``week_day``
        is 1; a name without a column (e.g. 'Friday') yields all zeros.
    """
    encoded_days = ['Monday', 'Saturday', 'Sunday',
                    'Thursday', 'Tuesday', 'Wednesday']

    # Start from an all-zero row, then flag the matching day if it has a column.
    one_hot = pd.DataFrame(
        np.zeros((1, len(encoded_days)), dtype = "int"),
        columns = encoded_days,
    )

    if week_day not in encoded_days:
        return one_hot

    one_hot[week_day] = 1
    return one_hot

# Encode the weekday derived from Date as a one-row indicator frame.
df_days = days_df(str_to_date["week_day"])
df_days

Unnamed: 0,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,1,0,0,0,0,0


In [15]:
# Sanity check: the weekday name that was parsed from Date.
str_to_date["week_day"]

'Monday'

In [16]:
# Join the three single-row frames side by side: base features first, then
# the season indicators, then the weekday indicators.
feature_frames = [df_u_input, df_seasons, df_days]
df_for_pred = pd.concat(feature_frames, axis=1)
df_for_pred

Unnamed: 0,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Holiday,Functioning Day,...,Year,Spring,Summer,Winter,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday
0,10,24,50,68,42,100,0.0,0.2,1,1,...,2024,1,0,0,1,0,0,0,0,0


In [17]:
# Run the assembled feature row through the unpickled scaler; the output below
# shows the result as a 2-D NumPy array with one row.
# NOTE(review): the column order of df_for_pred presumably has to match the
# order the scaler was fitted on -- confirm against the training notebook.
sc_data_for_pred = sc.transform(df_for_pred)
sc_data_for_pred

array([[ -0.21913842,   0.93423488,  -0.40265455,  63.5909306 ,
         -2.27817468, 114.97716408,  -0.13595994,   0.26525033,
          4.40137399,   0.18503645,   0.48387302,  -0.4369121 ,
         21.65537046,   1.72417781,  -0.57888812,  -0.5793275 ,
          2.45398535,  -0.41106465,  -0.40463932,  -0.40940298,
         -0.41272349,  -0.40511677]])

### Prediction in Deployment

In [18]:
# Predict once and reuse the result. The earlier version called model.predict
# twice and threw the first result away.
# The single-row prediction is rounded to a whole number of bikes for display.
predicted_demand = round(model.predict(sc_data_for_pred).tolist()[0])
print(f"Rented Bike Demand on date: {Date}, and Time: {Hour} is : {predicted_demand}")

Rented Bike Demand on date: 20/05/2024, and Time: 10 is : 897
