# Model Prediction

## Imports

In [1]:
import pandas as pd
import pickle
import numpy as np

## Import Files

In [2]:
full_df = pd.read_csv("../../../Data_thesis/Full_Datasets/Full.csv")

In [3]:
full_df.head()

Unnamed: 0,Date,Hour,weekday,is_weekend,Sensor,SensorLongitude,SensorLatitude,CrowdednessCount,LonScaled,LatScaled,...,Nieuwmarkt score,Nieuwmarkt weight,Nieuwezijds Kolk score,Nieuwezijds Kolk weight,Dam score,Dam weight,Spui score,Spui weight,Centraal Station score,Centraal Station weight
0,2018-03-11,100,6.0,1.0,GAWW-04,4.897908,52.373283,886,-0.558826,0.035969,...,0.0,0.978944,0.0,0.616678,-32.179221,-0.31242,-0.0,-1.140386,297.734037,0.629459
1,2018-03-11,2100,6.0,1.0,GAWW-07,4.900441,52.374414,1603,1.966352,1.066966,...,396.612265,1.142975,5.596253,0.028122,-2008.443214,-1.585196,-370.823855,-2.767342,4117.774978,1.066781
2,2018-03-11,2100,6.0,1.0,GAWW-08,4.897193,52.37165,21,-1.271546,-1.4525,...,306.731105,0.883951,85.461513,0.429455,-107.617256,-0.084939,-71.079754,-0.530446,262.90113,0.068109
3,2018-03-11,2100,6.0,1.0,GAWW-09,4.898479,52.37504,88,0.011331,1.637575,...,299.730494,0.863777,135.285034,0.679824,-832.481652,-0.657049,-252.977149,-1.887889,3940.747665,1.020919
4,2018-03-11,2100,6.0,1.0,GAWW-10,4.898808,52.372369,49,0.338729,-0.797289,...,402.696352,1.160508,44.878993,0.225523,-918.302668,-0.724785,-187.014683,-1.395632,2000.274786,0.518206


## Predefine Data

In [5]:
sensors = full_df["Sensor"].unique()
stations = ["Nieuwmarkt", "Nieuwezijds Kolk", "Dam", "Spui", "Centraal Station"]

sensor_dict = {}
station_dict = {}

for sensor in sensors:
    sensor_dict[sensor] = {"Longitude": full_df[full_df["Sensor"] == sensor].reset_index()["LonScaled"][0],
                          "Latitude": full_df[full_df["Sensor"] == sensor].reset_index()["LatScaled"][0]}
    temp_dict = {}
    for station in stations:
        temp_dict[station] = {"Weight": full_df[full_df["Sensor"] == sensor].reset_index()[station + " weight"][0],
                                "Score": full_df[full_df["Sensor"] == sensor].reset_index()[station + " score"].mean()}
        
    station_dict[sensor] = temp_dict

In [18]:
sensor_dict

{'GAWW-04': {'Longitude': -0.5588256618785936,
  'Latitude': 0.035968802532961634},
 'GAWW-07': {'Longitude': 1.9663517078482349, 'Latitude': 1.066966110212915},
 'GAWW-08': {'Longitude': -1.2715459830985292,
  'Latitude': -1.4524999429364545},
 'GAWW-09': {'Longitude': 0.01133065607550914, 'Latitude': 1.6375746652398655},
 'GAWW-10': {'Longitude': 0.33872938805078523,
  'Latitude': -0.7972892567308134},
 'GAWW-01': {'Longitude': 1.3747609418499522, 'Latitude': 1.2371731995615205},
 'GAWW-02': {'Longitude': 0.4332403496554872, 'Latitude': 0.4942607351257707},
 'GAWW-03': {'Longitude': -1.1310755770840049,
  'Latitude': -0.6562553160629516},
 'GAWW-05': {'Longitude': -0.7492433165897556,
  'Latitude': -0.8115111667116233},
 'GAWW-06': {'Longitude': -0.41376928093339016,
  'Latitude': -0.7544411946005058}}

## Import Models

### Regression

In [3]:
#RFG Model
rfg_filename = "../../../Data_thesis/Models/rfg_model.sav"

#XGB Regressor
xgbr_filename = "../../../Data_thesis/Models/xgbr_model.sav"

In [4]:
rfg = pickle.load(open(rfg_filename, 'rb'))
xgbr = pickle.load(open(xgbr_filename, 'rb'))

### Classification

In [10]:
#RFG Model
rfc_filename = "../../../Data_thesis/Models/rfc_model.sav"

#XGB Regressor
xgbc_filename = "../../../Data_thesis/Models/xgbc_model.sav"

In [11]:
rfc = pickle.load(open(rfc_filename, 'rb'))
xgbc = pickle.load(open(xgbc_filename, 'rb'))

## Functions

In [8]:
def TransformDate(date):
    date = pd.Timestamp.strptime(date, "%d-%m-%Y")
    
    weekday = date.weekday()
    
    if weekday == 5 or weekday == 6:
        is_weekend = 1
    else:
        is_weekend = 0
    
    print("Date: ", date)
    print("Day: ", weekday)
    print("Is Weekend: ", is_weekend)
    print("Timestuff: ", TransformTime(date))

In [10]:
def TransformTime(date):
    
    month_sin = np.sin(2 * np.pi * date.month / 12)
    month_cos = np.cos(2 * np.pi * date.month / 12)
    
    day_sin = np.sin(2 * np.pi * date.day / 365)
    day_cos = np.cos(2 * np.pi * date.day / 365)
    
    hour_sin = []
    hour_cos = []
    
    for hour in range(100, 2401, 100):
        hour_sin.append(np.sin(2 * np.pi * hour / 2400))
        hour_cos.append(np.cos(2 * np.pi * hour / 2400))
        
    return {"Month Sin": month_sin, "Month Cos": month_cos, "Day Sin": day_sin, 
          "Day Cos": day_cos, "Hour Sin": hour_sin, "Hour Cos": hour_cos}

In [26]:
def SelectSensor(sensor):
    
    for k, v in sensor_dict[sensor].items():
        print(v["Longitude"])

## Test Functions

In [11]:
date = input("Input the date in dd-mm-yyyy format: ")

TransformDate(date)

Input the date in dd-mm-yyyy format: 11-10-1996
Date:  1996-10-11 00:00:00
Day:  4
Is Weekend:  0
Timestuff:  {'Month Sin': -0.8660254037844386, 'Month Cos': 0.5000000000000001, 'Day Sin': 0.1882267098432442, 'Day Cos': 0.9821256058680006, 'Hour Sin': [0.2588190451025208, 0.5, 0.7071067811865476, 0.8660254037844387, 0.9659258262890682, 1.0, 0.9659258262890683, 0.8660254037844386, 0.7071067811865476, 0.5000000000000003, 0.2588190451025206, 1.2246467991473532e-16, -0.25881904510252035, -0.4999999999999997, -0.7071067811865475, -0.8660254037844388, -0.9659258262890681, -1.0, -0.9659258262890682, -0.866025403784439, -0.7071067811865477, -0.49999999999999967, -0.25881904510252157, -2.4492935982947064e-16], 'Hour Cos': [0.9659258262890683, 0.8660254037844386, 0.7071067811865476, 0.4999999999999999, 0.25881904510252096, 6.123233995736766e-17, -0.25881904510252063, -0.5000000000000002, -0.7071067811865475, -0.8660254037844385, -0.9659258262890683, -1.0, -0.9659258262890684, -0.8660254037844388

In [27]:
# sensor = input("What sensor would you like to predict crowdedness for: ")

sensor = "GAWW-04"

SelectSensor(sensor)

IndexError: invalid index to scalar variable.