# Model Prediction

## Imports

In [1]:
import pandas as pd
import pickle
import numpy as np
import plotly
from bubbly import bubbleplot

from sklearn.metrics.pairwise import rbf_kernel

import ipywidgets as widgets
from ipywidgets import interact_manual, ToggleButtons
from IPython.display import display

## Import Files

In [2]:
# Feature table used to look up sensor and station coordinates.
full_df = pd.read_csv(filepath_or_buffer="../../../Data_thesis/Full_Datasets/Full.csv")
# Passenger table; read later by SelectSensor via its "Station", "weekday" and "Passengers" columns.
passenger_df = pd.read_csv(filepath_or_buffer="../../../Data_thesis/Full_Datasets/TimeSeriesGVB.csv")

In [3]:
# Preview the first rows of the loaded feature table.
full_df.head()

Unnamed: 0,Date,Hour,Nieuwmarkt Lat,Nieuwmarkt Lon,Nieuwezijds Kolk Lat,Nieuwezijds Kolk Lon,Dam Lat,Dam Lon,Spui Lat,Spui Lon,...,Nieuwmarkt score,Nieuwmarkt weight,Nieuwezijds Kolk score,Nieuwezijds Kolk weight,Dam score,Dam weight,Spui score,Spui weight,Centraal Station score,Centraal Station weight
0,2018-03-11,100,52.371942,4.901239,52.376288,4.893731,52.373127,4.89008,52.369097,4.889259,...,0.0,0.882141,0.0,0.587521,-7.303229,-0.070905,-0.0,-0.681885,163.979433,0.34668
1,2018-03-11,2100,52.371942,4.901239,52.376288,4.893731,52.373127,4.89008,52.369097,4.889259,...,352.242352,1.015108,83.818349,0.421198,-448.251607,-0.35379,-130.349894,-0.97276,1668.972876,0.432376
2,2018-03-11,2100,52.371942,4.901239,52.376288,4.893731,52.373127,4.89008,52.369097,4.889259,...,384.307874,1.107515,8.440208,0.042413,-1899.118956,-1.49891,-352.192566,-2.628303,3994.026557,1.034722
3,2018-03-11,2200,52.371942,4.901239,52.376288,4.893731,52.373127,4.89008,52.369097,4.889259,...,342.287656,1.049962,42.31473,0.253382,-1231.740804,-1.207589,-279.71009,-2.350505,3446.831547,1.043862
4,2018-03-11,2200,52.371942,4.901239,52.376288,4.893731,52.373127,4.89008,52.369097,4.889259,...,343.52431,1.053756,73.166332,0.438122,-730.059883,-0.715745,-196.552679,-1.651703,2770.30116,0.838977


In [18]:
# List the available column names of the feature table.
full_df.columns

Index(['Date', 'Hour', 'Nieuwmarkt Lat', 'Nieuwmarkt Lon',
       'Nieuwezijds Kolk Lat', 'Nieuwezijds Kolk Lon', 'Dam Lat', 'Dam Lon',
       'Spui Lat', 'Spui Lon', 'Centraal Station Lat', 'Centraal Station Lon',
       'weekday', 'is_weekend', 'Sensor', 'SensorLongitude', 'SensorLatitude',
       'CrowdednessCount', 'LonScaled', 'LatScaled', 'is_event', 'Year',
       'month_sin', 'month_cos', 'day_sin', 'day_cos', 'hour_sin', 'hour_cos',
       'Nieuwmarkt score', 'Nieuwmarkt weight', 'Nieuwezijds Kolk score',
       'Nieuwezijds Kolk weight', 'Dam score', 'Dam weight', 'Spui score',
       'Spui weight', 'Centraal Station score', 'Centraal Station weight'],
      dtype='object')

## Functions

In [4]:
def TransformDate(date):
    """Derive the calendar features for a single date.

    Parameters
    ----------
    date : object
        Must expose ``weekday()`` plus whatever ``TransformTime`` reads
        (``month`` and ``day``), e.g. ``datetime.date`` or ``pd.Timestamp``.

    Returns
    -------
    tuple
        ``(weekday, is_weekend, time)`` where ``weekday`` is the value of
        ``date.weekday()``, ``is_weekend`` is 1 when that value is 5 or 6
        (otherwise 0), and ``time`` is the dict built by ``TransformTime``.
    """
    day_of_week = date.weekday()
    weekend_flag = 1 if day_of_week in (5, 6) else 0
    return day_of_week, weekend_flag, TransformTime(date)

In [5]:
def TransformTime(date):
    """Build the cyclical (sin/cos) time features for one date.

    Parameters
    ----------
    date : object
        Must expose ``month`` and ``day`` attributes.

    Returns
    -------
    dict
        Keys ``"Month Sin"``, ``"Month Cos"``, ``"Day Sin"``, ``"Day Cos"``
        (scalars) and ``"Hour Sin"``, ``"Hour Cos"``, ``"Hour"`` (lists with
        one entry per hour value 100, 200, ..., 2400).
    """
    month_angle = 2 * np.pi * date.month / 12

    # NOTE(review): ``date.day`` is divided by 365. For datetime-like objects
    # ``day`` is the day of the month, so this spans only a small arc of the
    # circle -- confirm this matches how the training features were built
    # before changing it.
    day_angle = 2 * np.pi * date.day / 365

    # Hours are encoded as 100..2400 in steps of 100.
    hours = list(range(100, 2401, 100))
    hour_angles = [2 * np.pi * hour / 2400 for hour in hours]

    return {
        "Month Sin": np.sin(month_angle),
        "Month Cos": np.cos(month_angle),
        "Day Sin": np.sin(day_angle),
        "Day Cos": np.cos(day_angle),
        "Hour Sin": [np.sin(angle) for angle in hour_angles],
        "Hour Cos": [np.cos(angle) for angle in hour_angles],
        "Hour": hours,
    }

In [88]:
def SelectSensor(weekday, sensor, stations, sensor_dict, station_dict, lat_scaler, lon_scaler, station_scaler):
    """Compute the scaled coordinates and per-station weights for one sensor.

    Parameters
    ----------
    weekday : int
        Weekday value used to select the row in the global ``passenger_df``.
    sensor : hashable
        Key into ``sensor_dict``.
    stations : iterable
        Station names; each must be a key of ``station_dict`` and occur in
        ``passenger_df["Station"]``.
    sensor_dict, station_dict : dict
        Map a name to ``{"Longitude": ..., "Latitude": ...}``.
    lat_scaler, lon_scaler : object
        Fitted scalers exposing ``transform`` on a column-shaped array.
    station_scaler : object
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    tuple
        ``(lon_scaled, lat_scaled, weights_dict)`` where ``weights_dict`` maps
        each station to ``{"Score": kernel, "Weight": kernel * passengers}``.

    Raises
    ------
    KeyError
        If no ``passenger_df`` row matches a station/weekday combination.
    """
    sensor_lat = sensor_dict[sensor]["Latitude"]
    sensor_lon = sensor_dict[sensor]["Longitude"]

    # np.reshape (instead of the scalar's own .reshape) also accepts plain
    # Python floats.
    lon_scaled = lon_scaler.transform(np.reshape(sensor_lon, (-1, 1)))[0, 0]
    lat_scaled = lat_scaler.transform(np.reshape(sensor_lat, (-1, 1)))[0, 0]

    # The coordinates must be wrapped in a list: the second positional
    # argument of np.array is the dtype, not a second element.
    sensor_point = np.array([sensor_lat, sensor_lon]).reshape(1, -1)

    weights_dict = {}

    for station in stations:

        # First passenger count recorded for this station on this weekday.
        station_rows = passenger_df[(passenger_df["Station"] == station)
                                    & (passenger_df["weekday"] == weekday)]
        passengers = station_rows.reset_index()["Passengers"][0]

        station_point = np.array([station_dict[station]["Latitude"],
                                  station_dict[station]["Longitude"]]).reshape(1, -1)

        weight = rbf_kernel(station_point, sensor_point)[0, 0]

        # NOTE(review): in the sample rows of full_df the "... score" columns
        # hold the larger values and "... weight" the smaller ones -- confirm
        # that "Score"/"Weight" are not swapped here, and whether
        # station_scaler was meant to be applied.
        weights_dict[station] = {"Score": weight, "Weight": weight * passengers}

    return lon_scaled, lat_scaled, weights_dict

In [87]:
def _buildInputRow(weekday, is_weekend, time, hour_index, sensor, date,
                   sensor_lon, sensor_lat, weights_dict, sensor_dict):
    """Assemble one input row (one sensor, one date, one hour) as a dict.

    The key insertion order defines the column order of the resulting
    DataFrame, so it is kept fixed.
    """
    row = {"weekday": weekday, "is_weekend": is_weekend, "LonScaled": sensor_lon,
           "LatScaled": sensor_lat, "is_event": 0.0, "month_sin": time["Month Sin"],
           "month_cos": time["Month Cos"], "day_sin": time["Day Sin"],
           "day_cos": time["Day Cos"], "hour_sin": time["Hour Sin"][hour_index],
           "hour_cos": time["Hour Cos"][hour_index], "hour": time["Hour"][hour_index]}

    # Fixed station order: score column first, then weight column, per station.
    for station in ("Nieuwmarkt", "Nieuwezijds Kolk", "Dam", "Spui", "Centraal Station"):
        row[station + " score"] = weights_dict[station]["Score"]
        row[station + " weight"] = weights_dict[station]["Weight"]

    row["Sensor"] = sensor
    row["Date"] = date
    row["SensorLongitude"] = sensor_dict[sensor]["Longitude"]
    row["SensorLatitude"] = sensor_dict[sensor]["Latitude"]

    return row


def combineData(dates, sensors, sensor_dict, station_dict, stations, lat_scaler, lon_scaler, station_scaler):
    """Build the model-input table: one row per sensor, date and hour.

    Parameters
    ----------
    dates : sequence
        Dates to generate rows for; each is passed to ``TransformDate``.
        A single-element sequence is handled like any other.
    sensors : array-like or scalar
        Sensor name(s); a 0-d array or a single name is treated as one sensor.
    sensor_dict, station_dict, stations, lat_scaler, lon_scaler, station_scaler
        Forwarded unchanged to ``SelectSensor``.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by sensor, then date, then hour, indexed 0..n-1. Empty
        when ``dates`` is empty.
    """
    # atleast_1d turns a 0-d array / scalar into a 1-element array, and
    # tolist() yields plain Python objects usable as sensor_dict keys.
    sensor_names = np.atleast_1d(sensors).tolist()

    input_dict = {}
    row_id = 0

    for sensor in sensor_names:
        for date in dates:

            weekday, is_weekend, time = TransformDate(date)

            sensor_lon, sensor_lat, weights_dict = SelectSensor(weekday, sensor, stations, sensor_dict,
                                                                station_dict, lat_scaler, lon_scaler,
                                                                station_scaler)

            for hour_index in range(len(time["Hour Sin"])):
                input_dict[row_id] = _buildInputRow(weekday, is_weekend, time, hour_index, sensor, date,
                                                    sensor_lon, sensor_lat, weights_dict, sensor_dict)
                row_id += 1

    return pd.DataFrame.from_dict(input_dict, orient="index")

In [8]:
def generateDates(start_date, end_date):
    """List the consecutive days from ``start_date`` up to, but excluding, ``end_date``.

    Parameters
    ----------
    start_date, end_date : date-like
        Their difference must expose a ``days`` attribute, and ``start_date``
        must support adding a ``pd.Timedelta``.

    Returns
    -------
    list
        ``start_date + k days`` for ``k`` in ``0 .. (end_date - start_date).days - 1``;
        empty when the difference is zero or negative.
    """
    day_count = (end_date - start_date).days
    return [start_date + pd.Timedelta(offset, unit="D") for offset in range(day_count)]

In [9]:
def plotTimeSeries(df, date):
    """Write an animated bubble plot of the crowdedness counts for one date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``SensorLongitude``, ``SensorLatitude``,
        ``Sensor``, ``Hour`` and ``CrowdednessCount``.
    date : date-like
        Anything ``pd.Timestamp`` accepts (``pd.Timestamp``,
        ``datetime.date``, ``datetime.datetime``); used for the plot title and
        the output file name.

    Returns
    -------
    None
        The figure is written to
        ``../../../Data_thesis/Full_Datasets/Plots/<YYYY-MM-DD>_plot.html``.
    """
    # Variables
    x_column = 'SensorLongitude'
    y_column = 'SensorLatitude'
    bubble_column = 'Sensor'
    time_column = 'Hour'
    size_column = 'CrowdednessCount'

    # Going through pd.Timestamp(...) lets plain datetime.date values work too.
    str_date = pd.Timestamp(date).strftime("%Y-%m-%d")

    # Pad the axis ranges slightly so the outermost bubbles are not clipped.
    x_range = [min(df[x_column]) - 0.001, max(df[x_column]) + 0.001]
    y_range = [min(df[y_column]) - 0.001, max(df[y_column]) + 0.001]

    figure = bubbleplot(dataset=df, x_column=x_column, y_column=y_column,
                        bubble_column=bubble_column, size_column=size_column,
                        time_column=time_column, color_column=bubble_column,
                        x_title="Sensor Longitude", y_title="Sensor Latitude",
                        title='Crowdedness Counts Amsterdam - ' + str_date,
                        x_logscale=False, scale_bubble=3, height=650,
                        x_range=x_range, y_range=y_range)

    plotly.offline.plot(figure, filename="../../../Data_thesis/Full_Datasets/Plots/{0}_plot.html".format(str_date),
                        auto_open=False)

In [10]:
def prediction(start_date, end_date, sensors, model, sensor_dict, station_dict, stations, lat_scaler, lon_scaler, 
               station_scaler, progress, xgbr_model):
    """Predict crowdedness per sensor/date/hour and write one plot per date.

    Parameters
    ----------
    start_date, end_date : date-like
        Passed to ``generateDates`` to build the list of dates.
    sensors, sensor_dict, station_dict, stations, lat_scaler, lon_scaler, station_scaler
        Forwarded to ``combineData`` to build the model input.
    model : object
        Fitted estimator exposing ``predict``.
    progress : object
        Progress indicator with an integer ``value`` attribute; incremented
        three times by this function.
    xgbr_model : bool
        When truthy the model is fed a plain array (``.values``) and negative
        predictions are clipped to 0.

    Returns
    -------
    tuple
        ``(predict_df, progress)`` where ``predict_df`` has the columns
        ``Date``, ``Hour``, ``Sensor``, ``SensorLongitude``,
        ``SensorLatitude`` and ``CrowdednessCount``.
    """
    dates = generateDates(start_date, end_date)
    progress.value += 1
    df = combineData(dates, sensors, sensor_dict, station_dict, stations, lat_scaler, lon_scaler, station_scaler)
    progress.value += 1

    # These columns only identify a row; they are not model features.
    helper_columns = ["hour", "Sensor", "Date", "SensorLongitude", "SensorLatitude"]
    input_df = df.drop(columns=helper_columns)

    if xgbr_model:
        counts = model.predict(input_df.values).astype(int)
        # Clip negative predictions to 0.
        counts[counts < 0] = 0
    else:
        counts = model.predict(input_df).astype(int)

    predict_df = pd.DataFrame({
        "Date": df["Date"],
        "Hour": df["hour"],
        "Sensor": df["Sensor"],
        "SensorLongitude": df["SensorLongitude"],
        "SensorLatitude": df["SensorLatitude"],
        "CrowdednessCount": counts,
    })

    progress.value += 1

    for date in dates:

        series_df = predict_df[predict_df["Date"] == date].copy()
        # Map hour 2400 to 0 for plotting. Restricted to the "Hour" column so
        # that a CrowdednessCount of exactly 2400 is not zeroed as well.
        series_df["Hour"] = series_df["Hour"].replace(2400, 0)
        series_df = series_df.sort_values(by=["Hour", "Sensor"])
        plotTimeSeries(series_df.drop(columns=["Date"]), date)

    return predict_df, progress

In [83]:
def defineCoordinates(sensors, stations, add_sensors, extra_cor, extra_lon, extra_lat):
    """Look up the coordinates of the requested sensors and stations.

    Parameters
    ----------
    sensors : iterable
        Sensor names; the name ``"Custom"`` is skipped during the lookup.
        Only iterated when ``add_sensors`` is True.
    stations : iterable
        Station names; ``full_df`` must have ``"<station> Lon"`` and
        ``"<station> Lat"`` columns for each.
    add_sensors : bool
        When True, coordinates of the named sensors are read from the global
        ``full_df``.
    extra_cor : bool
        When True, a ``"Custom"`` entry is added from ``extra_lon`` / ``extra_lat``.
    extra_lon, extra_lat : float
        Coordinates of the custom location.

    Returns
    -------
    tuple
        ``(sensor_dict, station_dict)``, each mapping a name to
        ``{"Longitude": ..., "Latitude": ...}``.
    """
    sensor_dict = {}

    if add_sensors == True:
        for name in sensors:
            if name == "Custom":
                continue
            # Filter once per sensor and take the first matching row.
            sensor_rows = full_df[full_df["Sensor"] == name].reset_index()
            sensor_dict[name] = {"Longitude": sensor_rows["SensorLongitude"][0],
                                 "Latitude": sensor_rows["SensorLatitude"][0]}

    if extra_cor == True:
        sensor_dict["Custom"] = {"Longitude": np.float64(extra_lon), "Latitude": np.float64(extra_lat)}

    # Station coordinates are read from the first row of full_df.
    station_dict = {
        station: {"Longitude": full_df[station + " Lon"][0], "Latitude": full_df[station + " Lat"][0]}
        for station in stations
    }

    return sensor_dict, station_dict

In [12]:
def importModels(model):
    """Load the latitude, longitude and station scalers from pickle files.

    NOTE(review): this cell appears truncated in this copy of the notebook.
    The comment line below is garbled, and ``lat_filename``, ``lon_filename``,
    ``station_filename`` and ``xgbr_model`` are not defined anywhere in the
    visible body -- presumably the missing part defined the file names and
    loaded the model selected by ``model``. Restore it from the original
    notebook before relying on this function.

    Parameters
    ----------
    model
        Returned unchanged by the visible code.

    Returns
    -------
    tuple
        ``(model, lat_scaler, lon_scaler, station_scaler, xgbr_model)``.
    """
    
    #RFG Modelen(xgbc_filename, 'rb'))
        
    # NOTE(review): pickle.load can execute arbitrary code from the file, so
    # only load trusted files. The file objects opened here are never
    # explicitly closed.
    lat_scaler = pickle.load(open(lat_filename, 'rb'))
    lon_scaler = pickle.load(open(lon_filename, 'rb'))
    station_scaler = pickle.load(open(station_filename, 'rb'))
    
    return model, lat_scaler, lon_scaler, station_scaler, xgbr_model

In [22]:
def minMaxCoordinates(df):
    """Return the extreme sensor coordinates found in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``SensorLongitude`` and ``SensorLatitude``.

    Returns
    -------
    tuple
        ``(lon_max, lon_min, lat_max, lat_min)``.
    """
    longitudes = df["SensorLongitude"]
    latitudes = df["SensorLatitude"]
    return longitudes.max(), longitudes.min(), latitudes.max(), latitudes.min()

## Run the Interactive Prediction

In [77]:
# Manual-trigger variant of interact: the function only runs when the
# "Generate Prediction" button is pressed.
interact_pred = interact_manual.options(manual_name="Generate Prediction")

# Bounds for the custom-coordinate sliders, taken from the known sensors.
lon_max, lon_min, lat_max, lat_min = minMaxCoordinates(full_df)


@interact_pred
def generatePredictions(
    model=widgets.Dropdown(
        options={"Random Forrest Regressor": "rfg", "XGB Regressor": "xgbr",
                 "Random Forrest Classifier": "rfc", "XGB Classifier": "xgbc"},
        description="Model"),
    start_date=widgets.DatePicker(value=pd.to_datetime('2019-01-01'), description="Start Date"),
    end_date=widgets.DatePicker(value=pd.to_datetime('2019-01-05'), description="End Date"),
    add_sensors=widgets.Checkbox(value=True, description='Add sensors', disabled=False),
    extra_coordinate=widgets.Checkbox(value=False, description='Add a custom coordinate', disabled=False),
    extra_lon=widgets.FloatSlider(min=lon_min, max=lon_max, step=0.0000001, description='Longitude:',
                                  readout=True, readout_format='.5f'),
    extra_lat=widgets.FloatSlider(min=lat_min, max=lat_max, step=0.0000001, description='Latitude:',
                                  readout=True, readout_format='.5f'),
):
    """Run the full prediction pipeline for the widget selections.

    Loads the model and scalers, builds the coordinate lookups, predicts the
    crowdedness for the chosen date range, and writes the result to
    ``../../../Data_thesis/Full_Datasets/Predictions.csv``.

    Returns the first rows of the prediction table, or ``None`` (after
    printing a message) when neither the sensors nor a custom coordinate
    were selected.
    """
    progress = widgets.IntProgress(
        min=0, max=8, step=1, description='Generating Prediction:',
        bar_style='',  # 'success', 'info', 'warning', 'danger' or ''
        orientation='horizontal',
    )
    display(progress)

    # Nothing to predict for: bail out early.
    if add_sensors == False and extra_coordinate == False:
        print("At least, either the custom coordinates or the sensors have to be added")
        return None

    if add_sensors == True:
        sensors = full_df["Sensor"].unique()
        if extra_coordinate == True:
            sensors = np.append("Custom", sensors)
    else:
        sensors = np.array("Custom")

    stations = ["Nieuwmarkt", "Nieuwezijds Kolk", "Dam", "Spui", "Centraal Station"]
    progress.value += 1

    loaded_model, lat_scaler, lon_scaler, station_scaler, xgbr_model = importModels(model)
    progress.value += 1

    sensor_dict, station_dict = defineCoordinates(sensors, stations, add_sensors, extra_coordinate,
                                                  extra_lon, extra_lat)
    progress.value += 1

    predictions, progress = prediction(start_date, end_date, sensors, loaded_model, sensor_dict, station_dict,
                                       stations, lat_scaler, lon_scaler, station_scaler, progress, xgbr_model)
    progress.value += 1

    predictions.to_csv("../../../Data_thesis/Full_Datasets/Predictions.csv", index=False)
    progress.value += 1

    return predictions.head()

interactive(children=(Dropdown(description='Model', options={'Random Forrest Regressor': 'rfg', 'XGB Regressor…