In [5]:
#imports
import pandas as pd
import pickle
from model_fitting import fit_models
from prediction_data import get_prediction_data
from predictor import get_predictions

Let's fit the models and check the mean squared error (MSE) of the departure and return models for each station.

In [6]:
# Train the models from the finalized dataset and persist them as a pickle file
fit_models(data_path='../data/finalized_data.csv', save_path='../data/models.pkl')

# Read the pickled models back in.
# NOTE: pickle can execute arbitrary code on load -- only open files this project wrote.
with open('../data/models.pkl', 'rb') as f:
    models = pickle.load(f)

# One row per top-level key of the loaded dict (the station names in the output below)
models = pd.DataFrame.from_dict(models, orient='index')

# Each cell is indexable; keep only element [1], which is the value labelled as MSE
# by the rename (element [0] is the object used for .predict() elsewhere in this notebook).
models = models.assign(
    Departures=models['Departures'].apply(lambda x: x[1]),
    Returns=models['Returns'].apply(lambda x: x[1]),
).rename(columns={'Departures': 'Departure MSE', 'Returns': 'Return MSE'})

display(models)

Unnamed: 0,Departure MSE,Return MSE
Mamsellimyllynkatu,14.532433,14.458946
Intiankatu,197.756150,201.424567
Mäkelänkatu,354.089570,156.245241
Rautatientori / länsi,2118.688248,2040.891026
Kansallismuseo,235.045344,238.286311
...,...,...
Maununneva,5.817876,5.884992
Pirkkolan liikuntapuisto,20.438924,18.558974
Malminkartanonhuippu,14.116347,12.887390
Hakuninmaa,5.791129,8.380377


We can also check what our prediction data looks like: one row of date and weather features per day to be predicted.

In [7]:
# Build the input table for forecasting via the project helper
# (where the helper gets its data from is not visible in this notebook)
predict_data = get_prediction_data()

# Render the table so the feature columns can be inspected
display(predict_data)

Unnamed: 0,Year,Month,Day,Precipitation amount (mm),Air temperature (degC),Maximum temperature (degC),Minimum temperature (degC),Weekend
0,7,10,14,18.256,11.5,13.89,8.61,1
1,7,10,15,8.457,7.999167,9.68,4.73,1
2,7,10,16,0.0,5.85,8.48,4.26,0
3,7,10,17,0.0,5.10875,8.25,2.92,0
4,7,10,18,0.0,2.924167,5.8,0.34,0


Now let's predict the departures and returns for the next 5 days.

In [8]:
# Run the project's prediction helper against the models pickled earlier in this notebook
predictions = get_predictions(models_path='../data/models.pkl')

# Render the resulting table of predictions
display(predictions)

Unnamed: 0,Predicted Departures (5 days),Predicted Returns (5 days)
Mamsellimyllynkatu,"[4, 4, 4, 4, 4]","[4, 4, 4, 4, 4]"
Intiankatu,"[15, 14, 16, 16, 14]","[15, 14, 16, 16, 14]"
Mäkelänkatu,"[14, 12, 15, 15, 14]","[16, 15, 17, 17, 16]"
Rautatientori / länsi,"[37, 34, 43, 43, 38]","[38, 35, 44, 43, 38]"
Kansallismuseo,"[10, 10, 14, 14, 13]","[10, 10, 14, 14, 13]"
...,...,...
Maununneva,"[4, 3, 3, 3, 2]","[3, 3, 2, 2, 2]"
Pirkkolan liikuntapuisto,"[6, 7, 9, 9, 9]","[6, 7, 9, 9, 8]"
Malminkartanonhuippu,"[4, 3, 4, 4, 3]","[4, 3, 3, 3, 2]"
Hakuninmaa,"[3, 3, 3, 3, 3]","[3, 3, 4, 4, 4]"


We can also write a helper function that tries out the models for a specific station on hypothetical input data.

In [28]:
def test_predict(station, year, month, day, precipitation, airtemp, maxtemp, mintemp, weekend):
    data = {'Year': [year], 'Month': [month], 'Day': [day], 'Precipitation amount (mm)': [precipitation], 
            'Air temperature (degC)': [airtemp], 'Maximum temperature (degC)': [maxtemp], 
            'Minimum temperature (degC)': [mintemp], 'Weekend': [weekend]}
    df = pd.DataFrame(data)
    
    with open('../data/models.pkl', 'rb') as f:
        models = pickle.load(f)

    return models[station]['Departures'][0].predict(df)[0], models[station]['Returns'][0].predict(df)[0]



# Hypothetical day at the Rautatientori / länsi station with no precipitation.
# NOTE(review): year=7 matches the Year value shown in the prediction data above;
# the encoding itself is not visible here -- confirm against the training data.
dep, ret = test_predict(
    station='Rautatientori / länsi',
    year=7,
    month=6,
    day=20,
    precipitation=0,
    airtemp=26,
    maxtemp=30,
    mintemp=22,
    weekend=1,
)

# Report both predictions
print("Departures: ", dep)
print("Returns: ", ret)


Departures:  121.69615377899203
Returns:  119.82703372816836
