In [2]:
#imports
import pandas as pd
import pickle
from model_fitting import fit_models
from prediction_data import get_prediction_data
from predictor import get_predictions, get_predictions_as_words
import warnings
warnings.filterwarnings('ignore')

Let's fit the models and check the mean squared error (MSE) of the departure and return models for each station

In [4]:
# Fit the per-station models and save them to the pickle file read below.
fit_models(data_path='../data/finalized_data.csv', save_path='../data/models.pkl')

# Load the fitted models back. The mapping is keyed by station; each station
# holds a 'Departures' and a 'Returns' entry whose element [1] is used as
# that model's MSE.
# NOTE: pickle.load can execute arbitrary code, so only load a models file
# produced by this project.
with open('../data/models.pkl', 'rb') as f:
    fitted_models = pickle.load(f)

# Build the MSE table in one chain instead of mutating the frame in place, and
# keep the raw mapping under its own name so `models` only ever refers to the
# table that is displayed.
models = (
    pd.DataFrame.from_dict(fitted_models, orient='index')
    .assign(
        Departures=lambda frame: frame['Departures'].apply(lambda entry: entry[1]),
        Returns=lambda frame: frame['Returns'].apply(lambda entry: entry[1]),
    )
    .rename(columns={'Departures': 'Departure MSE', 'Returns': 'Return MSE'})
)

display(models)

Unnamed: 0,Departure MSE,Return MSE
Mamsellimyllynkatu,13.293781,13.122145
Intiankatu,176.415109,175.635531
Mäkelänkatu,351.315768,150.347409
Rautatientori / länsi,1854.060965,1789.200702
Kansallismuseo,225.235809,227.608653
...,...,...
Maununneva,5.883732,4.935543
Pirkkolan liikuntapuisto,19.964397,18.431797
Malminkartanonhuippu,12.762817,11.430180
Hakuninmaa,5.709512,8.301752


We can also check what our prediction data looks like

In [5]:
# Fetch the table of inputs used for prediction and show it, so the feature
# columns can be inspected before predicting.
predict_data = get_prediction_data()

display(predict_data)

Unnamed: 0,Year,Month,Day,Precipitation amount (mm),Air temperature (degC),Maximum temperature (degC),Minimum temperature (degC),Weekend
0,7,10,28,0.264,-0.21375,1.21,-1.42,1
1,7,10,29,0.11,-0.16875,1.99,-1.97,1
2,7,10,30,2.635,1.258333,2.38,-0.81,0
3,7,10,31,0.0,-0.104583,2.11,-1.92,0
4,7,11,1,0.0,-1.129167,0.79,-2.54,0


Now let's predict the departures and returns for the next 5 days

In [6]:
# Predicted departures and returns per station, computed from the saved models
# and the last-3-year averages file.
predictions = get_predictions(
    models_path='../data/models.pkl',
    avgs_path='../data/last_3_year_averages.csv',
)

display(predictions)

Unnamed: 0,Predicted Departures (5 days),Predicted Returns (5 days)
Mamsellimyllynkatu,"[5, 5, 5, 5, 5]","[4, 4, 4, 4, 4]"
Intiankatu,"[10, 10, 11, 11, 10]","[9, 10, 11, 11, 10]"
Mäkelänkatu,"[8, 8, 10, 11, 11]","[10, 11, 12, 12, 13]"
Rautatientori / länsi,"[21, 22, 27, 27, 26]","[21, 22, 28, 28, 27]"
Kansallismuseo,"[8, 8, 10, 10, 10]","[8, 8, 10, 10, 10]"
...,...,...
Maununneva,"[1, 1, 1, 1, 1]","[1, 1, 1, 1, 1]"
Pirkkolan liikuntapuisto,"[6, 6, 7, 7, 8]","[6, 6, 7, 7, 8]"
Malminkartanonhuippu,"[1, 2, 1, 1, 1]","[1, 1, 1, 1, 1]"
Hakuninmaa,"[2, 2, 2, 2, 3]","[3, 4, 3, 4, 3]"


We can also get the predictions as a busyness level, in words, for each station

In [7]:
# Same model and averages inputs as the numeric predictions, but reported as
# busyness labels per station (presumably also written to save_path — confirm
# in predictor.py).
# NOTE: this rebinds `predictions`, replacing the frame of predicted counts
# created by the get_predictions() call earlier in the notebook.
predictions = get_predictions_as_words(
    models_path='../data/models.pkl',
    avgs_path='../data/last_3_year_averages.csv',
    save_path='../data/predictions.csv',
)

display(predictions)

Unnamed: 0,Station,Predicted Busyness
0,Mamsellimyllynkatu,"[Quite Busy, Quite Busy, Quite Busy, Quite Bus..."
1,Intiankatu,"[Not Busy, Not Busy, Moderately Busy, Moderate..."
2,Mäkelänkatu,"[Moderately Busy, Moderately Busy, Moderately ..."
3,Rautatientori / länsi,"[Not Busy, Not Busy, Not Busy, Not Busy, Not B..."
4,Kansallismuseo,"[Not Busy, Not Busy, Not Busy, Not Busy, Not B..."
...,...,...
452,Maununneva,"[Moderately Busy, Moderately Busy, Moderately ..."
453,Pirkkolan liikuntapuisto,"[Moderately Busy, Moderately Busy, Quite Busy,..."
454,Malminkartanonhuippu,"[Moderately Busy, Not Busy, Moderately Busy, M..."
455,Hakuninmaa,"[Quite Busy, Very Busy, Quite Busy, Very Busy,..."


We can also make a tester function to test the model on a specific station with hypothetical data

In [6]:
def test_predict(station, year, month, day, precipitation, airtemp, maxtemp, mintemp, weekend, last3yearavgdep, last3yearavgret):
    data = {'Year': [year], 'Month': [month], 'Day': [day], 'Precipitation amount (mm)': [precipitation], 
            'Air temperature (degC)': [airtemp], 'Maximum temperature (degC)': [maxtemp], 
            'Minimum temperature (degC)': [mintemp], 'Weekend': [weekend], 'Last 3 Year Avg Departures': last3yearavgdep, 
            'Last 3 Year Avg Returns': last3yearavgret}
    df = pd.DataFrame(data)
    
    with open('../data/models.pkl', 'rb') as f:
        models = pickle.load(f)

    return models[station]['Departures'][0].predict(df)[0], models[station]['Returns'][0].predict(df)[0]



# The two last-3-year averages feed both the model inputs and the busyness
# formula, so they are named once here.
avg_dep = 219
avg_ret = 222

dep, ret = test_predict(
    station='Rautatientori / länsi',
    year=7,
    month=7,
    day=3,
    precipitation=0,
    airtemp=26,
    maxtemp=80,  # NOTE(review): 80 degC looks implausible next to airtemp=26 / mintemp=23 — confirm the intended value
    mintemp=23,
    weekend=1,
    last3yearavgdep=avg_dep,
    last3yearavgret=avg_ret,
)

print("Departures: ", dep)
print("Returns: ", ret)

# Busyness = mean of the two predicted/average ratios, plus the net outflow
# (departures minus returns) scaled by the mean of the two averages.
relative_load = (dep / avg_dep + ret / avg_ret) / 2
net_outflow = (dep - ret) / ((avg_dep + avg_ret) / 2)
busyness = relative_load + net_outflow
print("Busyness: ", busyness)


Departures:  396.45313173909983
Returns:  312.0923452511852
Busyness:  1.9906434247455287
