In [34]:
#imports
import pandas as pd
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import datetime as dt
from fmiopendata.wfs import download_stored_query

Now we can fit a model for each station

In [37]:
file_path = '../data/finalized_data.csv'
df = pd.read_csv(file_path)

#We use only data from 2019 onwards for better accuracy
df = df[df['Year'] > 2]

#get a list of all unique stations in the dataset from year 2021 (latest year)
stations = df[df['Year'] == 5]['Station'].unique()

models = {}

for station in stations:
    #get data for station from df
    station_data = df[df['Station'] == station]

    if len(station_data) < 100:
        continue

    returns = station_data['Return Count']
    departures = station_data['Departure Count']

    station_data = station_data.drop(['Departure Count', 'Return Count', 'Station'], axis=1)

    #split data into train and test
    X_train, X_test, y_train, y_test = train_test_split(station_data, departures, test_size=0.2, shuffle=True, random_state=42)

    model = PoissonRegressor(max_iter=6000)
    model.fit(X_train, y_train)

    #find mean squared error of test data
    predictions = model.predict(X_test)
    score = mean_squared_error(y_test, predictions)

    models[station] = {'Departures': [model, score]}

    #repeat for returns

    X_train, X_test, y_train, y_test = train_test_split(station_data, returns, test_size=0.2, shuffle=True, random_state=42)

    model = PoissonRegressor(max_iter=6000)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    score = mean_squared_error(y_test, predictions)

    models[station]['Returns'] = [model, score]

print(models)



{'Mamsellimyllynkatu': {'Departures': [PoissonRegressor(max_iter=6000), 14.518807099420298], 'Returns': [PoissonRegressor(max_iter=6000), 14.440385579623225]}, 'Intiankatu': {'Departures': [PoissonRegressor(max_iter=6000), 199.18141401424015], 'Returns': [PoissonRegressor(max_iter=6000), 202.98621907350002]}, 'Mäkelänkatu': {'Departures': [PoissonRegressor(max_iter=6000), 354.67923718951846], 'Returns': [PoissonRegressor(max_iter=6000), 156.42420214991373]}, 'Rautatientori / länsi': {'Departures': [PoissonRegressor(max_iter=6000), 2119.335079378253], 'Returns': [PoissonRegressor(max_iter=6000), 2038.6342693254164]}, 'Kansallismuseo': {'Departures': [PoissonRegressor(max_iter=6000), 234.92359647869839], 'Returns': [PoissonRegressor(max_iter=6000), 237.879955978106]}, 'Koivu-Mankkaa': {'Departures': [PoissonRegressor(max_iter=6000), 20.777280148535784], 'Returns': [PoissonRegressor(max_iter=6000), 18.230260178096366]}, 'Baana': {'Departures': [PoissonRegressor(max_iter=6000), 809.3509323

Now we can get weather forecast data using the fmiopendata library to try to precict the number of bikes at each station in the coming days.

In [38]:
#start time is tomorrow 00:00
start_time = dt.datetime.now() + dt.timedelta(days=1)
start_time = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
#end time in 10 days
end_time = start_time + dt.timedelta(days=5) - dt.timedelta(hours=1)

#put into correct format
start_time = start_time.isoformat(timespec="seconds") + "Z"
end_time = end_time.isoformat(timespec="seconds") + "Z"

forecast = download_stored_query("fmi::forecast::edited::weather::scandinavia::point::multipointcoverage", args=["starttime="+start_time, "endtime="+end_time, "place=Helsinki", "timestep=60"])

for key in forecast.data.keys():
    forecast.data[key] = forecast.data[key]["Helsinki"]

forecast_data = pd.DataFrame.from_dict(forecast.data, orient='index')

#we can drop everything except Air temperature and Precipitation amount 1 hour

simple_data = forecast_data[['Air temperature', 'Precipitation amount 1 hour']].copy()

#each row for both columns contains a dictionary, so we extract the value for key 'value' from each dictionary

simple_data['Air temperature'] = simple_data['Air temperature'].apply(lambda x: x['value'])
simple_data['Precipitation amount 1 hour'] = simple_data['Precipitation amount 1 hour'].apply(lambda x: x['value'])

#Now we can make a dataframe with the same columns as our training data 
#and use the models to predict the number of departures and returns for each day

predict_data = simple_data.reset_index()

predict_data['Year'] = predict_data['index'].apply(lambda x: x.year - 2016)
predict_data['Month'] = predict_data['index'].apply(lambda x: x.month)
predict_data['Day'] = predict_data['index'].apply(lambda x: x.day)
predict_data['Weekend'] = predict_data['index'].apply(lambda x: x.weekday() > 4)
predict_data = predict_data.drop('index', axis=1)

min_temp = predict_data.groupby(['Year', 'Month', 'Day', 'Weekend']).agg({'Air temperature': ['min']})
max_temp = predict_data.groupby(['Year', 'Month', 'Day', 'Weekend']).agg({'Air temperature': ['max']})

predict_data = predict_data.groupby(['Year', 'Month', 'Day', 'Weekend']).agg({'Precipitation amount 1 hour': ['sum'], 'Air temperature': ['mean']})
predict_data = predict_data.merge(max_temp, on=['Year', 'Month', 'Day', 'Weekend'])
predict_data = predict_data.merge(min_temp, on=['Year', 'Month', 'Day', 'Weekend'])
predict_data.columns = ['Precipitation amount (mm)', 'Air temperature (degC)', 'Maximum temperature (degC)', 'Minimum temperature (degC)']
predict_data = predict_data.reset_index()
predict_data['Weekend'] = predict_data['Weekend'].apply(lambda x: 1 if x else 0)
predict_data = predict_data[['Year', 'Month', 'Day', 'Precipitation amount (mm)', 'Air temperature (degC)', 'Maximum temperature (degC)', 'Minimum temperature (degC)', 'Weekend']]

display(predict_data)


Unnamed: 0,Year,Month,Day,Precipitation amount (mm),Air temperature (degC),Maximum temperature (degC),Minimum temperature (degC),Weekend
0,7,10,14,18.256,11.499583,13.89,8.61,1
1,7,10,15,8.457,7.999167,9.68,4.73,1
2,7,10,16,0.0,5.870417,8.65,4.26,0
3,7,10,17,0.0,5.10875,8.25,2.92,0
4,7,10,18,0.0,2.924167,5.8,0.34,0


Now lets predict the departures and returns for the next 5 days at some station

In [39]:
def predict(data, models):

    predictions = {}

    for station in models.keys():
        #get model for departures
        model = models[station]['Departures'][0]
        #get model for returns
        model2 = models[station]['Returns'][0]

        #get predictions for departures
        departures = model.predict(data)
        #get predictions for returns
        returns = model2.predict(data)

        #add predictions to dictionary
        predictions[station] = {'Departures': departures, 'Returns': returns}
    
    return predictions

predictions = predict(predict_data, models)

#make a dataframe from predictions

predictions_df = pd.DataFrame.from_dict(predictions, orient='index')

display(predictions_df)

Unnamed: 0,Departures,Returns
Mamsellimyllynkatu,"[3.968257282021096, 3.962079617260475, 4.13485...","[3.6782716539763847, 3.7307993817491725, 4.002..."
Intiankatu,"[14.706157416024654, 14.151319573400695, 16.38...","[15.278205827170513, 14.214030160903834, 15.98..."
Mäkelänkatu,"[13.7211746996158, 12.118061318001716, 15.4451...","[16.159856410594802, 14.869260367604586, 17.41..."
Rautatientori / länsi,"[37.068078324156566, 34.319841477206246, 43.26...","[37.75415495137105, 34.54468584069306, 43.9247..."
Kansallismuseo,"[9.903239210821123, 9.969848993849684, 14.1533...","[10.13602843058265, 10.215310998520419, 14.422..."
...,...,...
Maununneva,"[4.509603004699244, 3.215324579961198, 2.70625...","[3.152402585426449, 2.5892049030397892, 2.4187..."
Pirkkolan liikuntapuisto,"[6.501188269154696, 7.263550424043436, 9.25418...","[6.432800288337848, 7.0697597555012255, 9.0640..."
Malminkartanonhuippu,"[3.957832108472943, 3.5387693678897123, 3.6412...","[3.772763111549208, 2.921129978295191, 2.72756..."
Hakuninmaa,"[2.6327709767477097, 2.7172509547019574, 2.905...","[3.4020948262349244, 3.3326852096172512, 3.698..."
