In [2]:
#imports
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import datetime as dt
from fmiopendata.wfs import download_stored_query

  from pandas.core import (


Now we can fit a model for each station

In [269]:
file_path = '../data/finalized_data.csv'
df = pd.read_csv(file_path)

#We use only data from 2019 onwards for better accuracy
df = df[df['Year'] > 2]

#get a list of all unique stations in the dataset from year 2021 (latest year)
stations = df[df['Year'] == 5]['Station'].unique()

models = {}

for station in stations:
    #get data for station from df
    station_data = df[df['Station'] == station]

    if len(station_data) < 100:
        continue

    returns = station_data['Return Count']
    departures = station_data['Departure Count']

    station_data = station_data.drop(['Departure Count', 'Return Count', 'Station'], axis=1)

    #split data into train and test
    X_train, X_test, y_train, y_test = train_test_split(station_data, departures, test_size=0.2, shuffle=True, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    #find mean squared error of test data
    predictions = model.predict(X_test)
    score = mean_squared_error(y_test, predictions)

    models[station] = [model, score]

print(models)



{'Mamsellimyllynkatu': [LinearRegression(), 14.103196273244608], 'Intiankatu': [LinearRegression(), 203.3786901724605], 'Mäkelänkatu': [LinearRegression(), 368.60952336274636], 'Rautatientori / länsi': [LinearRegression(), 2038.1742104697078], 'Kansallismuseo': [LinearRegression(), 220.19214053814358], 'Koivu-Mankkaa': [LinearRegression(), 19.209645647656593], 'Baana': [LinearRegression(), 814.6413563506883], 'Puotila (M)': [LinearRegression(), 34.59069451517211], 'Hanasaari': [LinearRegression(), 98.55692920137206], 'Lauttasaarensilta': [LinearRegression(), 291.82601847959535], 'Lystimäki': [LinearRegression(), 11.256325273319803], 'Töölöntori': [LinearRegression(), 321.6696453841476], 'Cygnaeuksenkatu': [LinearRegression(), 132.95931780973396], 'Muurarinkuja': [LinearRegression(), 30.23564144075174], 'Melkonkuja': [LinearRegression(), 111.5112071171287], 'Pohjolanaukio': [LinearRegression(), 100.26264804676546], 'Itämerentori': [LinearRegression(), 5142.31165694174], 'Thalianaukio': 

Now we can get weather forecast data using the fmiopendata library to try to precict the number of bikes at each station in the coming days.

In [12]:
#start time is tomorrow 00:00
start_time = dt.datetime.now() + dt.timedelta(days=1)
start_time = start_time.replace(hour=0, minute=0, second=0, microsecond=0)
#end time in 10 days
end_time = start_time + dt.timedelta(days=5) - dt.timedelta(hours=1)

#put into correct format
start_time = start_time.isoformat(timespec="seconds") + "Z"
end_time = end_time.isoformat(timespec="seconds") + "Z"

forecast = download_stored_query("fmi::forecast::edited::weather::scandinavia::point::multipointcoverage", args=["starttime="+start_time, "endtime="+end_time, "place=Helsinki", "timestep=60"])

for key in forecast.data.keys():
    forecast.data[key] = forecast.data[key]["Helsinki"]

forecast_data = pd.DataFrame.from_dict(forecast.data, orient='index')
display(forecast_data)


Unnamed: 0,Air pressure,Geopotential height,Air temperature,Dew point,Humidity,Wind direction,Wind speed,None,U-component of wind vector,V-component of wind,Total cloud cover,Low cloud cover,Medium cloud cover,High cloud cover,Long wave radiation,Global radiation,Weather,Precipitation amount 1 hour,Hourly maximum wind gust
2023-10-10 00:00:00,"{'value': 1013.3, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 2.35, 'units': 'degC'}","{'value': 0.0, 'units': 'degC'}","{'value': 88.5, 'units': '%'}","{'value': 298.3, 'units': 'deg'}","{'value': 2.0, 'units': 'm/s'}","{'value': 10120000.0, 'units': ''}","{'value': 1.7, 'units': 'm/s'}","{'value': -1.0, 'units': 'm/s'}","{'value': 5.2, 'units': '%'}","{'value': 1.6, 'units': '%'}","{'value': 5.2, 'units': '%'}","{'value': 5.5, 'units': '%'}","{'value': 240.6, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 2.7, 'units': 'm/s'}"
2023-10-10 01:00:00,"{'value': 1013.3, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 2.27, 'units': 'degC'}","{'value': -0.01, 'units': 'degC'}","{'value': 88.9, 'units': '%'}","{'value': 296.2, 'units': 'deg'}","{'value': 2.1, 'units': 'm/s'}","{'value': 10120020.0, 'units': ''}","{'value': 1.9, 'units': 'm/s'}","{'value': -1.0, 'units': 'm/s'}","{'value': 24.1, 'units': '%'}","{'value': 5.7, 'units': '%'}","{'value': 24.1, 'units': '%'}","{'value': 2.0, 'units': '%'}","{'value': 242.4, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 2.9, 'units': 'm/s'}"
2023-10-10 02:00:00,"{'value': 1013.2, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 2.07, 'units': 'degC'}","{'value': 0.02, 'units': 'degC'}","{'value': 89.9, 'units': '%'}","{'value': 290.4, 'units': 'deg'}","{'value': 1.9, 'units': 'm/s'}","{'value': 10120030.0, 'units': ''}","{'value': 1.8, 'units': 'm/s'}","{'value': -0.7, 'units': 'm/s'}","{'value': 35.3, 'units': '%'}","{'value': 13.6, 'units': '%'}","{'value': 35.3, 'units': '%'}","{'value': 3.9, 'units': '%'}","{'value': 244.8, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 2.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 2.8, 'units': 'm/s'}"
2023-10-10 03:00:00,"{'value': 1013.3, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 2.42, 'units': 'degC'}","{'value': 0.37, 'units': 'degC'}","{'value': 90.0, 'units': '%'}","{'value': 283.1, 'units': 'deg'}","{'value': 2.2, 'units': 'm/s'}","{'value': 10120040.0, 'units': ''}","{'value': 2.1, 'units': 'm/s'}","{'value': -0.5, 'units': 'm/s'}","{'value': 48.7, 'units': '%'}","{'value': 43.8, 'units': '%'}","{'value': 28.6, 'units': '%'}","{'value': 0.0, 'units': '%'}","{'value': 249.1, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 2.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 2.7, 'units': 'm/s'}"
2023-10-10 04:00:00,"{'value': 1013.4, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 2.4, 'units': 'degC'}","{'value': 0.16, 'units': 'degC'}","{'value': 89.1, 'units': '%'}","{'value': 278.0, 'units': 'deg'}","{'value': 2.9, 'units': 'm/s'}","{'value': 10120050.0, 'units': ''}","{'value': 2.9, 'units': 'm/s'}","{'value': -0.4, 'units': 'm/s'}","{'value': 68.0, 'units': '%'}","{'value': 34.0, 'units': '%'}","{'value': 68.0, 'units': '%'}","{'value': 0.0, 'units': '%'}","{'value': 261.1, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 2.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 3.7, 'units': 'm/s'}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-14 19:00:00,"{'value': 988.1, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 10.72, 'units': 'degC'}","{'value': 3.56, 'units': 'degC'}","{'value': 70.1, 'units': '%'}","{'value': 249.7, 'units': 'deg'}","{'value': 9.1, 'units': 'm/s'}","{'value': 10120020.0, 'units': ''}","{'value': 8.6, 'units': 'm/s'}","{'value': 3.1, 'units': 'm/s'}","{'value': 26.6, 'units': '%'}","{'value': 24.5, 'units': '%'}","{'value': 14.8, 'units': '%'}","{'value': 3.4, 'units': '%'}","{'value': 272.7, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 14.0, 'units': 'm/s'}"
2023-10-14 20:00:00,"{'value': 988.0, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 10.52, 'units': 'degC'}","{'value': 3.18, 'units': 'degC'}","{'value': 69.4, 'units': '%'}","{'value': 249.4, 'units': 'deg'}","{'value': 9.0, 'units': 'm/s'}","{'value': 10120020.0, 'units': ''}","{'value': 8.4, 'units': 'm/s'}","{'value': 3.1, 'units': 'm/s'}","{'value': 25.1, 'units': '%'}","{'value': 23.5, 'units': '%'}","{'value': 12.7, 'units': '%'}","{'value': 3.1, 'units': '%'}","{'value': 271.6, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 13.7, 'units': 'm/s'}"
2023-10-14 21:00:00,"{'value': 987.9, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 10.33, 'units': 'degC'}","{'value': 2.79, 'units': 'degC'}","{'value': 68.7, 'units': '%'}","{'value': 249.1, 'units': 'deg'}","{'value': 8.8, 'units': 'm/s'}","{'value': 10120020.0, 'units': ''}","{'value': 8.3, 'units': 'm/s'}","{'value': 3.1, 'units': 'm/s'}","{'value': 23.6, 'units': '%'}","{'value': 22.4, 'units': '%'}","{'value': 10.5, 'units': '%'}","{'value': 2.9, 'units': '%'}","{'value': 270.5, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 13.3, 'units': 'm/s'}"
2023-10-14 22:00:00,"{'value': 987.8, 'units': 'hPa'}","{'value': 11.8, 'units': 'm'}","{'value': 10.13, 'units': 'degC'}","{'value': 2.41, 'units': 'degC'}","{'value': 68.0, 'units': '%'}","{'value': 248.8, 'units': 'deg'}","{'value': 8.7, 'units': 'm/s'}","{'value': 10120020.0, 'units': ''}","{'value': 8.1, 'units': 'm/s'}","{'value': 3.0, 'units': 'm/s'}","{'value': 22.2, 'units': '%'}","{'value': 21.3, 'units': '%'}","{'value': 8.4, 'units': '%'}","{'value': 2.7, 'units': '%'}","{'value': 269.4, 'units': 'W/m2'}","{'value': 0.0, 'units': 'W/m2'}","{'value': 1.0, 'units': 'index'}","{'value': 0.0, 'units': 'mm/h'}","{'value': 13.0, 'units': 'm/s'}"
