In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import dotenv
import re
import pandas as pd
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, root_mean_squared_error

# Dataframe
## Dataframe Setup

In [33]:
df_hubs = pd.read_csv('Hub List all time + pick up and drop offs.csv')
df_pickup = pd.read_csv('2024 - Hub ID & Drop Off - Pick Up numbers.csv',sep=";")

In [34]:
df_pickup

Unnamed: 0,hub_id,Days in full_date,Average bike_count,# of unique dropoff_hub_id,# of unique pickup_hub_id
0,2180.0,01/01/2024,6.282412,,
1,2180.0,02/01/2024,6.080100,,
2,2180.0,04/01/2024,6.078173,,
3,2180.0,05/01/2024,5.936238,,
4,2180.0,07/01/2024,6.443539,,
...,...,...,...,...,...
6925,26889.0,14/09/2024,6.150918,21.0,20.0
6926,26889.0,15/09/2024,6.229018,12.0,12.0
6927,26889.0,16/09/2024,6.177963,11.0,14.0
6928,26889.0,17/09/2024,6.418395,16.0,18.0


In [35]:
df_pickup.rename(columns = {'hub_id':'id'}, inplace=True)
df_combined = pd.merge(df_pickup, df_hubs, on='id')
df_combined["pick_drop_relation"] = round(df_combined["dropoffs"] /df_combined["pickups"],4)
df_combined["pick_drop_deficit"] = df_combined["# of unique dropoff_hub_id"] - df_combined["# of unique pickup_hub_id"]
df_combined['Days in full_date'] = pd.to_datetime(df_combined['Days in full_date'],format='%d/%m/%Y')

In [36]:
print(df_hubs.shape)
print(df_pickup.shape)
print(df_combined.shape)

(233, 7)
(6930, 5)
(6707, 13)


In [37]:
#find discrepency where bikes dont regulate themselves
df_combined.sort_values("pick_drop_relation",ascending=True)
df_combined.groupby(by="name")[['pickups', "dropoffs", "pick_drop_relation", "pick_drop_deficit"]].mean().sort_values("pick_drop_relation", ascending=False)

Unnamed: 0_level_0,pickups,dropoffs,pick_drop_relation,pick_drop_deficit
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Wik, Kanalfähre - Busstation",2929.0,3033.0,1.0355,-0.25
MEKUN Olympiahochhaus,15389.0,15721.0,1.0216,-0.221374
Wellingdorf Stadtteilzentrum,6035.0,6136.0,1.0167,-0.166667
Alter Markt,15439.0,15676.0,1.0154,-0.307692
Anleger Dietrichsdorf,4627.0,4683.0,1.0121,-0.155378
thyssenkrupp Marine Systems,11136.0,11255.0,1.0107,-0.129412
Hörnbad,9990.0,10091.0,1.0101,-0.231076
Tilsiter Platz,5658.0,5697.0,1.0069,-0.205426
Vinetaplatz,8225.0,8281.0,1.0068,-0.131687
Alte Mu/Brunswiker Str.,7544.0,7593.0,1.0065,-0.086735


## Dataframe IDA

In [38]:
print(f'The data lists {len(df_combined.id.unique())} unique ids and {len(df_combined.name.unique())} unique names.')
#time series of specific place
sandkrug_df = df_combined[df_combined['name'] == 'Sandkrug'].sort_values("Days in full_date", ascending=False)

The data lists 36 unique ids and 36 unique names.


In [39]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=sandkrug_df['Days in full_date'], y=sandkrug_df['pick_drop_deficit'], mode='lines', name='Value'))
fig.update_layout(title='Time Series Line Plot', xaxis_title='Date', yaxis_title='Value')
fig.show()

In [40]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=sandkrug_df['Days in full_date'], y=sandkrug_df['# of unique dropoff_hub_id'], mode='lines', name='Dropoff'))
fig.add_trace(go.Scatter(x=sandkrug_df['Days in full_date'], y=sandkrug_df['# of unique pickup_hub_id'], mode='lines', name='Pickup'))
fig.update_layout(title='Time Series Line Plot', xaxis_title='Date', yaxis_title='Value')
fig.show()

In [41]:
Wik_bus_df = df_combined[df_combined['name'] == ' Wik, Kanalfähre - Busstation'].sort_values("Days in full_date", ascending=False)
fig = go.Figure()
fig.add_trace(go.Scatter(x=Wik_bus_df['Days in full_date'], y=Wik_bus_df['# of unique dropoff_hub_id'], mode='lines', name='Dropoff'))
fig.add_trace(go.Scatter(x=Wik_bus_df['Days in full_date'], y=Wik_bus_df['# of unique pickup_hub_id'], mode='lines', name='Pickup'))
fig.update_layout(title='Time Series Line Plot', xaxis_title='Date', yaxis_title='Value')
fig.show()

# API
## API ACCESS

In [42]:
# .env
config = dotenv.dotenv_values("paul_sprotte.env")

PASSWORD = config["PASSWORD"]

CLIENT_SECRET = config["CLIENT_SECRET"]

In [43]:
token_url = 'https://accounts.kielregion.addix.io/realms/infoportal/protocol/openid-connect/token'
headers = {
    'Content-Type': 'application/x-www-form-urlencoded'
}

data = {
    'grant_type': 'password',
    'username': 'business.brodmapa@outlook.com', 
    'password': PASSWORD,
    'client_id': 'quantumleap',
    'client_secret': 'fP81XZ5OTt5iRJ7qhyyTCv4eQtpGqc5i'
}

response = requests.post(token_url, headers=headers, data=data)
if response.status_code == 200:
    token_data = response.json()
    access_token = token_data['access_token']
    ACCESS_TOKEN = token_data['access_token']
    print(f"Bearer Token successful requested")
    if access_token:
        dotenv.set_key('paul_sprotte.env', 'ACCESS_TOKEN', access_token)
        print(f"Access Token erfolgreich in die .env-Datei geschrieben.")
        print(access_token)
else:
    print(f"Error: {response.status_code}, {response.text}")

Bearer Token successful requested
Access Token erfolgreich in die .env-Datei geschrieben.
eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPbWpOY21GRmgwTjV6Wlg1eHg5Zk9mbE0xNFljTmY3WFlieG02OTJtczhBIn0.eyJleHAiOjE3MjkwNjg2NTEsImlhdCI6MTcyODkyNDY1MSwianRpIjoiZmFjYjgzYWItYWE0OC00OWZmLWI3NGMtNGVjMmY0ZjNlNmMyIiwiaXNzIjoiaHR0cHM6Ly9hY2NvdW50cy5raWVscmVnaW9uLmFkZGl4LmlvL3JlYWxtcy9pbmZvcG9ydGFsIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6IjI0NzQ4NTgxLTU4MGEtNDljZS1iZmY2LTY0YmMwYmExYjY1ZiIsInR5cCI6IkJlYXJlciIsImF6cCI6InF1YW50dW1sZWFwIiwic2Vzc2lvbl9zdGF0ZSI6IjQ5MDI4YmQzLTU0YWEtNGU5NC1iODFhLWViODE5OWFjNGEzNiIsImFjciI6IjEiLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOlsiZGVmYXVsdC1yb2xlcy1pbmZvcG9ydGFsIiwib2ZmbGluZV9hY2Nlc3MiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwicmVzb3VyY2VfYWNjZXNzIjp7InF1YW50dW1sZWFwIjp7InJvbGVzIjpbImRhdGEtY29uc3VtZXItcWwiXX0sImFjY291bnQiOnsicm9sZXMiOlsibWFuYWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXctcHJvZmlsZSJdfX0sInNjb3BlIjoicm9sZXMgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjQ5MDI4YmQzLTU0YWEtNGU5NC1iODFhLW

In [44]:
url = "https://apis.kielregion.addix.io/ql/v2/entities/urn:ngsi-ld:BikeHireDockingStation:KielRegion:26889"
param = {
    'type': 'BikeHireDockingStation',
    'fromDate': '2024-09-13T00:00:00',
    'toDate': '2024-09-13T23:59:59',
    'attrs': 'name,totalSlotNumber,availableBikeNumber,freeSlotNumber'}

header = {
    'NGSILD-Tenant': 'infoportal',
    'Authorization': f'Bearer {ACCESS_TOKEN}'
}
response = requests.get(url, headers=header, params=param)

if response.status_code == 200:
    response_data = response.json()
    print('got a response')
else:
    print(f"Error: {response.status_code}, {response.text}")

got a response


In [45]:
ACCESS_TOKEN

'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJPbWpOY21GRmgwTjV6Wlg1eHg5Zk9mbE0xNFljTmY3WFlieG02OTJtczhBIn0.eyJleHAiOjE3MjkwNjg2NTEsImlhdCI6MTcyODkyNDY1MSwianRpIjoiZmFjYjgzYWItYWE0OC00OWZmLWI3NGMtNGVjMmY0ZjNlNmMyIiwiaXNzIjoiaHR0cHM6Ly9hY2NvdW50cy5raWVscmVnaW9uLmFkZGl4LmlvL3JlYWxtcy9pbmZvcG9ydGFsIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6IjI0NzQ4NTgxLTU4MGEtNDljZS1iZmY2LTY0YmMwYmExYjY1ZiIsInR5cCI6IkJlYXJlciIsImF6cCI6InF1YW50dW1sZWFwIiwic2Vzc2lvbl9zdGF0ZSI6IjQ5MDI4YmQzLTU0YWEtNGU5NC1iODFhLWViODE5OWFjNGEzNiIsImFjciI6IjEiLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOlsiZGVmYXVsdC1yb2xlcy1pbmZvcG9ydGFsIiwib2ZmbGluZV9hY2Nlc3MiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwicmVzb3VyY2VfYWNjZXNzIjp7InF1YW50dW1sZWFwIjp7InJvbGVzIjpbImRhdGEtY29uc3VtZXItcWwiXX0sImFjY291bnQiOnsicm9sZXMiOlsibWFuYWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXctcHJvZmlsZSJdfX0sInNjb3BlIjoicm9sZXMgcHJvZmlsZSBlbWFpbCIsInNpZCI6IjQ5MDI4YmQzLTU0YWEtNGU5NC1iODFhLWViODE5OWFjNGEzNiIsImVtYWlsX3ZlcmlmaWVkIjp0cnVlLCJuYW1lIjoiUGF1bCBCcm9kbWFubiIsInByZWZlcnJ

In [46]:
print(response_data)

{'attributes': [{'attrName': 'availableBikeNumber', 'values': [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3

In [47]:
df_flx = pd.read_csv('FelixData_final_main_kiel.csv')

In [48]:
print(f'''The dataframe has a {df_flx.shape[0]} rows on {df_flx.shape[1]} columns. The colums
have the following names: \n{'\n'.join(df_flx.columns.tolist())}''')

The dataframe has a 86501 rows on 9 columns. The colums
have the following names: 
index
entityId
entityType
name
availableBikeNumber
freeSlotNumber
totalSlotNumber
pickups
dropoffs


## Combine dataframes

## Visualisation

In [49]:
#visualise these
# analyse these in terms of: 
# - how long does a station stay empty/overcrowded
# - how intense is the overcpopulation
# - which stations are regular for overpopulation
# - relation between weather and bike requests
df_flx[df_flx['availableBikeNumber'] == 0]
df_flx[df_flx['availableBikeNumber'] > df_flx['totalSlotNumber']]

Unnamed: 0,index,entityId,entityType,name,availableBikeNumber,freeSlotNumber,totalSlotNumber,pickups,dropoffs
4570,2024-09-20T11:00:00.000+00:00,24368,BikeHireDockingStation,Umsteiger,20.083333,0.283333,20.0,7633,7629
4571,2024-09-20T12:00:00.000+00:00,24368,BikeHireDockingStation,Umsteiger,20.116667,0.583333,20.0,7633,7629
4572,2024-09-20T13:00:00.000+00:00,24368,BikeHireDockingStation,Umsteiger,21.050000,0.266667,20.0,7633,7629
4574,2024-09-20T15:00:00.000+00:00,24368,BikeHireDockingStation,Umsteiger,21.650000,0.650000,20.0,7633,7629
4575,2024-09-20T16:00:00.000+00:00,24368,BikeHireDockingStation,Umsteiger,21.203390,1.169492,20.0,7633,7629
...,...,...,...,...,...,...,...,...,...
83936,2024-09-24T16:00:00.000+00:00,26224,BikeHireDockingStation,"Wik, Kanalfähre - Busstation",20.700000,0.000000,20.0,2929,3033
83937,2024-09-24T17:00:00.000+00:00,26224,BikeHireDockingStation,"Wik, Kanalfähre - Busstation",21.000000,0.000000,20.0,2929,3033
83938,2024-09-24T18:00:00.000+00:00,26224,BikeHireDockingStation,"Wik, Kanalfähre - Busstation",21.000000,0.000000,20.0,2929,3033
84082,2024-09-30T18:00:00.000+00:00,26224,BikeHireDockingStation,"Wik, Kanalfähre - Busstation",20.500000,0.300000,20.0,2929,3033


In [50]:
df_flx.name.unique()

array(['Anleger Dietrichsdorf', 'Umsteiger', 'Alte Mu/Brunswiker Str.',
       'Alter Markt', 'Anleger Reventlou', 'Christian-Albrechts-Platz',
       'Hörnbad', 'Kirchhofallee', 'RBZ Wirtschaft', 'Vinetaplatz',
       'Westring/Eckernförder Str.', 'CAU Sportstätten', 'Dreiecksplatz',
       'Hansastr./Gutenbergstr.', 'Blücherplatz', 'Andreas-Gayk-Straße',
       'Tilsiter Platz', 'Wellingdorf Stadtteilzentrum', 'Hauptbahnhof',
       'Exerzierplatz', 'thyssenkrupp Marine Systems',
       'MEKUN Olympiahochhaus', 'Bernhard-Minetti-Platz',
       'KVG Betriebshof Diedrichstraße', 'Sophienhof', 'ADAC-Station',
       'Förde Sparkasse Lorentzendamm', 'Studiale', 'UKSH', 'Sandkrug',
       'Zur Fähre', 'Seefischmarkt', 'Krausstraße',
       'Kieler Innovations- und Technologiezentrum',
       ' Wik, Kanalfähre - Busstation', 'Wilhelmplatz'], dtype=object)

## ML Part

In [51]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error


In [52]:
df_ags = df_flx[df_flx['name'] == 'Andreas-Gayk-Straße']
df_ags_mf = df_ags.copy()

df_ags_mf['index'] = pd.to_datetime(df_ags_mf['index']) #more features

# Features erzeugen
df_ags_mf['Year'] = df_ags_mf['index'].dt.year
df_ags_mf['Month'] = np.sin(df_ags_mf['index'].dt.month)
df_ags_mf['Day'] = df_ags_mf['index'].dt.day
df_ags_mf['Hour'] = df_ags_mf['index'].dt.hour  # Stunde des Tages im 24-Stunden-Format
df_ags_mf['Season'] = df_ags_mf['index'].dt.month % 12 // 3 + 1  # Jahreszeit (1=Winter, 2=Frühling, 3=Sommer, 4=Herbst)
df_ags_mf['Weekend'] = df_ags_mf['index'].dt.weekday >= 5  # Boolescher Wert für Wochenende (Samstag=5, Sonntag=6)
df_ags_mf['DayOfWeek'] = np.cos(df_ags_mf['index'].dt.dayofweek)
df_ags_mf['DayOfYear'] = df_ags_mf['index'].dt.dayofyear

df_ags_mf.head(3)

Unnamed: 0,index,entityId,entityType,name,availableBikeNumber,freeSlotNumber,totalSlotNumber,pickups,dropoffs,Year,Month,Day,Hour,Season,Weekend,DayOfWeek,DayOfYear
35828,2024-06-17 11:00:00+00:00,24392,BikeHireDockingStation,Andreas-Gayk-Straße,2.2,27.4,30.0,6413,6396,2024,-0.279415,17,11,3,False,1.0,169
35829,2024-06-17 12:00:00+00:00,24392,BikeHireDockingStation,Andreas-Gayk-Straße,1.0,28.0,30.0,6413,6396,2024,-0.279415,17,12,3,False,1.0,169
35830,2024-06-17 13:00:00+00:00,24392,BikeHireDockingStation,Andreas-Gayk-Straße,1.416667,27.8,30.0,6413,6396,2024,-0.279415,17,13,3,False,1.0,169


In [53]:
df_ags_mf['index'].dtype

datetime64[ns, UTC]

In [54]:
print(df_ags_mf.shape)
df_ags_mf.dropna()
print(df_ags_mf.shape)
df_ags_mf.set_index('index', inplace=True)
df_ags_mf.dropna(inplace=True)
df_ags_mf.index.tz_localize(None)
df_ags_mf = df_ags_mf.asfreq('h')

(2412, 17)
(2412, 17)


In [55]:
def plot_that(data1_, forcast_):
    from plotly.subplots import make_subplots
    fig = make_subplots()
    orng = '#DF7D39'
    lbl = '#66BECF'
    color = '#092947'

    fig.add_trace(go.Scatter(x=data1_.index, y=data1_ ,name='totalSlotNumber',
        marker=dict(color=lbl),),
                row=1, col=1)
    fig.add_trace(go.Scatter(x=forcast_.index, y=forcast_ ,name='Forecast',
        marker=dict(color=orng),),
                row=1, col=1)
    fig.update_traces(textposition="bottom right")
    fig.update_layout(
        width = 1000,
        height = 500,
        title="Besetzungsstsatus der Radstation",
        yaxis_title="Verfügbare Räder",
        xaxis_title="Datum",
        paper_bgcolor='white',  # Background of the whole plot
        plot_bgcolor='white',
        xaxis=dict(
            gridwidth=1,                # Width of vertical grid lines
            showline=True,
            linewidth=2, linecolor=color
        ),
        yaxis=dict(
            #range=[0, 300],  # Set the y-axis limits from 0 to 10
            showgrid=True,             # Show horizontal grid lines
            gridcolor='lightgrey',         # Color of horizontal grid lines
            gridwidth=1,                # Width of horizontal grid lines
            showline=True,
            linewidth=2, linecolor=color
        )
    )
    fig.show()

In [56]:
train = df_ags_mf[(df_ags_mf.index <= '2024-08-25 00:00:00+00:00') & (df_ags_mf.index > '2024-08-08 00:00:00+00:00')]['availableBikeNumber']
test = df_ags_mf[(df_ags_mf.index >= '2024-08-25 00:00:00+00:00') & (df_ags_mf.index < '2024-08-26 00:00:00+00:00')]['availableBikeNumber']
try:
    model = ExponentialSmoothing(train, trend='add', seasonal='add', initialization_method = 'estimated',
                                 seasonal_periods=24)
    fit = model.fit(smoothing_seasonal=0.2,)
    forecast = fit.forecast(
        steps=24)
except:
    print(fit.mle_retvals)
print(f'Mean Squared Error: {mean_squared_error(test, forecast)}')
print(f'Mean Absolute percentage Error: {mean_absolute_percentage_error(test, forecast)}')
print(f'Root Mean Squared Error: {root_mean_squared_error(test, forecast)}')
plot_that(test,forecast)

Mean Squared Error: 0.4253576957725433
Mean Absolute percentage Error: 1678091562424614.8
Root Mean Squared Error: 0.6521945229550332


In [57]:
train = df_ags_mf[(df_ags_mf.index <= '2024-08-21 00:00:00+00:00') & (df_ags_mf.index > '2024-08-9 00:00:00+00:00')]['availableBikeNumber']
test = df_ags_mf[(df_ags_mf.index >= '2024-08-21 00:00:00+00:00') & (df_ags_mf.index < '2024-08-23 00:00:00+00:00')]['availableBikeNumber']
try:
    model = ExponentialSmoothing(train, trend='add', seasonal='add', initialization_method = 'estimated',
                                 seasonal_periods=24)
    fit = model.fit(smoothing_seasonal=0.2,)
    forecast = fit.forecast(
        steps=48)
except:
    print(fit.mle_retvals)
print(f'Mean Squared Error: {mean_squared_error(test, forecast)}')
print(f'Mean Absolute percentage Error: {mean_absolute_percentage_error(test, forecast)}')
print(f'Root Mean Squared Error: {root_mean_squared_error(test, forecast)}')
plot_that(test,forecast)

Mean Squared Error: 0.4480737065080438
Mean Absolute percentage Error: 1216097326072340.8
Root Mean Squared Error: 0.6693830790422206


---

In [58]:
df_cutout = df_ags_mf[df_ags_mf.index >= '2024-09-27 00:00:00+00:00']
try:
    model = ExponentialSmoothing(df_cutout['availableBikeNumber'], 
                                trend='add', 
                                seasonal='add', 
                                seasonal_periods=24)
    fit = model.fit(smoothing_seasonal=0.2,)#smoothing_level=1, smoothing_slope=1, smoothing_seasonal=1)
    forecast = fit.forecast(steps=40)
except:
    print(fit.mle_retvals)

In [59]:
plot_that(df_cutout['availableBikeNumber'],forecast)