In [1]:
import pandas as pd
import os
import json
from geopy.distance import geodesic


import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import seaborn as sns




In [8]:
# Load the raw OV-fiets records for a single file (one file = one day).
# The file is read with header=None, so the columns are labelled 0, 1, 2, ...
# Alternative input: 'ovfiets_201701/OVFiets_2017-01-04.csv.xz'
filename = 'ovfiets_november/OVFiets_2023-11-01.csv.xz'

df = pd.read_csv(
    filename,
    header=None,
)

In [10]:
# Column 2 holds a JSON document stored as text; decode every entry into
# Python objects and keep the result in a new "parsed" column.
df["parsed"] = df[2].map(json.loads)

# Expand the decoded records into one column per (dotted) key and put those
# columns side by side with the original ones.
df2_flat = pd.concat([df, pd.json_normalize(df["parsed"])], axis=1)

# Cast the 'extra.rentalBikes' column to float.
df2_flat = df2_flat.astype({'extra.rentalBikes': float})

# Column 0 is parsed as ISO 8601 timestamps (values that cannot be parsed
# become NaT) and only afterwards renamed to "time".
timestamps = pd.to_datetime(df2_flat[0], format="ISO8601", errors="coerce")
df2_flat[0] = timestamps
df2_flat = df2_flat.rename(columns={0: "time"})

# TODO: convert further numeric-looking string columns with .astype(float).





In [11]:
# Collapse the records to one row per station code (the first occurrence is
# kept); these rows serve as the list of OV-fiets rental locations.
df_locations = df2_flat.drop_duplicates(subset="stationCode", keep="first")

# Number of distinct station codes.
print(df_locations.shape[0])

# Convenience handles on the coordinate and station-code columns.
df_lons, df_lats, df_stationcode = (
    df_locations[column] for column in ("lng", "lat", "stationCode")
)

# Optional quick look at the locations:
# plt.scatter(df_lons, df_lats)

244


In [12]:
# Load the weather station table (one row per station).
weather_location = pd.read_csv("Usable_weather_station_data.csv")

# Convenience handles on the latitude / longitude columns.
weather_location_lats = weather_location["LAT(north)"]
weather_location_lons = weather_location["LON(east)"]

# Preview the first rows. This has to be the last expression of the cell:
# Jupyter only renders the value of the final expression, so a bare
# `.head(10)` placed earlier in the cell is evaluated and silently discarded.
weather_location.head(10)

In [13]:
# Build two standalone map figures. Neither one is displayed in this cell.

# Map of the OV-fiets rental locations, with the station code on hover.
q2_fig = px.scatter_map(
    df_locations,
    lat='lat',
    lon="lng",
    hover_data=('stationCode',),
    zoom=5.5,
    center=dict(lat=52.1010, lon=5.6461),
    title='OV-fiets rental locations',
)

# Map of the KNMI weather stations, with number, name and altitude on hover.
fig = px.scatter_map(
    weather_location,
    lat="LAT(north)",
    lon="LON(east)",
    hover_data=('STN', 'NAME', 'ALT(m)'),
    zoom=6.5,
    center=dict(lat=52.1010947603812, lon=5.646120953892783),
    title="KNMI Weather Station Locations",
    width=1000,
    height=1000,
)

# To render the rental-location map on its own: q2_fig.show()


In [17]:

# Combined map: OV-fiets rental locations (blue) and KNMI weather stations
# (red) drawn on the same figure, saved to PDF and then displayed.

# First trace: the rental locations, created through plotly express.
fig = px.scatter_map(
    df_locations,
    lat="lat",
    lon="lng",
    hover_data=["stationCode"],
    zoom=6.5,
    center={"lat": 52.1010, "lon": 5.6461},
    title="OV-fiets rental locations and KNMI weather stations",
)

# plotly express leaves a single unnamed trace out of the legend; give it a
# name and switch the legend entry on so both layers are labelled.
fig.data[0].name = "OV-fiets Rental Locations"
fig.data[0].showlegend = True
fig.data[0].marker.color = "blue"
fig.data[0].marker.size = 7

# Second trace: the weather stations, added to the same map.
fig.add_scattermap(
    lat=weather_location["LAT(north)"],
    lon=weather_location["LON(east)"],
    mode="markers",
    marker=dict(size=7, color="red"),
    name="KNMI Weather Stations",
    hovertext=weather_location["NAME"],
    showlegend=True,
)

# Base-map style and figure layout.
# scatter_map / scattermap traces are drawn on the `layout.map` subplot, so
# the style has to be set through `map_style`. `mapbox_style` configures the
# separate `layout.mapbox` subplot (used by the *_mapbox traces) and has no
# effect on this figure.
fig.update_layout(
    map_style="open-street-map",
    width=1000,
    height=800,
    legend=dict(
        title="Legend",
        x=0.02,
        y=0.98,
    ),
)

# Export a static copy, then show the interactive figure.
fig.write_image("locations_map.pdf")
fig.show()


In [9]:


# Pair every OV-fiets rental location with its nearest weather station.
#
# Inputs:
#   df_locations     - rental locations (columns used: lat, lng, stationCode, description)
#   weather_location - weather stations (columns used: LAT(north), LON(east), STN, NAME)
# Output:
#   mapping_df       - one row per rental location with the nearest station's
#                      STN / NAME and the geodesic distance in km (2 decimals)

# The weather-station rows and their coordinates do not depend on the rental
# location, so materialise them once instead of re-running iterrows() inside
# the outer loop for every location.
weather_points = [
    ((w_row["LAT(north)"], w_row["LON(east)"]), w_row)
    for _, w_row in weather_location.iterrows()
]

pairs = []

for _, loc_row in df_locations.iterrows():
    loc_point = (loc_row["lat"], loc_row["lng"])
    nearest_station = None
    nearest_dist = float("inf")

    # Linear scan over all weather stations; the strict `<` keeps the first
    # station encountered when two are equally close.
    for w_point, w_row in weather_points:
        dist = geodesic(loc_point, w_point).km
        if dist < nearest_dist:
            nearest_dist = dist
            nearest_station = w_row

    # Without any weather station there is nothing to pair with; fail with a
    # clear message instead of a TypeError from indexing None below.
    if nearest_station is None:
        raise ValueError("weather_location contains no weather stations to match against")

    pairs.append({
        "stationCode": loc_row["stationCode"],
        "description": loc_row["description"],
        "weather_station": nearest_station["STN"],
        "weather_name": nearest_station["NAME"],
        "distance_km": round(nearest_dist, 2)
    })

# Create mapping DataFrame
mapping_df = pd.DataFrame(pairs)


In [10]:
# Preview the first five rows of the location -> weather station mapping.
mapping_df.head(n=5)



Unnamed: 0,stationCode,description,weather_station,weather_name,distance_km
0,WC,Wijchen,375,Volkel,17.02
1,EC,Echt,377,Ell,13.56
2,RS,Rosmalen,356,Herwijnen,22.25
3,Vg001,Vught,370,Eindhoven,23.55
4,NMGO,Nijmegen Goffert,375,Volkel,20.36


In [11]:
# Look up the mapping row(s) for the rental location with station code 'DT'.
mapping_df.loc[mapping_df['stationCode'] == 'DT']

Unnamed: 0,stationCode,description,weather_station,weather_name,distance_km
12,DT,"Delft, Fietsenstalling",344,Rotterdam,7.9
