In [1]:
# Shell escape: sync the contents of the `rowerki` S3 bucket into the local
# ./data directory using the `aws` command-line tool.
!aws s3 sync s3://rowerki ./data

In [2]:
# Switch off Jedi in IPython's tab completer for this kernel session.
%config Completer.use_jedi = False

In [3]:
import os
import shutil
import requests 
import geopandas as gpd
import contextily as cx
import pandas as pd
import imageio
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta

In [4]:
# Download the place (station) list for the requested city ids from the
# nextbike endpoint and flatten it into one row per place.
NEXTBIKE_URL = "https://nextbike.net/maps/nextbike-official.json?city=372,210,475"
STATION_COLUMNS = ['uid', 'lat', 'lng', 'name', 'bike_racks']

# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(NEXTBIKE_URL, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error body.
response.raise_for_status()
payload = response.json()

# NOTE(review): only the first entry of "countries" is read, as before —
# confirm that every requested city is reported under that entry.
data = [
    {col: place[col] for col in STATION_COLUMNS}
    for city in payload["countries"][0]['cities']
    for place in city['places']
]
# Explicit columns keep the schema stable even if no places were returned.
stations = pd.DataFrame(data, columns=STATION_COLUMNS)
# Point geometry is built from (lng, lat) in WGS84 (EPSG:4326).
gpd_stations = gpd.GeoDataFrame(stations, geometry=gpd.points_from_xy(stations.lng, stations.lat), crs='EPSG:4326')
stations.sort_values("bike_racks")

Unnamed: 0,uid,lat,lng,name,bike_racks,geometry
329,2681635,52.234390,20.999159,Spektrum Tower,11,POINT (20.99916 52.23439)
342,18751910,52.183037,20.991542,Signum Work Station,12,POINT (20.99154 52.18304)
348,55316960,52.196220,20.938810,Bolero,15,POINT (20.93881 52.19622)
76,2585400,52.231450,20.988690,Prosta - Wronia,15,POINT (20.98869 52.23145)
267,2586001,52.247314,21.042731,Metro Stadion Narodowy,15,POINT (21.04273 52.24731)
...,...,...,...,...,...,...
201,2585885,52.157289,21.044008,Rosoła - Ciszewskiego - SGGW,30,POINT (21.04401 52.15729)
207,2585901,52.142720,21.038362,Pileckiego - Alternatywy,30,POINT (21.03836 52.14272)
60,2585369,52.244947,21.001294,Andersa - Długa,30,POINT (21.00129 52.24495)
210,2585905,52.140391,21.056819,Metro Natolin,30,POINT (21.05682 52.14039)


In [5]:
# Directory with the CSV files to load; the first cell syncs the S3 bucket
# into ./data. A plain relative name works on every OS, unlike the
# Windows-only '.\data' (whose '\d' is also an invalid string escape).
data_path = 'data'
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the concatenated frame reproducible between runs and machines.
data_files = sorted(os.listdir(data_path))
# reset_index() keeps each file's own row number in an 'index' column and
# gives the combined frame a fresh 0..n-1 index.
bikes = pd.concat([pd.read_csv(os.path.join(data_path, p)) for p in data_files]).reset_index()
bikes['date'] = pd.to_datetime(bikes['date'])
# Timestamp truncated to midnight, used later to select a single day.
bikes['day'] = bikes['date'].dt.floor('d')
bikes

Unnamed: 0,index,date,station_id,bike_number,bike_type,state,day
0,0,2022-04-24 17:18:00,2585259,24927,174,ok,2022-04-24
1,1,2022-04-24 17:18:00,2585259,24801,174,ok,2022-04-24
2,2,2022-04-24 17:18:00,2585259,24655,174,ok,2022-04-24
3,3,2022-04-24 17:18:00,2585259,24648,174,ok,2022-04-24
4,4,2022-04-24 17:18:00,2585259,27559,174,ok,2022-04-24
...,...,...,...,...,...,...,...
3018770,3770,2022-05-08 18:24:00,48372123,26843,174,ok,2022-05-08
3018771,3771,2022-05-08 18:24:00,48372123,96565,174,ok,2022-05-08
3018772,3772,2022-05-08 18:24:00,52488396,91741,4,ok,2022-05-08
3018773,3773,2022-05-08 18:24:00,55316960,96033,174,ok,2022-05-08


In [6]:
# Derive, for every observation of a bike, the station it was seen at in the
# previous / next observation and whether the observation is flagged as a
# return or a rental.
#
# The frame is sorted chronologically once (stable, so rows sharing a timestamp
# keep their existing order) and only the column that is actually needed is
# shifted. Results are aligned back onto `bikes` by index.
bikes_by_time = bikes.sort_values(by=['date'], kind='stable')
station_per_bike = bikes_by_time.groupby(['bike_number'])['station_id']
date_per_bike_station = bikes_by_time.groupby(['bike_number', 'station_id'])['date']

# Nullable Int64 because the first / last observation of a bike has no neighbour.
bikes['prev_station'] = station_per_bike.shift(1).astype('Int64')
bikes['next_station'] = station_per_bike.shift(-1).astype('Int64')

# You can't do next != current because it does not detect loops
# Flagged when this is the bike's first observation at this station, or when
# the previous observation of the bike was at a different station.
bikes['just_returned'] = (
    date_per_bike_station.shift(1).isna()
    | (bikes['prev_station'] != bikes['station_id'])
)
# Mirror image: the bike's last observation at this station, or the next
# observation of the bike is at a different station.
bikes['just_rented'] = (
    date_per_bike_station.shift(-1).isna()
    | (bikes['next_station'] != bikes['station_id'])
)
# Spot-check the derived columns on a single bike.
bikes[bikes['bike_number'] == 96033].sort_values(by=['date'], kind='stable')

Unnamed: 0,index,date,station_id,bike_number,bike_type,state,day,prev_station,next_station,just_returned,just_rented
8782,1198,2022-04-24 15:44:00,2585470,96033,174,ok,2022-04-24,,2585470,True,False
12608,1201,2022-04-24 15:44:00,2585470,96033,174,ok,2022-04-24,2585470,2585470,False,False
16435,1201,2022-04-24 15:51:00,2585470,96033,174,ok,2022-04-24,2585470,2585470,False,False
20295,1241,2022-04-24 16:21:00,2585470,96033,174,ok,2022-04-24,2585470,2585470,False,False
24302,1335,2022-04-24 16:51:00,2585470,96033,174,ok,2022-04-24,2585470,2585470,False,False
...,...,...,...,...,...,...,...,...,...,...,...
3004184,3311,2022-05-08 16:51:00,55316960,96033,174,ok,2022-05-08,55316960,55316960,False,False
3007654,3468,2022-05-08 17:21:00,55316960,96033,174,ok,2022-05-08,55316960,55316960,False,False
3011265,3609,2022-05-08 17:51:00,55316960,96033,174,ok,2022-05-08,55316960,55316960,False,False
3014998,3731,2022-05-08 18:21:00,55316960,96033,174,ok,2022-05-08,55316960,55316960,False,False


In [7]:
# Rows from 2022-04-27 that carry the just_rented flag. reset_index() moves the
# previous index into a column and renumbers the selection from 0.
on_selected_day = bikes['day'] == '2022-04-27'
rented_mask = on_selected_day & (bikes['just_rented'])
df = bikes[rented_mask].reset_index()
df

Unnamed: 0,level_0,index,date,station_id,bike_number,bike_type,state,day,prev_station,next_station,just_returned,just_rented
0,552699,488,2022-04-27 00:21:00,2585319,97477,174,ok,2022-04-27,2585319,2585337,False,True
1,552716,505,2022-04-27 00:21:00,2585321,97727,174,ok,2022-04-27,2585321,2585285,False,True
2,552949,738,2022-04-27 00:21:00,2585346,27239,174,ok,2022-04-27,2585346,2585345,False,True
3,553188,977,2022-04-27 00:21:00,2585377,27759,174,ok,2022-04-27,2585377,2585340,False,True
4,553396,1185,2022-04-27 00:21:00,2585405,25143,174,ok,2022-04-27,2585405,2585334,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
13044,773329,1545,2022-04-27 23:51:00,2585695,25337,174,ok,2022-04-27,2585695,2586034,False,True
13045,773374,1590,2022-04-27 23:51:00,2585704,29481,4,ok,2022-04-27,2585704,2586037,False,True
13046,773506,1722,2022-04-27 23:51:00,2585728,28990,174,ok,2022-04-27,2585728,2585712,False,True
13047,773579,1795,2022-04-27 23:51:00,2585762,24651,174,ok,2022-04-27,2585762,2585771,False,True


In [8]:
# Render one PNG per 30-minute window over the 24 hours following the first
# timestamp in `df`. Stations with a row in the current window are drawn red;
# the red then fades over the following frames.
plot_settings = {'markersize': 50}
IMG_PATH = 'imgs'
# Start from an empty directory so frames from an earlier run cannot leak into
# the file listing that is built from this folder afterwards.
if os.path.exists(IMG_PATH):
    shutil.rmtree(IMG_PATH)
os.makedirs(IMG_PATH, exist_ok=True)

delta = timedelta(minutes=30)
# The window bounds never change while rendering, so compute them once instead
# of re-scanning the whole column on every loop test. Series.min() also yields
# NaT for an empty frame, which simply skips the loop instead of raising.
start = df['date'].min()
end = start + timedelta(hours=24)
t = start
i = 0
# Per-station colour intensity in [0, 1]: 0 is white, 1 is fully red.
colors_period = pd.Series(np.zeros(stations.shape[0]))
while t < end:
    fig, ax = plt.subplots(figsize=(30, 20))
    ax.set_title(t.strftime('%H:%M'), fontsize=50)
    # Stations that have at least one row inside [t, t + delta).
    stations_period = df[(df['date'] >= t) & (df['date'] < t + delta)]['station_id']
    colors_period[stations['uid'].isin(stations_period)] = 1
    # Map intensity x to the RGB triple (1, 1-x, 1-x).
    ax = gpd_stations.plot(ax=ax, c=colors_period.map(lambda x: (1., 1-x, 1-x)), **plot_settings)
    cx.add_basemap(ax=ax, crs=gpd_stations.crs.to_string())
    # Zero-padded counter keeps the file names in frame order when sorted.
    fig.savefig(os.path.join(IMG_PATH, f'img{i:0>5}.png'),
                dpi=100, format='png',
                bbox_inches='tight')
    # Close this specific figure so figures do not pile up in memory.
    plt.close(fig)
    i += 1
    t += delta
    # Fade the markers before the next frame is drawn.
    colors_period *= 0.7

In [9]:
# Paths of everything inside IMG_PATH, ordered by sorted file name.
frame_names = sorted(os.listdir(IMG_PATH))
filenames = [os.path.join(IMG_PATH, frame_name) for frame_name in frame_names]

In [10]:
import moviepy.video.io.ImageSequenceClip as vid

# Assemble the listed frame images into a clip at one frame per second and
# write it out as an MP4 file.
VIDEO_PATH = 'rented_bikes_activity.mp4'
clip = vid.ImageSequenceClip(filenames, fps=1)
clip.write_videofile(VIDEO_PATH)

Moviepy - Building video rented_bikes_activity.mp4.
Moviepy - Writing video rented_bikes_activity.mp4



                                                                                                                                                                                        

Moviepy - Done !
Moviepy - video ready rented_bikes_activity.mp4
