# Imports

In [1]:
import pandas as pd
import numpy as np
import os
import datetime as dt

# Divvy Dataset

## Converting time data

Reading the Divvy dataset and converting time data to hourly intervals

Last column: hourly_data

In [2]:
path = '/Users/joachimclodic/code/G-Dolle/DIVVY_BIKE/raw_data/Divvy_Trips_2021_Q1.csv'

df=pd.read_csv(path)
df['started_at']=pd.to_datetime(df['started_at'])
df['ended_at']=pd.to_datetime(df['ended_at'])
df['hourly_data_started'] = df.started_at.dt.round('60min')
df['hourly_data_ended'] = df.ended_at.dt.round('60min')
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,hourly_data_started,hourly_data_ended
0,E19E6F1B8D4C42ED,electric_bike,2021-01-23 16:14:19,2021-01-23 16:24:44,California Ave & Cortez St,17660,,,41.900341,-87.696743,41.89,-87.72,member,2021-01-23 16:00:00,2021-01-23 16:00:00
1,DC88F20C2C55F27F,electric_bike,2021-01-27 18:43:08,2021-01-27 18:47:12,California Ave & Cortez St,17660,,,41.900333,-87.696707,41.9,-87.69,member,2021-01-27 19:00:00,2021-01-27 19:00:00
2,EC45C94683FE3F27,electric_bike,2021-01-21 22:35:54,2021-01-21 22:37:14,California Ave & Cortez St,17660,,,41.900313,-87.696643,41.9,-87.7,member,2021-01-21 23:00:00,2021-01-21 23:00:00
3,4FA453A75AE377DB,electric_bike,2021-01-07 13:31:13,2021-01-07 13:42:55,California Ave & Cortez St,17660,,,41.900399,-87.696662,41.92,-87.69,member,2021-01-07 14:00:00,2021-01-07 14:00:00
4,BE5E8EB4E7263A0B,electric_bike,2021-01-23 02:24:02,2021-01-23 02:24:45,California Ave & Cortez St,17660,,,41.900326,-87.696697,41.9,-87.7,casual,2021-01-23 02:00:00,2021-01-23 02:00:00


In [3]:
df.dtypes

ride_id                        object
rideable_type                  object
started_at             datetime64[ns]
ended_at               datetime64[ns]
start_station_name             object
start_station_id               object
end_station_name               object
end_station_id                 object
start_lat                     float64
start_lng                     float64
end_lat                       float64
end_lng                       float64
member_casual                  object
hourly_data_started    datetime64[ns]
hourly_data_ended      datetime64[ns]
dtype: object


## Focusing on one station

### Number of departures

In [5]:
df_departures=df[df['start_station_id']=='17660']
df_departures=df_departures[['started_at','hourly_data_started']]
df_departures=df_departures.rename(columns={'hourly_data_started':'hourly_data'})
df_departures=df_departures.groupby(by='hourly_data').count()

df_departures.head()

Unnamed: 0_level_0,started_at
hourly_data,Unnamed: 1_level_1
2021-01-02 09:00:00,2
2021-01-02 12:00:00,2
2021-01-02 17:00:00,1
2021-01-03 00:00:00,1
2021-01-03 11:00:00,1


### Number of arrivals 

In [6]:
df_arrivals=df[df['start_station_id']=='17660']
df_arrivals=df_arrivals[['ended_at','hourly_data_ended']]
df_arrivals=df_arrivals.rename(columns={'hourly_data_ended':'hourly_data'})
df_arrivals=df_arrivals.groupby(by='hourly_data').count()
df_arrivals.head()

Unnamed: 0_level_0,ended_at
hourly_data,Unnamed: 1_level_1
2021-01-02 09:00:00,2
2021-01-02 12:00:00,1
2021-01-02 13:00:00,1
2021-01-02 18:00:00,1
2021-01-03 01:00:00,1


### Merging departures and arrivals 

In [8]:
merge_ratio=pd.merge(
    df_departures,
    df_arrivals,
    how="inner",
    on='hourly_data')
merge_ratio['ratio']=merge_ratio['started_at']/merge_ratio['ended_at']
merge_ratio

Unnamed: 0_level_0,started_at,ended_at,ratio
hourly_data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-02 09:00:00,2,2,1.0
2021-01-02 12:00:00,2,1,2.0
2021-01-03 11:00:00,1,1,1.0
2021-01-03 17:00:00,1,1,1.0
2021-01-04 05:00:00,1,1,1.0
...,...,...,...
2021-03-30 18:00:00,2,2,1.0
2021-03-30 20:00:00,1,1,1.0
2021-03-31 07:00:00,1,1,1.0
2021-03-31 12:00:00,1,2,0.5


In [20]:
merge_ratio['ratio'].unique()

array([1.        , 2.        , 0.5       , 3.        , 0.33333333,
       1.5       , 0.66666667, 1.25      , 0.81818182, 2.5       ,
       1.33333333])

# Aggregating by station

In [1]:
stations=df['start_station_id'].unique()
station_names=df['start_station_name'].unique()
def station_ratio(station):
    
    # Divvy Dataset
    df=path = '/Users/joachimclodic/code/G-Dolle/DIVVY_BIKE/raw_data/Divvy_Trips_2021_Q1.csv'
    df=pd.read_csv(path)
    
    # Departures per station
    df_departures=df[df['start_station_id']==station]
    df_departures=df_departures[['started_at','hourly_data_started']]
    df_departures=df_departures.rename(columns={'hourly_data_started':'hourly_data'})
    df_departures=df_departures.groupby(by='hourly_data').count()
    
    
    # Arrivals per station
    df_arrivals=df[df['start_station_id']=='17660']
    df_arrivals=df_arrivals[['ended_at','hourly_data_ended']]
    df_arrivals=df_arrivals.rename(columns={'hourly_data_ended':'hourly_data'})
    df_arrivals=df_arrivals.groupby(by='hourly_data').count()

    # Merge departures and arrivals to get a ratio
    merge_ratio=pd.merge(
    df_departures,
    df_arrivals,
    how="inner",
    on='hourly_data')
    merge_ratio['ratio']=merge_ratio['started_at']/merge_ratio['ended_at']
    
    # Return the merged dataset
    return merge_ratio


NameError: name 'df' is not defined

# Weather Dataset

Reading the Weather dataset and converting time data to the same format as the Divvy Dataset

Last column: hourly_data

In [10]:
path_weather= '/Users/joachimclodic/code/G-Dolle/DIVVY_BIKE/raw_data/Chicago_weather.csv'
df_weather=pd.read_csv(path_weather)

# df_weather['dt']=pd.to_datetime(df_weather['dt'])
df_weather['dt_iso']=df_weather['dt_iso'].apply(lambda x: x.rstrip('+0000 UTC').join('00'))
df_weather['dt_iso']=pd.to_datetime(df_weather['dt_iso'])
df_weather['hourly_data']=df_weather['dt_iso']

df_weather.tail()

Unnamed: 0,dt,dt_iso,timezone,city_name,lat,lon,temp,visibility,dew_point,feels_like,...,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon,hourly_data
91354,1667242800,2022-10-31 19:00:00,-18000,Chicago,41.878114,-87.629798,13.89,,12.11,13.66,...,0.24,,,,100,500,Rain,light rain,10d,2022-10-31 19:00:00
91355,1667246400,2022-10-31 20:00:00,-18000,Chicago,41.878114,-87.629798,14.15,9656.0,12.19,13.92,...,,,,,100,701,Mist,mist,50d,2022-10-31 20:00:00
91356,1667250000,2022-10-31 21:00:00,-18000,Chicago,41.878114,-87.629798,14.11,,11.81,13.82,...,0.25,,,,100,500,Rain,light rain,10d,2022-10-31 21:00:00
91357,1667253600,2022-10-31 22:00:00,-18000,Chicago,41.878114,-87.629798,13.62,,11.84,13.36,...,0.14,,,,100,500,Rain,light rain,10d,2022-10-31 22:00:00
91358,1667257200,2022-10-31 23:00:00,-18000,Chicago,41.878114,-87.629798,13.38,,11.78,13.12,...,0.11,,,,100,500,Rain,light rain,10n,2022-10-31 23:00:00


In [11]:
df_weather.dtypes

dt                              int64
dt_iso                 datetime64[ns]
timezone                        int64
city_name                      object
lat                           float64
lon                           float64
temp                          float64
visibility                    float64
dew_point                     float64
feels_like                    float64
temp_min                      float64
temp_max                      float64
pressure                        int64
sea_level                     float64
grnd_level                    float64
humidity                        int64
wind_speed                    float64
wind_deg                        int64
wind_gust                     float64
rain_1h                       float64
rain_3h                       float64
snow_1h                       float64
snow_3h                       float64
clouds_all                      int64
weather_id                      int64
weather_main                   object
weather_desc

# Merging the two Dataframes

In [13]:
merge=pd.merge(
    merge_ratio,
    df_weather,
    how="outer",
    on='hourly_data')
merge.head()

Unnamed: 0,hourly_data,started_at,ended_at,ratio,dt,dt_iso,timezone,city_name,lat,lon,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,2021-01-02 09:00:00,2.0,2.0,1.0,1609578000,2021-01-02 09:00:00,-21600,Chicago,41.878114,-87.629798,...,0.0,,,,,100,701,Mist,mist,50n
1,2021-01-02 12:00:00,2.0,1.0,2.0,1609588800,2021-01-02 12:00:00,-21600,Chicago,41.878114,-87.629798,...,0.0,,,,,100,701,Mist,mist,50n
2,2021-01-03 11:00:00,1.0,1.0,1.0,1609671600,2021-01-03 11:00:00,-21600,Chicago,41.878114,-87.629798,...,0.0,,,0.47,,100,701,Mist,mist,50n
3,2021-01-03 11:00:00,1.0,1.0,1.0,1609671600,2021-01-03 11:00:00,-21600,Chicago,41.878114,-87.629798,...,0.0,,,0.47,,100,600,Snow,light snow,13n
4,2021-01-03 17:00:00,1.0,1.0,1.0,1609693200,2021-01-03 17:00:00,-21600,Chicago,41.878114,-87.629798,...,0.0,,,0.38,,100,701,Mist,mist,50d
