## Import all packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import statsmodels
import folium
from folium import plugins
from folium.plugins import HeatMap
from datetime import datetime #for working with times objects
from datetime import timedelta #for working with times objects
from datetime import date
import math
import random
import swifter
import json
from geopandas.tools import geocode

ModuleNotFoundError: No module named 'geopandas'

## Import data sets

In [None]:
#read in the data

# Both CSV files are read from the current working directory.
chi_data = pd.read_csv("chicago_2017.csv")
chi_weather = pd.read_csv("weather_hourly_chicago.csv")

## General information about the raw data

### chicago_2017 data set

In [None]:
#chi_data.info()

In [None]:
#chi_data.describe()

In [None]:
#chi_data.head()

### weather_hourly_chicago data set

In [None]:
#chi_weather.info()

In [None]:
#chi_weather.describe()

In [None]:
#chi_weather.head()

## Add date time features to chi_data

In [None]:
# Convert both trip timestamps to datetime64 so they can be subtracted and decomposed.
for time_col in ('start_time', 'end_time'):
    chi_data[time_col] = pd.to_datetime(chi_data[time_col])

In [None]:
# Put the trips into chronological order (by start time), modifying chi_data in place.
chi_data.sort_values(by='start_time', inplace=True)

In [None]:
# Derive the calendar features with the vectorized ``.dt`` accessor instead of
# a row-wise (swifter) apply: same values, computed in native code.
# Assumes start_time/end_time contain no missing values (NaT).
trip_start = chi_data['start_time']
chi_data["Date"] = trip_start.dt.date
chi_data["Month"] = trip_start.dt.month
# isocalendar() yields a nullable UInt32 column; cast to a plain integer dtype.
chi_data["Week"] = trip_start.dt.isocalendar().week.astype("int64")
chi_data["Hour"] = trip_start.dt.hour
chi_data["Weekday"] = trip_start.dt.weekday  # Monday = 0 ... Sunday = 6
chi_data['Duration'] = chi_data['end_time'] - trip_start
chi_data["Duration_sec"] = chi_data["Duration"].dt.total_seconds()
# 1 for Monday-Friday, 0 for Saturday/Sunday
chi_data["IsWeekday"] = (chi_data["Weekday"] < 5).astype(int)
#chi_data.head()

## Add date-time features to chi_weather_2017

In [None]:
# Parse the timestamp column so it can be compared with datetime bounds and decomposed below.
chi_weather['date_time']=pd.to_datetime(chi_weather['date_time'])

In [None]:
first_date = datetime(year=2017, day=1, month=1)
last_date = datetime(year=2017, day=31, month=12)
# ``last_date`` is midnight at the *start* of 31 Dec, so ``<= last_date`` would
# drop every later observation of that day. Use an exclusive bound one day later.
in_2017 = (chi_weather['date_time'] >= first_date) & (chi_weather['date_time'] < last_date + timedelta(days=1))
# ``.copy()`` makes the subset an independent frame, so the in-place sort and
# the column assignments that follow do not trigger SettingWithCopyWarning.
chi_weather_2017 = chi_weather[in_2017].copy()

In [None]:
# Sort chronologically. Rebinding the name (instead of ``inplace=True``) avoids
# mutating a frame that was created as a slice of chi_weather.
chi_weather_2017 = chi_weather_2017.sort_values(by="date_time")

In [None]:
# Calendar features via the vectorized ``.dt`` accessor (no row-wise apply).
weather_time = chi_weather_2017['date_time']
chi_weather_2017["Date"] = weather_time.dt.date
chi_weather_2017['Month'] = weather_time.dt.month
# isocalendar() yields a nullable UInt32 column; cast to a plain integer dtype.
chi_weather_2017["Week"] = weather_time.dt.isocalendar().week.astype("int64")
chi_weather_2017["Hour"] = weather_time.dt.hour
chi_weather_2017["Weekday"] = weather_time.dt.weekday  # Monday = 0 ... Sunday = 6
# Midpoint of the max and min temperature of each observation
chi_weather_2017["mean_temp"] = (chi_weather_2017['max_temp'] + chi_weather_2017['min_temp']) / 2
#chi_weather_2017.head()

# `Data collection and preparation`

In [None]:
# Summary statistics of the trip durations in seconds.
chi_data['Duration_sec'].describe()

In [None]:
# How often each distinct duration (in seconds) occurs.
chi_data['Duration_sec'].value_counts()

In [None]:
# Display the trips ordered by duration; the result is not assigned, so chi_data keeps its order.
chi_data.sort_values (by= 'Duration_sec')

In [None]:
# Checking the quantity of very short trips: rows lasting exactly 60 seconds.
is_60sec_trip = chi_data['Duration_sec'] == 60
chi_data_60sec_trips = chi_data.loc[is_60sec_trip]
chi_data_60sec_trips.count()

In [None]:
# Keep only the start-station id/name pair for the consistency check below.
id_check_columns = ['start_station_id', 'start_station_name']
chi_data_idcheck = chi_data[id_check_columns]

In [None]:
# Number of distinct station names recorded for every start-station id.
grouped_chi_data = (
    chi_data_idcheck
    .groupby('start_station_id')
    .agg({"start_station_name": "nunique"})
    .reset_index()
)

print(grouped_chi_data)

In [None]:
# Listing the IDs that are associated with more than one station name.
has_multiple_names = grouped_chi_data['start_station_name'] > 1
bad_ids = grouped_chi_data.loc[has_multiple_names]
bad_ids

In [None]:
#chi_data.to_csv('cleanedtrips.csv')

# `Descriptive analytics`

## Overview Fleet

unique station_ids, bike_ids, trips ...

## Fleet size

In [None]:
# Fleet size = number of distinct bike ids appearing in the trip data.
bikes = chi_data['bike_id'].unique().size
print(f"The overall number of bikes is {bikes}")

## Temporal demand patterns

### Bike rentals

#### Bike rentals per month

In [None]:
# Number of trips per calendar month, drawn as a line.
rentals_month = chi_data.groupby('Month')['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rentals_month)
ax.set(title='Bike rentals per month', xlabel='Month', ylabel="Bike rentals")
plt.show()

In [None]:
# Daily trip counts, grouped into one box per month.
rentals_month_bp = chi_data.groupby(['Date', 'Month'])['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
month_of_day = rentals_month_bp.index.get_level_values('Month')
sns.boxplot(x=month_of_day, y=rentals_month_bp['bike_id'], ax=ax)
ax.set(title='Bike rentals per month', ylabel="Bike rentals")
plt.show()

#### Bike rentals per week

In [None]:
# Number of trips per ISO calendar week.
rentals_week = chi_data.groupby("Week")['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rentals_week, c="green")
ax.set(title='Bike rentals per week', ylabel="Bike rentals", xlabel="Week")
plt.show()

#### Bike rentals per weekday

In [None]:
# Daily trip counts, grouped into one box per weekday (0 = Monday ... 6 = Sunday).
rentals_weekday = chi_data.groupby(['Weekday', 'Date'])['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
weekday_of_day = rentals_weekday.index.get_level_values('Weekday')
sns.boxplot(x=weekday_of_day, y=rentals_weekday['bike_id'], ax=ax)
ax.set(title='Bike rentals per weekday', ylabel="Bike rentals")

plt.show()

In [None]:
# TODO: add a legend mapping the weekday numbers to names (0 = Monday ... 6 = Sunday)

#### Bike rentals over year per day 

In [None]:
# Number of trips per calendar day over the whole year.
rentals_day = chi_data.groupby("Date")['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(rentals_day, c="green")
ax.set(title='Bike rentals per day', ylabel="Bike rentals", xlabel="Day")
plt.show()

#### Bike rentals per hour

In [None]:
# Trip counts per (day, hour), grouped into one box per hour of the day.
rentals_hour = chi_data.groupby(["Date", "Hour"])['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
hour_of_day = rentals_hour.index.get_level_values("Hour")
sns.boxplot(x=hour_of_day, y=rentals_hour['bike_id'], ax=ax)
ax.set(title='Bike rentals per hour', ylabel="Bike rentals")
plt.show()
rentals_hour

In [None]:
# Total number of trips per hour of the day, summed over the whole year.
rentals_hour = chi_data.groupby(['Hour'])['bike_id'].count()
rentals_hour = pd.DataFrame(rentals_hour)

fig, ax = plt.subplots(figsize = (10,6))

ax.plot(rentals_hour)
ax.set_title('Bike rentals per hour')
ax.set_ylabel("Bike rentals")
ax.set_xlabel("Hour")
# The original line was ``plt.show`` without parentheses, which only references
# the function and never calls it.
plt.show()

#### Correlation between weekday, hour & rentals???

### Duration/Travel time

#### Duration per week

In [None]:
# Mean trip duration (seconds) per ISO calendar week.
duration_week = chi_data.groupby("Week")["Duration_sec"].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(duration_week)
ax.set(title='Duration of rentals per week', ylabel='Duration of rentals', xlabel='Week')
plt.show()

#### Duration per weekday

In [None]:
# Mean trip duration (seconds) per weekday (0 = Monday ... 6 = Sunday).
duration_weekday = chi_data.groupby("Weekday")["Duration_sec"].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(duration_weekday)
ax.set(title='Duration of rentals per weekday', ylabel='Duration of rentals', xlabel='Weekday')
plt.show()

In [None]:
# Daily mean trip duration, grouped into one box per weekday.
duration_weekday_bp = chi_data.groupby(["Date", "Weekday"])['Duration_sec'].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
weekday_labels = duration_weekday_bp.index.get_level_values("Weekday")
sns.boxplot(x=weekday_labels, y=duration_weekday_bp['Duration_sec'], ax=ax)
ax.set(title='Duration of rentals per weekday', ylabel='Duration of rentals')
plt.show()

#### Duration per hour

In [None]:
# Mean trip duration (seconds) per hour of the day.
duration_hour = chi_data.groupby("Hour")["Duration_sec"].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(duration_hour)
ax.set(title='Duration of rentals per hour', ylabel='Duration of rentals', xlabel='Hour')
plt.show()

### Weather

#### Temperature monthly

In [None]:
# Mean of mean_temp over all weather observations of each month.
weather_by_month = chi_weather_2017.groupby("Month")
temp_month_mean = weather_by_month["mean_temp"].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(temp_month_mean)
ax.set(xlabel='Month', ylabel='Temperature', title="Average temperature per month")

plt.show()

In [None]:
# Daily mean temperature, grouped into one box per month.
temp_month_mean_bp = chi_weather_2017.groupby(['Date', 'Month'])['mean_temp'].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
month_labels = temp_month_mean_bp.index.get_level_values('Month')
sns.boxplot(x=month_labels, y=temp_month_mean_bp['mean_temp'], ax=ax)

ax.set(title='Average temperature per month', ylabel="Temperature", xlabel="Month")
plt.show()

In [None]:
# Lowest and highest mean_temp value observed on each day.
temp_day_min = chi_weather_2017.groupby("Date")["mean_temp"].min()

temp_day_max = chi_weather_2017.groupby("Date")["mean_temp"].max()

fig, ax = plt.subplots(figsize = (10,6))

ax.plot(temp_day_max, c = "red", label = "Max temperature")
ax.plot(temp_day_min, c = "blue", label = "Min temperature")
ax.legend()
# The title used to read 'Average temperature per day', although this figure
# shows the daily minimum and maximum curves.
ax.set_title('Minimum and maximum temperature per day')
ax.set_ylabel("Temperature")
ax.set_xlabel('Day')

plt.show()

#### Temperature weekly

In [None]:
# Mean of mean_temp over all weather observations of each ISO week.
weather_by_week = chi_weather_2017.groupby("Week")
temp_week_mean = weather_by_week["mean_temp"].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(temp_week_mean)
ax.set(ylabel="Temperature", xlabel='Week', title='Average temperature per week')
plt.show()

#### Temperature daily

In [None]:
# Mean of mean_temp over all weather observations of each day.
weather_by_day = chi_weather_2017.groupby("Date")
temp_day_mean = weather_by_day["mean_temp"].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(temp_day_mean, c="red")
ax.set(title='Average temperature per day', ylabel="Temperature", xlabel='Day')

plt.show()

In [None]:
# Lowest and highest mean_temp value observed on each day.
temp_day_min = chi_weather_2017.groupby("Date")["mean_temp"].min()

temp_day_max = chi_weather_2017.groupby("Date")["mean_temp"].max()

fig, ax = plt.subplots(figsize = (10,6))

ax.plot(temp_day_max, c = "red", label = "Max temperature")
ax.plot(temp_day_min, c = "blue", label = "Min temperature")
ax.legend()
# The title used to read 'Average temperature per day', although this figure
# shows the daily minimum and maximum curves.
ax.set_title('Minimum and maximum temperature per day')
ax.set_ylabel("Temperature")
ax.set_xlabel('Day')

plt.show()

#### Precip monthly

In [None]:
# Mean of the precip column over all weather observations of each month.
precip_by_month = chi_weather_2017.groupby("Month")
precip_month_mean = precip_by_month["precip"].mean()

fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(precip_month_mean)
ax.set(xlabel='Month', ylabel='Precip percentage', title="Monthly precip percentage")

plt.show()

In [None]:
# Daily mean of precip, grouped into one box per month.
precip_month_mean_bp = chi_weather_2017.groupby(['Date', 'Month'])['precip'].mean().to_frame()

fig, ax = plt.subplots(figsize=(10, 6))
precip_month_labels = precip_month_mean_bp.index.get_level_values('Month')
sns.boxplot(x=precip_month_labels, y=precip_month_mean_bp['precip'], ax=ax)

ax.set(xlabel='Month', ylabel='Precip percentage', title="Monthly precip percentage")
plt.show()

#### Precip weekly

In [None]:
# Mean of the precip column over all weather observations of each ISO week.
precip_by_week = chi_weather_2017.groupby("Week")
precip_week_mean = precip_by_week["precip"].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(precip_week_mean)
ax.set(title='Weekly precip percentage', ylabel="Precip percentage", xlabel='Week')
plt.show()

#### Precip daily

In [None]:
# Mean of the precip column over all weather observations of each day.
precip_by_day = chi_weather_2017.groupby("Date")
precip_day_mean = precip_by_day["precip"].mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(precip_day_mean)
ax.set(title='Daily precip percentage', ylabel=" Precip percentage", xlabel='Day')
plt.show()

### Correlation between rentals and weather 

#### Rescaling

In [None]:
def _min_max_rescale(values):
    """Linearly rescale *values* so its minimum maps to 0 and its maximum to 1."""
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


# Bring rentals, precipitation and temperature onto a common 0-1 scale so the
# curves can be overlaid in one figure.
rentals_day_resc = _min_max_rescale(rentals_day)
rentals_week_resc = _min_max_rescale(rentals_week)
rentals_month_resc = _min_max_rescale(rentals_month)

precip_day_resc = _min_max_rescale(precip_day_mean)
precip_week_resc = _min_max_rescale(precip_week_mean)
precip_month_resc = _min_max_rescale(precip_month_mean)

temp_day_resc = _min_max_rescale(temp_day_mean)
temp_week_resc = _min_max_rescale(temp_week_mean)
temp_month_resc = _min_max_rescale(temp_month_mean)

#### Rentals and temperature

In [None]:
# Overlay the rescaled monthly rentals and monthly mean temperature.
fig, ax = plt.subplots(figsize=(10, 4))

monthly_curves = (
    (rentals_month_resc, 'green', 'Rentals per month'),
    (temp_month_resc, 'red', 'Average temperature per month'),
)
for curve, colour, curve_label in monthly_curves:
    ax.plot(curve, c=colour, label=curve_label)

ax.set(
    title='Correlation between rentals & average temperature per month',
    xlabel='Month',
    ylabel='Rentals/Temperature',
)
ax.legend()

plt.show()

In [None]:
# Overlay the rescaled daily rentals and daily mean temperature.
fig, ax = plt.subplots(figsize=(10, 4))

daily_curves = (
    (rentals_day_resc, 'green', 'Rentals per day'),
    (temp_day_resc, 'red', 'Average temperature per day'),
)
for curve, colour, curve_label in daily_curves:
    ax.plot(curve, c=colour, label=curve_label)

ax.set(
    title='Correlation between rentals & average temperature per day',
    xlabel='Day',
    ylabel='Rentals/Temperature',
)
ax.legend()

plt.show()

In [None]:
# One row per day: mean temperature, number of rentals and the weekday flag.
# The rental columns are aligned with the weather frame on the shared Date index.
trips_per_day = chi_data.groupby('Date')
temp_rentals_day = chi_weather_2017.groupby("Date")["mean_temp"].mean().to_frame()
temp_rentals_day["Rentals"] = trips_per_day['bike_id'].count()
temp_rentals_day["IsWeekday"] = trips_per_day['IsWeekday'].max()

In [None]:

# Daily rentals against daily mean temperature, coloured by weekday/weekend.
fig, ax = plt.subplots(figsize=(10, 6))

temp_values = temp_rentals_day["mean_temp"]
rental_values = temp_rentals_day["Rentals"]
weekday_flag = temp_rentals_day["IsWeekday"]
sns.scatterplot(x=temp_values, y=rental_values, hue=weekday_flag, palette="magma")
ax.set(title="Correlation between daily average temperature and rentals", xlabel="Temperature")
plt.show()

In [None]:
# One row per day: mean precip, number of rentals and the weekday flag.
# The rental columns are aligned with the weather frame on the shared Date index.
trips_by_date = chi_data.groupby('Date')
precip_rentals_day = chi_weather_2017.groupby("Date")["precip"].mean().to_frame()
precip_rentals_day["Rentals"] = trips_by_date['bike_id'].count()
precip_rentals_day["IsWeekday"] = trips_by_date['IsWeekday'].max()

In [None]:

# Daily rentals against daily mean precip, coloured by weekday/weekend.
fig, ax = plt.subplots(figsize = (10,6))

# Draw explicitly on ``ax`` instead of relying on it being the current axes.
sns.scatterplot(x = precip_rentals_day["precip"], y= precip_rentals_day["Rentals"], hue = precip_rentals_day["IsWeekday"], palette="magma", ax = ax)
# precip_rentals_day holds one row per *day*; the title used to say "weekly".
ax.set_title("Correlation between daily precip percentage and rentals")
ax.set_xlabel("Precip percentage")
plt.show()

#### Rentals and precip

In [None]:
# Overlay the rescaled monthly rentals and monthly mean precip.
fig, ax = plt.subplots(figsize=(10, 4))

monthly_precip_curves = (
    (rentals_month_resc, 'green', 'Rentals per month'),
    (precip_month_resc, 'blue', 'Precip percentage per month'),
)
for curve, colour, curve_label in monthly_precip_curves:
    ax.plot(curve, c=colour, label=curve_label)

ax.set(
    title='Correlation between rentals & precip percentage per month',
    xlabel='Month',
    ylabel='Rentals/Precip percentage',
)
ax.legend()

plt.show()

In [None]:
# Overlay the rescaled daily rentals and daily mean precip.
fig, ax = plt.subplots(figsize=(10, 4))

daily_precip_curves = (
    (rentals_day_resc, 'green', 'Rentals per day'),
    (precip_day_resc, 'blue', 'Precip percentage per day'),
)
for curve, colour, curve_label in daily_precip_curves:
    ax.plot(curve, c=colour, label=curve_label)

ax.set(
    title='Correlation between rentals & precip percentage per day',
    xlabel='Day',
    ylabel='Rentals/Precip percentage',
)
ax.legend()

plt.show()

## Geographical demand patterns (Emilio & Alex)

In [None]:
# Re-read the raw trip file for the geographical analysis. This is the same CSV
# as chi_data, but without the derived columns that were added to chi_data.
chi_stations = pd.read_csv("chicago_2017.csv")

#### Creating the location dictionary for the stations
!!! This only needs to be done once; afterwards, simply read in the created JSON file (see next section) !!!

In [None]:
# example api request
# Looks up a single address through the "nominatim" geocoding provider.
geocode("Universität zu Köln", provider="nominatim", user_agent='my_request')

In [None]:
def getLocation(x):
    """Geocode a Chicago station name into a ``"<lat> <lon>"`` string.

    Parameters
    ----------
    x : str
        Station name or address; ``", Chicago"`` is appended before the lookup.

    Returns
    -------
    str
        Latitude and longitude of the first geocoding result, each rounded to
        5 decimals and separated by a single space, or the literal string
        ``'NaN'`` if the lookup fails for any reason (best effort).
    """
    try:
        location = geocode(x + ", Chicago", provider="nominatim", user_agent='my_request')
        point = location.geometry.iloc[0]
        return (f"{round(point.y, 5)} {round(point.x, 5)}")
    except Exception:
        # Deliberately broad: any failed lookup just yields the 'NaN' marker.
        # ``Exception`` (not a bare ``except:``) keeps Ctrl-C / SystemExit
        # working while hundreds of addresses are being geocoded.
        return 'NaN'

In [None]:
# Geocode every distinct start-station name once: {station name: "lat lon" or 'NaN'}.
UniqueAdresses = chi_stations['start_station_name'].unique()
geocoded_start_stations = pd.Series(UniqueAdresses).swifter.apply(getLocation)
locDict = dict(zip(UniqueAdresses, geocoded_start_stations))

In [None]:
# check if the lists of start and end stations are identical
# Only the station *names* are compared. Geocoding every end station again
# (one network request each) just to compare dictionary keys is unnecessary,
# so the names are compared directly.
UniqueAdresses2 = chi_stations['end_station_name'].unique()

set(UniqueAdresses) == set(UniqueAdresses2)

In [None]:
# Persist the location dictionary as JSON so the geocoding need not be repeated.
# The file is written on a single line; run it through a JSON beautifier if you
# want to read it.
with open('locDict.json', 'w') as json_out:
    json.dump(locDict, json_out)

#### Accessing and formatting the location data from our created json dictionary

In [None]:
# Load the previously stored location dictionary back from disk.
with open('locDict.json', 'r') as json_in:
    newLocDict = json.load(json_in)

In [None]:
# formatting the retrieved data into location tuples
def convertToTuple(x):
    """Convert a ``"<lat> <lon>"`` string into a tuple of floats.

    Parameters
    ----------
    x : str or any
        Space-separated coordinate string, or a missing-value marker.

    Returns
    -------
    tuple of float, or float
        The parsed coordinates. ``float('NaN')`` is returned for the string
        ``"NaN"`` and for any non-string input, e.g. the float NaN that
        ``Series.map`` produces for a station name missing from the
        dictionary (this used to raise AttributeError).
    """
    if not isinstance(x, str) or x == "NaN":
        return float('NaN')
    return tuple(map(float, x.split(" ")))

In [None]:
# Look up each trip's start station in the location dictionary and turn the
# stored "lat lon" string into a (lat, lon) tuple.
chi_stations['start_station_coordinates'] = (
    chi_stations['start_station_name']
    .map(newLocDict)
    .swifter.apply(convertToTuple)
)

In [None]:
# Count the trips whose start station could not be geocoded.
coords_missing = chi_stations["start_station_coordinates"].isnull()
missingLocations = chi_stations.loc[coords_missing, "start_station_name"].count()
print(f"There are still {missingLocations} 'NaN' values in our coordinates where geocode didn't find a location according to the address")

In [None]:
# Keep only the trips whose start station has coordinates.
has_coordinates = chi_stations["start_station_coordinates"].notnull()
chi_stations = chi_stations[has_coordinates]

### Mapping our stations

Mapping out the stations in Chicago

In [None]:
positions = list(chi_stations["start_station_coordinates"].unique())

# NOTE(review): 'Stamen Toner' may no longer be available as a built-in tileset
# in recent folium releases - confirm against the installed version.
chi_stations_map = folium.Map(location=(41.8856, -87.6415),  
                                tiles='Stamen Toner',
                                zoom_start=12, 
                                control_scale=True, 
                                max_zoom=20)

# One marker per distinct coordinate (as before), but labelled with the name of
# a station at that coordinate. Every popup used to be the hard-coded text
# 'The Waterfront'.
station_markers = chi_stations[["start_station_name", "start_station_coordinates"]].drop_duplicates(
    subset="start_station_coordinates")

for station_name, position in zip(station_markers["start_station_name"],
                                  station_markers["start_station_coordinates"]):
    folium.CircleMarker(radius=5, location=position,
                        # parse_html=True escapes characters such as '&' or quotes in the name
                        popup=folium.Popup(station_name, parse_html=True),
                        color='crimson', fill_color='crimson').add_to(chi_stations_map)
    
chi_stations_map

Usage of every station, visualized in a heatmap

In [None]:
# Empty OpenStreetMap base map that the usage heatmap is drawn onto.
heat_map_options = dict(
    tiles='OpenStreetMap',
    zoom_start=12,
    control_scale=True,
    max_zoom=20,
)
chi_heat_map = folium.Map(location=(41.8856, -87.6415), **heat_map_options)

In [None]:
# Number of trips started at every station, as a two-column frame.
trips_per_station = chi_stations.groupby("start_station_name")["bike_id"].count()
chi_stations_usage = pd.DataFrame({
    'start_station_name': trips_per_station.index,
    'bike_id_count': trips_per_station.values,
})

# Attach the (lat, lon) tuple of every station from the location dictionary.
chi_stations_usage["start_station_coordinates"] = (
    chi_stations_usage["start_station_name"]
    .map(newLocDict)
    .swifter.apply(convertToTuple)
)

# Heatmap input: (lat, lon, weight) with the trip count as weight.
chi_stations_usage['heatmap_data'] = [
    coords + (trip_count,)
    for coords, trip_count in zip(chi_stations_usage['start_station_coordinates'],
                                  chi_stations_usage['bike_id_count'])
]

In [None]:
# Show the five most used stations. The top entry is later drawn as a separate
# CircleMarker and removed from the heatmap data, in order to keep the heatmap
# weighting on a reasonable scale.
chi_stations_usage.sort_values("bike_id_count", ascending=False).head()

In [None]:
# Draw the busiest station as its own marker and remove it from the heatmap
# data. NOTE: re-running this cell removes the next-busiest station as well.
busiest_idx = chi_stations_usage['bike_id_count'].idxmax()

folium.CircleMarker(radius=15, 
                    location=chi_stations_usage.loc[busiest_idx, 'start_station_coordinates'],
                    popup='Outlier',  # was misspelled 'Outliner'
                    color='crimson', 
                    fill_color='crimson').add_to(chi_heat_map)

chi_stations_usage = chi_stations_usage.drop(busiest_idx)

In [None]:
# Add the weighted heat layer to the base map and display it.
usage_heat_layer = plugins.HeatMap(
    chi_stations_usage["heatmap_data"],
    radius=25,
    blur=15,
    min_opacity=.3,
)
chi_heat_map.add_child(usage_heat_layer)
chi_heat_map

# `KPIs`

### Total bike usage

In [None]:
# Round each start time down to the full hour; used below as the hourly grouping key.
chi_data["date_hour"] = chi_data["start_time"].dt.floor("H")

In [None]:
# Number of trips started in every individual hour of the year.
rentals_date_hour = chi_data.groupby("date_hour")['bike_id'].count().to_frame()

fig, ax = plt.subplots(figsize=(16, 9))

hour_stamps = rentals_date_hour.index.get_level_values("date_hour")
sns.scatterplot(x=hour_stamps, y=rentals_date_hour['bike_id'], ax=ax, color="none", ec="blue")
ax.set(title='Hourly bike usage over a year', ylabel="Bike rentals", xlabel="Hour per date")
plt.show()

In [None]:
# Copy the hourly timestamps from the index into a regular column.
rentals_date_hour["date_hour"] =rentals_date_hour.index

In [None]:
# Split the hourly timestamp into its components with the vectorized ``.dt``
# accessor (no row-wise apply), then remove the helper column again.
rentals_date_hour["Hour"] = rentals_date_hour['date_hour'].dt.hour
rentals_date_hour["Month"] = rentals_date_hour['date_hour'].dt.month
rentals_date_hour["Day"] = rentals_date_hour['date_hour'].dt.day
rentals_date_hour.pop("date_hour")

In [None]:
def calculateNumberRentalsSpecificHour(day, month, hour, data=None):
    """Return the total bike usage of the specified day, month and hour.

    Parameters
    ----------
    day, month, hour : int
        Day of month, month and hour of day to look up.
    data : pandas.DataFrame, optional
        Frame with the columns ``Day``, ``Month``, ``Hour`` and ``bike_id``.
        Defaults to the notebook-level ``rentals_date_hour``.

    Returns
    -------
    pandas.DataFrame
        The ``bike_id`` column (number of rentals) of the matching rows;
        empty if nothing matches.
    """
    if data is None:
        data = rentals_date_hour
    matches = (data["Day"] == day) & (data["Month"] == month) & (data["Hour"] == hour)
    return data.loc[matches, ["bike_id"]]

In [None]:
#Output for 02.01.2017 hour 1 (arguments are day, month, hour)
calculateNumberRentalsSpecificHour(2, 1, 1)

### Fleet utilization

In [None]:
# Share of the fleet used per hour: distinct bikes that started a trip in that
# hour divided by the fleet size. The stored values are fractions (0-1).
fleet_utilization = chi_data.groupby("date_hour")['bike_id'].nunique()/bikes
fleet_utilization =pd.DataFrame(fleet_utilization)

fig, ax = plt.subplots(figsize = (16,9))


# The axis is labelled in percent, so scale the fractions by 100 for plotting
# only; fleet_utilization itself is left unchanged.
sns.scatterplot(x = fleet_utilization.index.get_level_values("date_hour"), y =fleet_utilization['bike_id'] * 100, ax = ax, color = "none", ec = "blue")
ax.set_title('Hourly fleet utilization over year')
ax.set_ylabel("Fleet utilization [%]")
ax.set_xlabel("Hour per date")
plt.show()

In [None]:
# Split the hourly timestamp into its components with the vectorized ``.dt``
# accessor (no row-wise apply).
fleet_utilization["date_hour"] = fleet_utilization.index
fleet_utilization["Year"] = fleet_utilization['date_hour'].dt.year
fleet_utilization["Hour"] = fleet_utilization['date_hour'].dt.hour
fleet_utilization["Month"] = fleet_utilization['date_hour'].dt.month
fleet_utilization["Day"] = fleet_utilization['date_hour'].dt.day
#fleet_utilization.pop("date_hour")
fleet_utilization

In [None]:
# Daily mean of the hourly fleet utilization.
fu_daily = fleet_utilization.groupby(['Year','Month','Day'])['bike_id'].mean()
fu_daily = pd.DataFrame(fu_daily)
fu_daily["Month"] = fu_daily.index.get_level_values('Month')
fu_daily["Day"] = fu_daily.index.get_level_values('Day')
# Take the year from the index instead of hard-coding 2017.
fu_daily["Year"] = fu_daily.index.get_level_values('Year')

# Assemble a proper datetime column from the three components.
fu_daily['Date'] = pd.to_datetime(fu_daily[['Year','Month','Day']])


In [None]:
# Daily mean fleet utilization over the year.
fig, ax = plt.subplots(figsize = (10,6))

# fu_daily holds fractions (0-1); scale by 100 so the values match the [%] label.
sns.lineplot(x = fu_daily['Date'], y =fu_daily['bike_id'] * 100, ax = ax)
ax.set_title('Fleet utilization')
ax.set_ylabel('Fleet utilization [%]')
plt.show()

In [None]:
# Same plot restricted to the first quarter (1 January - 31 March).
first_date_q1 = datetime(year=2017, day=1, month=1, hour=0)
last_date_q1 = datetime(year=2017, day=31, month=3, hour=23)

fu_daily_q1 = fu_daily[(fu_daily['Date']>=first_date_q1)&(fu_daily["Date"] <= last_date_q1)]

fig, ax = plt.subplots(figsize = (10,6))

# fu_daily holds fractions (0-1); scale by 100 so the values match the [%] label.
sns.lineplot(x = fu_daily_q1['Date'], y =fu_daily_q1['bike_id'] * 100, ax = ax)
# Distinguish this figure from the full-year plot, which had the identical title.
ax.set_title('Fleet utilization (Q1 2017)')
ax.set_ylabel('Fleet utilization [%]')
plt.show()

In [None]:
def calculateFleetUtilization(day, month, hour, data=None):
    """Return the relative fleet utilization for the specified day, month and hour.

    Parameters
    ----------
    day, month, hour : int
        Day of month, month and hour of day to look up.
    data : pandas.DataFrame, optional
        Frame with the columns ``Day``, ``Month``, ``Hour`` and ``bike_id``.
        Defaults to the notebook-level ``fleet_utilization``.

    Returns
    -------
    pandas.DataFrame
        The ``bike_id`` column (utilization) of the matching rows; empty if
        nothing matches.
    """
    if data is None:
        data = fleet_utilization
    matches = (data["Day"] == day) & (data["Month"] == month) & (data["Hour"] == hour)
    return data.loc[matches, ["bike_id"]]

In [None]:
#Output for 15.08.2017 hour 8 (arguments are day, month, hour)
calculateFleetUtilization(15,8,8)

### Duration of rentals

In [None]:
# Mean / shortest / longest trip duration for every individual hour of the year.
hourly_durations = chi_data.groupby("date_hour")['Duration_sec']
duration = hourly_durations.mean().to_frame()
duration["mean_minutes"] = duration["Duration_sec"] / 60
duration["min_minutes"] = hourly_durations.min() / 60
duration["max_minutes"] = hourly_durations.max() / 60
duration

In [None]:
# Scatter of the mean trip duration (minutes) for every hour of the year.
fig, ax = plt.subplots(figsize=(16, 9))

duration_stamps = duration.index.get_level_values("date_hour")
sns.scatterplot(x=duration_stamps, y=duration['mean_minutes'], ax=ax, color="none", ec="blue")
ax.set(title='Average travel duration per hour over year', ylabel="Minutes", xlabel="Hour per date")
plt.show()

In [None]:
# Split the hourly timestamp into its components with the vectorized ``.dt``
# accessor (no row-wise apply), then remove the helper column again.
duration["date_hour"] = duration.index
duration["Hour"] = duration['date_hour'].dt.hour
duration["Month"] = duration['date_hour'].dt.month
duration["Day"] = duration['date_hour'].dt.day
duration.pop("date_hour")

In [None]:
def calculateDuration(day, month, hour, data=None):
    """Return the mean, minimum and maximum trip duration for a given hour.

    Parameters
    ----------
    day, month, hour : int
        Day of month, month and hour of day to look up.
    data : pandas.DataFrame, optional
        Frame with the columns ``Day``, ``Month``, ``Hour``, ``mean_minutes``,
        ``min_minutes`` and ``max_minutes``. Defaults to the notebook-level
        ``duration``.

    Returns
    -------
    pandas.DataFrame
        The columns ``mean_minutes``, ``min_minutes`` and ``max_minutes`` of
        the matching rows; empty if nothing matches.
    """
    if data is None:
        data = duration
    matches = (data["Day"] == day) & (data["Month"] == month) & (data["Hour"] == hour)
    return data.loc[matches, ["mean_minutes", "min_minutes", "max_minutes"]]

In [None]:
#Output for day 1, month 1, hour 0 (arguments are day, month, hour)
calculateDuration(1,1,0)

### Rental distribution of customers and subscribers

For a better overview, we group by week.

In [None]:
# Weekly number of rentals: overall, and separately for the two user types.
is_subscriber = chi_data["user_type"] == "Subscriber"
is_customer = chi_data["user_type"] == "Customer"

chi_usage = chi_data.groupby("Week")["bike_id"].count()
chi_usage_subs = chi_data[is_subscriber].groupby("Week")["bike_id"].count()
chi_usage_costu = chi_data[is_customer].groupby("Week")["bike_id"].count()

In [None]:
# Weekly rentals in total and split by user type.
fig, ax = plt.subplots(figsize=(16,6))

ax.plot(chi_usage, label="Total", color='darkcyan')
ax.plot(chi_usage_subs, label="Subscribers", color='b')
# Legend and title used the misspelling "costumer"; the user type is "Customer".
ax.plot(chi_usage_costu, label="Other Customers", color='r')

ax.set_title("Rental distribution of customers and subscribers", fontsize=16)
ax.set_ylabel("Rentals", fontsize=14)
ax.set_xlabel("Weeks", fontsize=14)
ax.legend(fontsize=14, loc="upper left")
plt.show()