In [1]:
import numpy as np
import scipy as sp
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot theme (darkgrid style with a light blue-grey background)
sns.set()
import statsmodels.formula.api as smf
import statsmodels.api as sm

London bike sharing dataset
========

The data from the cycling dataset is grouped by "Start time"; each row represents the count of new bike shares within one hour. Long-duration shares are not included in the count.

Metadata:
---

"timestamp" - timestamp field for grouping the data  
"cnt" - the count of new bike shares  
"t1" - real temperature in C  
"t2" - temperature in C "feels like"  
"hum" - humidity in percentage  
"wind_speed" - wind speed in km/h  
"weather_code" - category of the weather  
"is_holiday" - boolean field - 1 holiday / 0 non holiday  
"is_weekend" - boolean field - 1 if the day is a weekend  
"season" - category field for meteorological seasons: 0 - spring; 1 - summer; 2 - fall; 3 - winter.  

"weather_code" category description:  
1 = Clear; mostly clear, but some values include haze / fog / patches of fog / fog in vicinity  
2 = scattered clouds / few clouds  
3 = Broken clouds  
4 = Cloudy  
7 = Rain / light rain shower / light rain  
10 = rain with thunderstorm  
26 = snowfall  
94 = Freezing Fog  

In [None]:
# Load the hourly London bike-share data.
bikeshare_data = pd.read_csv("data/london_merged_hour.csv")

# Parse the timestamp strings into datetimes so the .dt accessor can be used.
bikeshare_data['timestamp'] = pd.to_datetime(bikeshare_data['timestamp'])

# Keep the calendar date (time of day dropped) in its own column.
bikeshare_data['date'] = bikeshare_data['timestamp'].dt.date

# Human-readable label for each numeric weather code used by the dataset.
weather_code_map = {
    1:  "Clear or Fog",
    2:  "Few Clouds",
    3:  "Broken Clouds",
    4:  "Cloudy",
    7:  "Light Rain",
    10: "Thunderstorm with Rain",
    26: "Snowfall",
    94: "Freezing Fog"
}

# Attach the label for each entry; codes absent from the map become NaN.
bikeshare_data['weather_description'] = bikeshare_data['weather_code'].map(weather_code_map)

# Rename the terse source columns to be more readable.
# NOTE: the mapping previously listed 'weather_code' twice
# ('weather_condition_code', then 'weather_code'). Python keeps only the last
# value for a duplicated dict key, so the column was never actually renamed.
# That dead entry and the identity mappings (e.g. 'wind_speed' -> 'wind_speed')
# are dropped here; the resulting column names are unchanged.
bikeshare_data = bikeshare_data.rename(columns={
    'cnt': 'count',
    't1': 'temperature_actual',
    't2': 'temperature_feels_like',
    'hum': 'humidity',
})


            timestamp  count  temperature_actual  temperature_feels_like  \
0 2015-01-04 00:00:00    182                 3.0                     2.0   
1 2015-01-04 01:00:00    138                 3.0                     2.5   
2 2015-01-04 02:00:00    134                 2.5                     2.5   
3 2015-01-04 03:00:00     72                 2.0                     2.0   
4 2015-01-04 04:00:00     47                 2.0                     0.0   

   humidity  wind_speed  weather_code  is_holiday  is_weekend  season  \
0      93.0         6.0           3.0         0.0         1.0     3.0   
1      93.0         5.0           1.0         0.0         1.0     3.0   
2      96.5         0.0           1.0         0.0         1.0     3.0   
3     100.0         0.0           1.0         0.0         1.0     3.0   
4      93.0         6.5           1.0         0.0         1.0     3.0   

         date weather_description  
0  2015-01-04       Broken Clouds  
1  2015-01-04        Clear or Fo