In [30]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px

# the matplotlib plots will appear directly below the cell in which the plot function was called.
%matplotlib inline

In [31]:
# Read the raw CSV into a DataFrame (path is relative to the working directory).
bike = pd.read_csv(filepath_or_buffer='london_merged.csv')

In [32]:
# Preview the first 100 rows of the raw data.
bike.head(n=100)

Unnamed: 0,timestamp,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
0,2015-01-04 00:00:00,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...
95,2015-01-08 00:00:00,123,11.0,11.0,82.0,26.0,4.0,0.0,0.0,3.0
96,2015-01-08 01:00:00,56,11.5,11.5,85.0,24.0,3.0,0.0,0.0,3.0
97,2015-01-08 02:00:00,51,12.0,12.0,82.0,25.0,3.0,0.0,0.0,3.0
98,2015-01-08 03:00:00,33,12.0,12.0,85.0,22.0,3.0,0.0,0.0,3.0


In [33]:
# Structure of the raw frame: column names, non-null counts, dtypes and memory usage.
bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17414 entries, 0 to 17413
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   timestamp     17414 non-null  object 
 1   cnt           17414 non-null  int64  
 2   t1            17414 non-null  float64
 3   t2            17414 non-null  float64
 4   hum           17414 non-null  float64
 5   wind_speed    17414 non-null  float64
 6   weather_code  17414 non-null  float64
 7   is_holiday    17414 non-null  float64
 8   is_weekend    17414 non-null  float64
 9   season        17414 non-null  float64
dtypes: float64(8), int64(1), object(1)
memory usage: 1.3+ MB


In [34]:
# Number of missing values in each column.
bike.isna().sum()

timestamp       0
cnt             0
t1              0
t2              0
hum             0
wind_speed      0
weather_code    0
is_holiday      0
is_weekend      0
season          0
dtype: int64

In [35]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
bike.describe()

Unnamed: 0,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
count,17414.0,17414.0,17414.0,17414.0,17414.0,17414.0,17414.0,17414.0,17414.0
mean,1143.101642,12.468091,11.520836,72.324954,15.913063,2.722752,0.022051,0.285403,1.492075
std,1085.108068,5.571818,6.615145,14.313186,7.89457,2.341163,0.146854,0.451619,1.118911
min,0.0,-1.5,-6.0,20.5,0.0,1.0,0.0,0.0,0.0
25%,257.0,8.0,6.0,63.0,10.0,1.0,0.0,0.0,0.0
50%,844.0,12.5,12.5,74.5,15.0,2.0,0.0,0.0,1.0
75%,1671.75,16.0,16.0,83.0,20.5,3.0,0.0,1.0,2.0
max,7860.0,34.0,34.0,100.0,56.5,26.0,1.0,1.0,3.0


In [36]:
# Rename the raw dataset columns to readable, presentation-friendly names.
new_col_dict = {
    'timestamp': 'Time',
    'cnt': 'Count',
    't1': 'Real temp C',
    't2': 'Temp feels like C',
    'hum': 'Humidity %',
    'wind_speed': 'Wind Speed',
    'weather_code': 'Weather',
    'is_holiday': 'Holiday',
    'is_weekend': 'Weekend',
    'season': 'Season',
}

# `inplace` has to be a real boolean.  The string 'TRUE' that was passed here
# before is not a bool; at best it behaves like True because any non-empty
# string is truthy (the string 'FALSE' would have had the same effect).
# `columns=` is the explicit equivalent of passing the mapping with axis=1.
bike.rename(columns=new_col_dict, inplace=True)
bike.head()  # confirm the new column names

Unnamed: 0,Time,Count,Real temp C,Temp feels like C,Humidity %,Wind Speed,Weather,Holiday,Weekend,Season
0,2015-01-04 00:00:00,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0


In [37]:
# Convert humidity from a 0-100 percentage to a 0-1 fraction.
# The scaling is applied only while the column still holds values above 1, so
# running this cell a second time does not divide the data by 100 again.
if bike['Humidity %'].max() > 1:
    bike['Humidity %'] = bike['Humidity %'] / 100

# check the converted values
bike.head()

Unnamed: 0,Time,Count,Real temp C,Temp feels like C,Humidity %,Wind Speed,Weather,Holiday,Weekend,Season
0,2015-01-04 00:00:00,182,3.0,2.0,0.93,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,0.93,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,0.965,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,1.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,0.93,6.5,1.0,0.0,1.0,3.0


In [38]:
# Lookup tables that turn the dataset's numeric codes into readable labels.
# Every key is the string form of a float code (for example '1.0').

# weather code -> weather description
Weather_dict = dict([
    ('1.0', 'Clear'),
    ('2.0', 'Scattered Clouds'),
    ('3.0', 'Broken Clouds'),
    ('4.0', 'Cloudy'),
    ('7.0', 'Rain'),
    ('10.0', 'Thunderstorm'),
    ('26.0', 'Snowfall'),
    ('94.0', 'Freezing fog'),
])

# holiday flag -> label
holiday_dict = {'1.0': 'Holiday', '0.0': 'Non holiday'}

# weekend flag -> label
weekend_dict = {'1.0': 'Weekend', '0.0': 'Weekday'}

# season code -> season name
season_dict = dict(zip(
    ('0.0', '1.0', '2.0', '3.0'),
    ('Spring', 'Summer', 'Fall', 'Winter'),
))

In [39]:
# Replace the numeric codes in the categorical columns with readable labels.
# Each column is cast to str first because the lookup dictionaries are keyed by
# the string form of the float codes (e.g. '1.0').
label_maps = {
    'Weather': Weather_dict,
    'Holiday': holiday_dict,
    'Weekend': weekend_dict,
    'Season': season_dict,
}

for column, mapping in label_maps.items():
    # Already translated (the cell was re-run): mapping the labels a second
    # time would turn every value into NaN, so leave the column untouched.
    if set(bike[column].unique()) <= set(mapping.values()):
        continue

    labels = bike[column].astype('str').map(mapping)

    # Series.map yields NaN for any value that has no key in the mapping;
    # stop here instead of silently writing missing labels into the frame.
    unmapped = labels.isna()
    if unmapped.any():
        unknown_codes = bike.loc[unmapped, column].unique().tolist()
        raise ValueError(f"{column}: no label defined for {unknown_codes}")

    bike[column] = labels

bike.head()

Unnamed: 0,Time,Count,Real temp C,Temp feels like C,Humidity %,Wind Speed,Weather,Holiday,Weekend,Season
0,2015-01-04 00:00:00,182,3.0,2.0,0.93,6.0,Broken Clouds,Non holiday,Weekend,Winter
1,2015-01-04 01:00:00,138,3.0,2.5,0.93,5.0,Clear,Non holiday,Weekend,Winter
2,2015-01-04 02:00:00,134,2.5,2.5,0.965,0.0,Clear,Non holiday,Weekend,Winter
3,2015-01-04 03:00:00,72,2.0,2.0,1.0,0.0,Clear,Non holiday,Weekend,Winter
4,2015-01-04 04:00:00,47,2.0,0.0,0.93,6.5,Clear,Non holiday,Weekend,Winter


In [47]:
# Export the cleaned data to an Excel workbook for the Tableau visualisation.
# index=False keeps the DataFrame's row index out of the file.
bike.to_excel(excel_writer='London bike final.xlsx', index=False)

In [48]:
# Summary of the transformed frame: column names, non-null counts and dtypes.
bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17414 entries, 0 to 17413
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Time               17414 non-null  object 
 1   Count              17414 non-null  int64  
 2   Real temp C        17414 non-null  float64
 3   Temp feels like C  17414 non-null  float64
 4   Humidity %         17414 non-null  float64
 5   Wind Speed         17414 non-null  float64
 6   Weather            17414 non-null  object 
 7   Holiday            17414 non-null  object 
 8   Weekend            17414 non-null  object 
 9   Season             17414 non-null  object 
dtypes: float64(4), int64(1), object(5)
memory usage: 1.3+ MB
