## Importing the dataset

In [1]:
import pandas as pd

In [2]:
# Read the source CSV into the working DataFrame that every later cell uses.
source_csv = "london_merged.csv"
df = pd.read_csv(source_csv)

## Inspecting the structure of the DataFrame

In [3]:
# Preview the first five rows to check the columns and value formats.
df.head(n=5)

Unnamed: 0,timestamp,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
0,2015-01-04 00:00:00,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0


In [4]:
# Preview the last five rows to see where the records end.
df.tail(n=5)

Unnamed: 0,timestamp,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
17409,2017-01-03 19:00:00,1042,5.0,1.0,81.0,19.0,3.0,0.0,0.0,3.0
17410,2017-01-03 20:00:00,541,5.0,1.0,81.0,21.0,4.0,0.0,0.0,3.0
17411,2017-01-03 21:00:00,337,5.5,1.5,78.5,24.0,4.0,0.0,0.0,3.0
17412,2017-01-03 22:00:00,224,5.5,1.5,76.0,23.0,4.0,0.0,0.0,3.0
17413,2017-01-03 23:00:00,139,5.0,1.0,76.0,22.0,2.0,0.0,0.0,3.0


In [6]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(17414, 10)

In [7]:
# How often each distinct humidity reading occurs, most frequent first.
df["hum"].value_counts()

88.00    965
82.00    928
77.00    774
87.00    710
76.00    513
        ... 
25.50      1
88.50      1
72.75      1
23.00      1
98.00      1
Name: hum, Length: 143, dtype: int64

In [8]:
# How often each weather code occurs; this also lists every code present in the data.
df["weather_code"].value_counts()

1.0     6150
2.0     4034
3.0     3551
7.0     2141
4.0     1464
26.0      60
10.0      14
Name: weather_code, dtype: int64

In [9]:
# How often each distinct wind-speed reading occurs, most frequent first.
df["wind_speed"].value_counts()

12.000000    842
14.000000    832
11.000000    832
15.000000    828
13.000000    828
            ... 
52.000000      1
54.000000      1
50.000000      1
36.500000      1
2.666667       1
Name: wind_speed, Length: 103, dtype: int64

## Changing the column names and doing further analysis

**The data has some ambiguous column names, so we rename the columns to more descriptive ones.**

In [11]:
# Map the terse source headers to self-explanatory names.
# Caution: once 'cnt' becomes 'count', attribute access (df.count) resolves to
# the DataFrame.count method, so that column has to be read as df['count'].
new_names_dict = dict(
    timestamp='time',
    cnt='count',
    t1='real_temp_in_C',
    t2='temp_feels_like_C',
    hum='humidity_percentage',
    wind_speed='wind_speed_kph',
    weather_code='weather',
    # The last three keep their current names; they are listed for completeness.
    is_holiday='is_holiday',
    is_weekend='is_weekend',
    season='season',
)
# Apply the new headers to the existing frame in place.
df.rename(columns=new_names_dict, inplace=True)

In [12]:
# Confirm that the new column names took effect.
df.head(n=5)

Unnamed: 0,time,count,real_temp_in_C,temp_feels_like_C,humidity_percentage,wind_speed_kph,weather,is_holiday,is_weekend,season
0,2015-01-04 00:00:00,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0


**Changing the humidity percentage values to decimal fractions.**

In [13]:
# Rescale humidity from percentage points to a decimal fraction.
# Caution: this overwrites the column, so running the cell a second time divides
# by 100 again; the column name also still says "percentage" afterwards.
df["humidity_percentage"] = df["humidity_percentage"].div(100)

In [14]:
# Check the rescaled humidity values: same counts as before, values now fractions.
df["humidity_percentage"].value_counts()

0.8800    965
0.8200    928
0.7700    774
0.8700    710
0.7600    513
         ... 
0.2550      1
0.8850      1
0.7275      1
0.2300      1
0.9800      1
Name: humidity_percentage, Length: 143, dtype: int64

**Mapping the numeric Season and Weather codes to descriptive labels**

In [15]:
def _decode_codes(codes, labels):
    """Translate a column of numeric category codes into text labels.

    Parameters
    ----------
    codes : pandas.Series
        Column holding numeric codes, or the labels left by an earlier run
        of this cell.
    labels : dict
        Maps each code, written as a string such as '3.0', to its label.

    Returns
    -------
    pandas.Series
        The decoded labels, or `codes` itself when it already contains
        nothing but labels.

    Raises
    ------
    ValueError
        If a non-missing value in `codes` has no entry in `labels`.
    """
    # Re-running the cell must not wipe the column: values that are already
    # labels are not keys of the lookup and would otherwise all become NaN.
    if codes.isin(list(labels.values())).all():
        return codes

    # Look the codes up numerically instead of through their string form, so
    # the match does not depend on how a float happens to be printed.
    lookup = {float(code): label for code, label in labels.items()}
    decoded = codes.map(lookup)

    # Fail loudly rather than letting unmapped codes silently turn into NaN.
    unknown = codes[decoded.isna() & codes.notna()].unique()
    if len(unknown) > 0:
        raise ValueError(f"No label defined for codes: {list(unknown)}")
    return decoded


season_dict = {
    '0.0':'spring',
    '1.0':'summer',
    '2.0':'autumn',
    '3.0':'winter'
}

weather_dict = {
    '1.0':'Clear',
    '2.0':'Scattered clouds',
    '3.0':'Broken clouds',
    '4.0':'Cloudy',
    '7.0':'Rain',
    '10.0':'Rain with thunderstorm',
    '26.0':'Snowfall'
}

# Replace the numeric codes with their written labels.
df["season"] = _decode_codes(df["season"], season_dict)
df["weather"] = _decode_codes(df["weather"], weather_dict)

**Finally, checking the dataset**

In [16]:
# Show the first ten rows to verify the renamed, rescaled and decoded columns.
df.head(n=10)

Unnamed: 0,time,count,real_temp_in_C,temp_feels_like_C,humidity_percentage,wind_speed_kph,weather,is_holiday,is_weekend,season
0,2015-01-04 00:00:00,182,3.0,2.0,0.93,6.0,Broken clouds,0.0,1.0,winter
1,2015-01-04 01:00:00,138,3.0,2.5,0.93,5.0,Clear,0.0,1.0,winter
2,2015-01-04 02:00:00,134,2.5,2.5,0.965,0.0,Clear,0.0,1.0,winter
3,2015-01-04 03:00:00,72,2.0,2.0,1.0,0.0,Clear,0.0,1.0,winter
4,2015-01-04 04:00:00,47,2.0,0.0,0.93,6.5,Clear,0.0,1.0,winter
5,2015-01-04 05:00:00,46,2.0,2.0,0.93,4.0,Clear,0.0,1.0,winter
6,2015-01-04 06:00:00,51,1.0,-1.0,1.0,7.0,Cloudy,0.0,1.0,winter
7,2015-01-04 07:00:00,75,1.0,-1.0,1.0,7.0,Cloudy,0.0,1.0,winter
8,2015-01-04 08:00:00,131,1.5,-1.0,0.965,8.0,Cloudy,0.0,1.0,winter
9,2015-01-04 09:00:00,301,2.0,-0.5,1.0,9.0,Broken clouds,0.0,1.0,winter


## Exporting the Dataset for visualizations

In [22]:
# Write the cleaned frame to an Excel workbook for the visualisation step.
# `index` is left at its default, so the row index is exported as the first column.
df.to_excel(excel_writer="london_bikes_output.xlsx", sheet_name='Final_Data')