In [1]:
import pandas as pd

In [2]:
# Load the raw bike-share dataset from the working directory.
# NOTE(review): 'timestamp' is read as plain strings (dtype object); values look
# day-first (dd-mm-yyyy) — confirm before parsing them with pd.to_datetime.
bikes = pd.read_csv(filepath_or_buffer='london_merged.csv')

In [3]:
# Summarise the raw frame: column names, non-null counts, dtypes and memory use.
bikes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17414 entries, 0 to 17413
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   timestamp     17414 non-null  object 
 1   cnt           17414 non-null  int64  
 2   t1            17414 non-null  float64
 3   t2            17414 non-null  float64
 4   hum           17414 non-null  float64
 5   wind_speed    17414 non-null  float64
 6   weather_code  17414 non-null  int64  
 7   is_holiday    17414 non-null  int64  
 8   is_weekend    17414 non-null  int64  
 9   season        17414 non-null  int64  
dtypes: float64(4), int64(5), object(1)
memory usage: 1.3+ MB


In [4]:
# (rows, columns) of the raw frame.
bikes.shape

(17414, 10)

In [5]:
# Preview the raw frame as the cell output (long frames are shown elided in the middle).
bikes

Unnamed: 0,timestamp,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
0,04-01-2015 00:00,182,3.0,2.0,93.0,6.0,3,0,1,3
1,04-01-2015 01:00,138,3.0,2.5,93.0,5.0,1,0,1,3
2,04-01-2015 02:00,134,2.5,2.5,96.5,0.0,1,0,1,3
3,04-01-2015 03:00,72,2.0,2.0,100.0,0.0,1,0,1,3
4,04-01-2015 04:00,47,2.0,0.0,93.0,6.5,1,0,1,3
...,...,...,...,...,...,...,...,...,...,...
17409,03-01-2017 19:00,1042,5.0,1.0,81.0,19.0,3,0,0,3
17410,03-01-2017 20:00,541,5.0,1.0,81.0,21.0,4,0,0,3
17411,03-01-2017 21:00,337,5.5,1.5,78.5,24.0,4,0,0,3
17412,03-01-2017 22:00,224,5.5,1.5,76.0,23.0,4,0,0,3


In [6]:
# How many rows carry each raw weather code (most frequent first).
bikes['weather_code'].value_counts()

weather_code
1     6150
2     4034
3     3551
7     2141
4     1464
26      60
10      14
Name: count, dtype: int64

In [7]:
# How many rows fall into each raw season code (most frequent first).
bikes['season'].value_counts()

season
0    4394
1    4387
3    4330
2    4303
Name: count, dtype: int64

In [8]:
# Map the terse raw column names to more descriptive ones.
# Columns that keep their name are listed as well, so every column of the raw
# frame is accounted for in one place.
new_cols_dict = {
    'timestamp': 'time',
    'cnt': 'count',
    't1': 'temp_real_C',
    't2': 'temp_feels_like_C',
    'hum': 'humidity_percent',
    'wind_speed': 'wind_speed_kph',
    'weather_code': 'weather',
    'is_holiday': 'is_holiday',
    'is_weekend': 'is_weekend',
    'season': 'season',
}

# Rebind the result instead of mutating with inplace=True; `columns=` names the
# axis explicitly. Labels missing from the frame are ignored by rename(), so
# re-running this cell is harmless.
bikes = bikes.rename(columns=new_cols_dict)

In [9]:
# Convert humidity from a 0-100 percentage to a 0-1 fraction.
# The column keeps the name 'humidity_percent' even though it now holds a fraction.
# Guarded on the current maximum so that re-running this cell does not divide
# the column by 100 a second time.
if bikes['humidity_percent'].max() > 1:
    bikes['humidity_percent'] = bikes['humidity_percent'] / 100

In [10]:
# Lookup tables translating the dataset's numeric codes into readable labels.
season_dict = {
    0: 'spring',
    1: 'summer',
    2: 'autumn',
    3: 'winter'
}
weather_dict = {
    1: 'Clear',
    2: 'Scattered clouds',
    3: 'Broken clouds',
    4: 'Cloudy',
    7: 'Rain',
    10: 'Rain with thunderstorm',
    26: 'Snowfall'
}

# Decode each coded column.
# Series.map() turns every value that is not a key of the mapping into NaN, so
# running it again on a column that already holds labels would blank the whole
# column. Columns whose values are all labels already are therefore skipped,
# which makes this cell safe to re-run.
for column, mapping in (('season', season_dict), ('weather', weather_dict)):
    already_decoded = bikes[column].isin(list(mapping.values())).all()
    if not already_decoded:
        bikes[column] = bikes[column].map(mapping)
    # Fail loudly if a code had no label instead of carrying silent NaNs forward.
    assert bikes[column].notna().all(), f"unmapped codes left in '{column}'"

# Check the first few rows to confirm changes
print(bikes.head())

               time  count  temp_real_C  temp_feels_like_C  humidity_percent  \
0  04-01-2015 00:00    182          3.0                2.0             0.930   
1  04-01-2015 01:00    138          3.0                2.5             0.930   
2  04-01-2015 02:00    134          2.5                2.5             0.965   
3  04-01-2015 03:00     72          2.0                2.0             1.000   
4  04-01-2015 04:00     47          2.0                0.0             0.930   

   wind_speed_kph        weather  is_holiday  is_weekend  season  
0             6.0  Broken clouds           0           1  winter  
1             5.0          Clear           0           1  winter  
2             0.0          Clear           0           1  winter  
3             0.0          Clear           0           1  winter  
4             6.5          Clear           0           1  winter  


In [11]:
# Show the first ten rows of the renamed, decoded frame.
bikes.head(n=10)

Unnamed: 0,time,count,temp_real_C,temp_feels_like_C,humidity_percent,wind_speed_kph,weather,is_holiday,is_weekend,season
0,04-01-2015 00:00,182,3.0,2.0,0.93,6.0,Broken clouds,0,1,winter
1,04-01-2015 01:00,138,3.0,2.5,0.93,5.0,Clear,0,1,winter
2,04-01-2015 02:00,134,2.5,2.5,0.965,0.0,Clear,0,1,winter
3,04-01-2015 03:00,72,2.0,2.0,1.0,0.0,Clear,0,1,winter
4,04-01-2015 04:00,47,2.0,0.0,0.93,6.5,Clear,0,1,winter
5,04-01-2015 05:00,46,2.0,2.0,0.93,4.0,Clear,0,1,winter
6,04-01-2015 06:00,51,1.0,-1.0,1.0,7.0,Cloudy,0,1,winter
7,04-01-2015 07:00,75,1.0,-1.0,1.0,7.0,Cloudy,0,1,winter
8,04-01-2015 08:00,131,1.5,-1.0,0.965,8.0,Cloudy,0,1,winter
9,04-01-2015 09:00,301,2.0,-0.5,1.0,9.0,Broken clouds,0,1,winter


In [13]:
!pip install openpyxl --upgrade --quiet

In [14]:
# Export the cleaned frame to an Excel workbook with a single sheet named 'Data'.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column — pass index=False if that column is not wanted.
bikes.to_excel(
    excel_writer='london_bikes_final.xlsx',
    sheet_name='Data',
)