# Bike Sharing 

## Citation 

**Boom** **Bikes**

Dataset link : https://www.kaggle.com/datasets/yasserh/bike-sharing-dataset


In [69]:
# Importing Libraries 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [70]:
# warnings 
import warnings 
warnings.filterwarnings("ignore")

## 1. Data Understanding & Exploration 

In [71]:
# Loading Dataset 
df = pd.read_csv("/kaggle/input/bike-sharing-dataset/day.csv")

df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,01-01-2018,1,0,1,0,6,0,2,14.110847,18.18125,80.5833,10.749882,331,654,985
1,2,02-01-2018,1,0,1,0,0,0,2,14.902598,17.68695,69.6087,16.652113,131,670,801
2,3,03-01-2018,1,0,1,0,1,1,1,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,4,04-01-2018,1,0,1,0,2,1,1,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,5,05-01-2018,1,0,1,0,3,1,1,9.305237,11.4635,43.6957,12.5223,82,1518,1600


In [72]:
# Shape 
df.shape

(730, 16)

In [73]:
# Columns in dataset 
df.columns

Index(['instant', 'dteday', 'season', 'yr', 'mnth', 'holiday', 'weekday',
       'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed',
       'casual', 'registered', 'cnt'],
      dtype='object')

In [74]:
# Info
df.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   instant     730 non-null    int64  
 1   dteday      730 non-null    object 
 2   season      730 non-null    int64  
 3   yr          730 non-null    int64  
 4   mnth        730 non-null    int64  
 5   holiday     730 non-null    int64  
 6   weekday     730 non-null    int64  
 7   workingday  730 non-null    int64  
 8   weathersit  730 non-null    int64  
 9   temp        730 non-null    float64
 10  atemp       730 non-null    float64
 11  hum         730 non-null    float64
 12  windspeed   730 non-null    float64
 13  casual      730 non-null    int64  
 14  registered  730 non-null    int64  
 15  cnt         730 non-null    int64  
dtypes: float64(4), int64(11), object(1)
memory usage: 133.4 KB


In [75]:
# Statistical Summary 
df.describe()

Unnamed: 0,instant,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0
mean,365.5,2.49863,0.5,6.526027,0.028767,2.99726,0.683562,1.394521,20.319259,23.726322,62.765175,12.76362,849.249315,3658.757534,4508.006849
std,210.877136,1.110184,0.500343,3.450215,0.167266,2.006161,0.465405,0.544807,7.506729,8.150308,14.237589,5.195841,686.479875,1559.758728,1936.011647
min,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.424346,3.95348,0.0,1.500244,2.0,20.0,22.0
25%,183.25,2.0,0.0,4.0,0.0,1.0,0.0,1.0,13.811885,16.889713,52.0,9.04165,316.25,2502.25,3169.75
50%,365.5,3.0,0.5,7.0,0.0,3.0,1.0,1.0,20.465826,24.368225,62.625,12.125325,717.0,3664.5,4548.5
75%,547.75,3.0,1.0,10.0,0.0,5.0,1.0,2.0,26.880615,30.445775,72.989575,15.625589,1096.5,4783.25,5966.0
max,730.0,4.0,1.0,12.0,1.0,6.0,1.0,3.0,35.328347,42.0448,97.25,34.000021,3410.0,6946.0,8714.0


In [76]:
# Null Value Analysis 

df.isna().sum()

instant       0
dteday        0
season        0
yr            0
mnth          0
holiday       0
weekday       0
workingday    0
weathersit    0
temp          0
atemp         0
hum           0
windspeed     0
casual        0
registered    0
cnt           0
dtype: int64

In [77]:
# Decoding "season"

# 1 - Spring
# 2 - Summer 
# 3 - Fall 
# 4 - Winter 

df["season"] = df["season"].map({
    1:"spring", 
    2:"summer",
    3:"fall",
    4:"winter"
})


In [78]:
df["season"].value_counts()

season
fall      188
summer    184
spring    180
winter    178
Name: count, dtype: int64

In [79]:
# 0 - 2018  , 1 - 2019 -- Year
df["yr"].value_counts() , df["mnth"].value_counts()

(yr
 0    365
 1    365
 Name: count, dtype: int64,
 mnth
 1     62
 3     62
 5     62
 7     62
 8     62
 10    62
 12    62
 4     60
 6     60
 9     60
 11    60
 2     56
 Name: count, dtype: int64)

In [80]:
# Decoding months 
df["mnth"] = df["mnth"].map({
    1:"Jan",
    2:"Feb",
    3:"Mar",
    4:"Apr",
    5:"May",
    6:"Jun",
    7:"Jul",
    8:"Aug",
    9:"Sept",
    10:"Oct",
    11:"Nov",
    12:"Dec"
})

In [81]:
df["mnth"].value_counts()

mnth
Jan     62
Mar     62
May     62
Jul     62
Aug     62
Oct     62
Dec     62
Apr     60
Jun     60
Sept    60
Nov     60
Feb     56
Name: count, dtype: int64

In [82]:
# Holidays 
df["holiday"].value_counts()

holiday
0    709
1     21
Name: count, dtype: int64

In [83]:
# Week Day 
df["weekday"] = df["weekday"].map({
    1:"Mon",
    2:"Tue",
    3:"Wed",
    4:"Thurs",
    5:"Fri",
    6:"Sat"
})

In [84]:
df["weekday"].value_counts()

weekday
Sat      105
Mon      105
Tue      104
Thurs    104
Fri      104
Wed      103
Name: count, dtype: int64

In [85]:
# Working Day
# df["workingday"].astype("category").value_counts()
df["workingday"].value_counts()

workingday
1    499
0    231
Name: count, dtype: int64

In [86]:
# weathersit

# 1 - Clear , Few Clouds , Partly Cloudy 
# 2 - Mist , Cloudy 
# 3 - Light Snow , Heavy Rain 

df["weathersit"] = df["weathersit"].map({
    1:"A", 
    2:"B",
    3:"C"
})



In [87]:
df["weathersit"].value_counts()

weathersit
A    463
B    246
C     21
Name: count, dtype: int64

In [88]:
df['weekday'].value_counts(dropna = False)

weekday
Sat      105
NaN      105
Mon      105
Tue      104
Thurs    104
Fri      104
Wed      103
Name: count, dtype: int64