In [12]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [13]:
# Path where data is located
# PATH is the dataset directory and DATA the CSV file inside it. The two are
# glued together with plain string concatenation when the file is read, so
# PATH has to keep its trailing slash.

PATH = "./data/Bike-Sharing-Dataset/"
DATA = "day.csv"

In [14]:
# Load the daily rental records from PATH + DATA and preview the first rows

day_bike_rentals = pd.read_csv(f"{PATH}{DATA}")
day_bike_rentals.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


## Data Cleaning

### Label Correction

In [15]:
# Label Lists
# Each list holds the display labels for one integer-coded column. The
# position of a label decides which code it replaces: weekdays, holidays and
# working_day are mapped starting at code 0, whereas season, weathersit and
# months are mapped starting at code 1.

weekdays = ['SUN', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT']
holidays = ['NO HOLIDAY', 'HOLIDAY']
working_day = ['NO WORKING DAY', 'WORKING DAY']
season = ['WINTER', 'SPRING', 'SUMMER', 'FALL']
weathersit = ['GOOD', 'MISTY', 'RAIN/SNOW/STORM']
months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

In [16]:
# Function to create a dictionary mapping index of list to the respective label

def create_dictionary(label_list, start_at_zero=True):
  """
  Build a lookup from list positions to the labels stored at those positions.

  Parameters:
  -----------
  label_list -> list: labels, ordered by the code each one stands for
  start_at_zero -> boolean: if truthy the first label gets key 0, otherwise key 1

  Returns:
  --------
  dictionary: consecutive integer keys mapped to the entries of label_list
  """

  first_key = 0 if start_at_zero else 1
  return dict(enumerate(label_list, start=first_key))

In [17]:
# Mapping the indices to actual labels
#
# Series.map turns every value without a dictionary entry into NaN, so running
# this cell a second time would wipe out the labels it has just written. A
# column is therefore only mapped while it still holds its numeric codes,
# which makes the cell safe to re-run.

for column, labels, zero_based in [
    ('weekday', weekdays, True),
    ('holiday', holidays, True),
    ('workingday', working_day, True),
    ('season', season, False),
    ('weathersit', weathersit, False),
    ('mnth', months, False),
]:
  # After the first run the column holds strings and is skipped.
  if pd.api.types.is_numeric_dtype(day_bike_rentals[column]):
    day_bike_rentals[column] = day_bike_rentals[column].map(
        create_dictionary(labels, start_at_zero=zero_based))

In [18]:
# Preview the first rows to check the label mapping
day_bike_rentals.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,WINTER,0,JAN,NO HOLIDAY,SAT,NO WORKING DAY,MISTY,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,WINTER,0,JAN,NO HOLIDAY,SUN,NO WORKING DAY,MISTY,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,WINTER,0,JAN,NO HOLIDAY,MON,WORKING DAY,GOOD,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,WINTER,0,JAN,NO HOLIDAY,TUE,WORKING DAY,GOOD,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,WINTER,0,JAN,NO HOLIDAY,WED,WORKING DAY,GOOD,0.226957,0.22927,0.436957,0.1869,82,1518,1600


### Time

In [19]:
# Mapping 0 -> 2011 and 1 -> 2012
#
# replace() only rewrites the codes 0 and 1 and leaves every other value
# untouched, so re-running this cell keeps the years as they are instead of
# turning all of them into 2012.

day_bike_rentals['yr'] = day_bike_rentals['yr'].replace({0: 2011, 1: 2012})

In [20]:
# Days elapsed since the earliest dteday in the data, stored as days_since_2011

day_bike_rentals['dteday'] = pd.to_datetime(day_bike_rentals['dteday'])
day_bike_rentals['days_since_2011'] = (
    day_bike_rentals['dteday']
    .sub(day_bike_rentals['dteday'].min())
    .dt.days
)

In [21]:
# Preview the first rows to check the yr and days_since_2011 columns
day_bike_rentals.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,days_since_2011
0,1,2011-01-01,WINTER,2011,JAN,NO HOLIDAY,SAT,NO WORKING DAY,MISTY,0.344167,0.363625,0.805833,0.160446,331,654,985,0
1,2,2011-01-02,WINTER,2011,JAN,NO HOLIDAY,SUN,NO WORKING DAY,MISTY,0.363478,0.353739,0.696087,0.248539,131,670,801,1
2,3,2011-01-03,WINTER,2011,JAN,NO HOLIDAY,MON,WORKING DAY,GOOD,0.196364,0.189405,0.437273,0.248309,120,1229,1349,2
3,4,2011-01-04,WINTER,2011,JAN,NO HOLIDAY,TUE,WORKING DAY,GOOD,0.2,0.212122,0.590435,0.160296,108,1454,1562,3
4,5,2011-01-05,WINTER,2011,JAN,NO HOLIDAY,WED,WORKING DAY,GOOD,0.226957,0.22927,0.436957,0.1869,82,1518,1600,4
