# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('Datafiles/bikeshare_train - bikeshare_train.csv')
df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 0:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 1:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 2:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 3:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 4:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


# Dropping Columns

In [3]:
df.drop(columns = ['casual','registered'], inplace = True)

# Converting datetime to Datetime

In [4]:
df['datetime'] = pd.to_datetime(df['datetime'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   datetime    10886 non-null  datetime64[ns]
 1   season      10886 non-null  int64         
 2   holiday     10886 non-null  int64         
 3   workingday  10886 non-null  int64         
 4   weather     10886 non-null  int64         
 5   temp        10886 non-null  float64       
 6   atemp       10886 non-null  float64       
 7   humidity    10886 non-null  int64         
 8   windspeed   10886 non-null  float64       
 9   count       10886 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(6)
memory usage: 850.6 KB


## Creating Month, Day, and Hour

In [5]:
df['Month'] = df['datetime'].dt.month_name()
df['Day'] = df['datetime'].dt.day_name()
df['Hour'] = df['datetime'].dt.hour.astype(object)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   datetime    10886 non-null  datetime64[ns]
 1   season      10886 non-null  int64         
 2   holiday     10886 non-null  int64         
 3   workingday  10886 non-null  int64         
 4   weather     10886 non-null  int64         
 5   temp        10886 non-null  float64       
 6   atemp       10886 non-null  float64       
 7   humidity    10886 non-null  int64         
 8   windspeed   10886 non-null  float64       
 9   count       10886 non-null  int64         
 10  Month       10886 non-null  object        
 11  Day         10886 non-null  object        
 12  Hour        10886 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(6), object(3)
memory usage: 1.1+ MB


In [6]:
df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,count,Month,Day,Hour
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,16,January,Saturday,0
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,40,January,Saturday,1
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,32,January,Saturday,2
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,13,January,Saturday,3
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,1,January,Saturday,4


In [7]:
# Want to check if hours is 24 or 12 format
df['Hour'].value_counts()

12    456
13    456
22    456
21    456
20    456
19    456
18    456
17    456
16    456
15    456
14    456
23    456
11    455
10    455
9     455
8     455
7     455
6     455
0     455
1     454
5     452
2     448
4     442
3     433
Name: Hour, dtype: int64

In [8]:
# Dropping redundant columns
df.drop(columns = ['datetime','season'], inplace = True)

# Converting from Celsius to Fahrenheit

In [9]:
df['temp'] = df['temp'].apply(lambda x: (9/5)*x+32)
df['atemp'] = df['atemp'].apply(lambda x: (9/5)*x+32)
df.head()

Unnamed: 0,holiday,workingday,weather,temp,atemp,humidity,windspeed,count,Month,Day,Hour
0,0,0,1,49.712,57.911,81,0.0,16,January,Saturday,0
1,0,0,1,48.236,56.543,80,0.0,40,January,Saturday,1
2,0,0,1,48.236,56.543,80,0.0,32,January,Saturday,2
3,0,0,1,49.712,57.911,75,0.0,13,January,Saturday,3
4,0,0,1,49.712,57.911,75,0.0,1,January,Saturday,4


# Creating Temp Variance

In [10]:
df['temp_variance'] = df['temp'] - df['atemp']
df.head()

Unnamed: 0,holiday,workingday,weather,temp,atemp,humidity,windspeed,count,Month,Day,Hour,temp_variance
0,0,0,1,49.712,57.911,81,0.0,16,January,Saturday,0,-8.199
1,0,0,1,48.236,56.543,80,0.0,40,January,Saturday,1,-8.307
2,0,0,1,48.236,56.543,80,0.0,32,January,Saturday,2,-8.307
3,0,0,1,49.712,57.911,75,0.0,13,January,Saturday,3,-8.199
4,0,0,1,49.712,57.911,75,0.0,1,January,Saturday,4,-8.199


In [11]:
df.drop('atemp', axis = 1)

Unnamed: 0,holiday,workingday,weather,temp,humidity,windspeed,count,Month,Day,Hour,temp_variance
0,0,0,1,49.712,81,0.0000,16,January,Saturday,0,-8.199
1,0,0,1,48.236,80,0.0000,40,January,Saturday,1,-8.307
2,0,0,1,48.236,80,0.0000,32,January,Saturday,2,-8.307
3,0,0,1,49.712,75,0.0000,13,January,Saturday,3,-8.199
4,0,0,1,49.712,75,0.0000,1,January,Saturday,4,-8.199
...,...,...,...,...,...,...,...,...,...,...,...
10881,0,1,1,60.044,50,26.0027,336,December,Wednesday,19,-7.407
10882,0,1,1,58.568,57,15.0013,241,December,Wednesday,20,-4.797
10883,0,1,1,57.092,61,15.0013,168,December,Wednesday,21,-3.546
10884,0,1,1,57.092,61,6.0032,129,December,Wednesday,22,-6.273
