In [None]:
import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices emitted by the libraries imported below.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the daily bike-share records; the relative path means the CSV must sit
# in the notebook's working directory.
dataset = pd.read_csv("daily_bike_share.csv")

In [None]:
# Preview the first rows.
dataset.head()

In [None]:
# (number of rows, number of columns)
dataset.shape

In [None]:
# Column labels.
dataset.columns

In [None]:
# Summary statistics of the numeric columns.
dataset.describe()

In [None]:
# Column dtypes and non-null counts.
dataset.info()

In [None]:
# Recode the numeric season codes as readable labels in a single pass.
# Series.replace leaves any value that is not listed untouched (exactly what
# the previous per-code .loc writes did) and is a no-op when the cell is
# re-run. Writing strings into a numeric column through .loc is deprecated in
# pandas >= 2.1 ("setting an item of incompatible dtype"), a notice that was
# hidden here because warnings are globally silenced.
dataset['season'] = dataset['season'].replace(
    {1: 'spring', 2: 'summer', 3: 'fall', 4: 'winter'}
)

In [None]:
# Number of rows per season label.
dataset['season'].astype('category').value_counts()

In [None]:
 # Number of rows per year value.
 dataset['year'].astype('category').value_counts()

In [None]:
def object_map_mnths(x):
    """Translate month numbers (1-12) in ``x`` into three-letter month names.

    ``x`` must expose a ``.map`` method that accepts a dict (a pandas Series
    in this notebook); the result of that call is returned unchanged.
    """
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Build {1: 'Jan', ..., 12: 'Dec'} from the ordered names.
    month_lookup = dict(enumerate(month_names, start=1))
    return x.map(month_lookup)

In [None]:
 # Replace the numeric month codes in 'mnth' with month names via object_map_mnths.
 # NOTE(review): this overwrites the column, so running the cell a second time
 # maps the already-converted names again; they do not match the integer keys,
 # so the values would presumably become NaN - confirm before re-running.
 dataset[['mnth']] = dataset[['mnth']].apply(object_map_mnths)

In [None]:
 # Number of rows per holiday flag value.
 dataset['holiday'].astype('category').value_counts()

In [None]:
def str_map_weekday(x):
    """Translate weekday numbers in ``x`` into three-letter day names.

    Parameters
    ----------
    x : object exposing ``.map`` accepting a dict (a pandas Series here).

    Returns
    -------
    The result of ``x.map`` with 1..6 -> 'Mon'..'Sat' and both 0 and 7 -> 'Sun'.

    Notes
    -----
    NOTE(review): bike-sharing data is commonly encoded 0-6 with 0 = Sunday.
    With only 1-7 listed, a 0 would turn into NaN and Sunday would silently
    vanish from the later dummy encoding. Accepting 0 as well as 7 covers both
    conventions - confirm the actual encoding against the CSV.
    """
    weekday_names = {
        0: 'Sun',
        1: 'Mon', 2: 'Tue', 3: 'Wed', 4: 'Thu', 5: 'Fri', 6: 'Sat',
        7: 'Sun',
    }
    return x.map(weekday_names)

In [None]:
 # Replace the numeric weekday codes in 'weekday' with day names via str_map_weekday.
 # NOTE(review): overwrites the column in place, so a second run would map the
 # already-converted names again - confirm before re-running.
 dataset[['weekday']] = dataset[['weekday']].apply(str_map_weekday)

In [None]:
 # Number of rows per weekday label.
 dataset['weekday'].astype('category').value_counts()

In [None]:
 # Number of rows per workingday flag value.
 dataset['workingday'].astype('category').value_counts()

In [None]:
dataset.loc[(dataset['weathersit'] == 1), 'weathersit'] = 'A'
dataset.loc[(dataset['weathersit'] == 2), 'weathersit'] = 'B'
dataset.loc[(dataset['weathersit'] == 3), 'weathersit'] = 'C'

In [None]:
 dataset['weathersit'].astype('category').value_counts()

In [None]:
sns.distplot(dataset['temp'])

In [None]:
sns.distplot(dataset['atemp'])
plt.show()

In [None]:
sns.distplot(dataset['windspeed'])
plt.show()

In [None]:
sns.distplot(dataset['rentals'])
plt.show()

In [None]:
# One box plot of 'rentals' per categorical feature, placed on a 3x3 grid
# (seven panels are used). The loop replaces seven copy-pasted stanzas, so the
# feature list is the single place to edit; plt.show() renders the figure
# without echoing the repr of the last Axes object.
plt.figure(figsize=(20,20))
boxplot_features = ['season', 'mnth', 'weekday', 'weathersit',
                    'workingday', 'year', 'holiday']
for panel, feature in enumerate(boxplot_features, start=1):
    plt.subplot(3, 3, panel)
    sns.boxplot(x=feature, y='rentals', data=dataset)
plt.show()

In [None]:
# Keep only the float64 columns for the pairwise numeric analysis.
dataset_numeric = dataset.select_dtypes(include=['float64'])
dataset_numeric.head()

In [None]:
# Scatter matrix of every pair of float columns.
sns.pairplot(dataset_numeric)
plt.show()

In [None]:
# Pairwise correlation matrix of the float columns.
cor = dataset_numeric.corr()
cor

In [None]:
# Hide the strict upper triangle so every pair is drawn once (the lower
# triangle and the diagonal stay visible). The mask is an explicit boolean
# array: the previous version reused the correlation values themselves as the
# mask, which only hid a cell when its correlation happened to be non-zero.
mask = np.triu(np.ones_like(cor, dtype=bool), k=1)
fig, ax = plt.subplots()
fig.set_size_inches(10,10)
sns.heatmap(cor, mask=mask, vmax=1, square=True, annot=True, ax=ax)
plt.show()

In [None]:
dataset.drop('atemp', axis=1, inplace=True)

In [None]:
dataset.head()

In [None]:
dataset_categorical = dataset.select_dtypes(include=['object'])

In [None]:
dataset_categorical.head()

In [None]:
# One-hot encode the categorical columns, dropping the first level of each to
# avoid the dummy-variable trap. dtype=int forces 0/1 integers: pandas >= 2.0
# returns booleans by default, and a frame mixing bool and float columns
# becomes an object array that statsmodels' OLS refuses to fit.
dataset_dummies = pd.get_dummies(dataset_categorical, drop_first=True, dtype=int)
dataset_dummies.head()

In [None]:
# Remove the original categorical columns; their dummy-encoded versions are
# appended in the next cell. errors='ignore' tolerates columns already gone.
dataset = dataset.drop(list(dataset_categorical.columns), axis=1, errors='ignore')
dataset

In [None]:
# Append the dummy columns side by side (column-wise concat).
# NOTE(review): re-running this cell appends the dummy columns a second time.
dataset  = pd.concat([dataset, dataset_dummies], axis = 1)

In [None]:
dataset.head()

In [None]:
# Drop 'instant' and 'dteday' from the modelling frame; errors='ignore' keeps
# the cell from failing when either column is already absent.
dataset = dataset.drop(['instant', 'dteday'], axis=1, inplace=False, errors='ignore')
dataset.head()

In [None]:
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

In [None]:
from sklearn.model_selection import train_test_split
# NOTE(review): the split below is already pinned by random_state, so this
# global NumPy seed looks redundant for it - confirm nothing else relies on it.
np.random.seed(0)
# 70/30 train/test split, made reproducible through random_state.
df_train, df_test = train_test_split(dataset, train_size= 0.7, test_size=0.3, random_state=100 )

In [None]:
df_train

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
# Columns to rescale. The scaler is fitted on the training rows only; the same
# fitted scaler is applied to the test rows further down via scaler.transform.
# Note that the target column 'rentals' is rescaled as well.
var = ["temp","hum","windspeed","rentals"]
df_train[var] = scaler.fit_transform(df_train[var])

In [None]:
df_train.describe()

In [None]:
# Correlation heatmap over every column of the training frame.
plt.figure(figsize= (30,30))
sns.heatmap(df_train.corr(),annot=True, cmap= "YlGnBu")
plt.show()

In [None]:
# Split the training frame into predictors and target. The target column in
# this notebook is 'rentals'; the previous pop('cnt') referenced a column that
# is never created here and raised KeyError. Selecting instead of popping also
# leaves df_train intact, so the cell can be re-run.
x_train = df_train.drop(columns=['rentals'])
y_train = df_train['rentals']

In [None]:
# Preview the predictor columns.
x_train.head()

In [None]:
# Same predictors as a plain NumPy array (display only; the result is not stored).
np.array(x_train)

In [None]:
import statsmodels.api as sm
# add_constant adds a column of ones so the OLS fit includes an intercept term.
x_train_lm = sm.add_constant(x_train)

# Ordinary least squares on all predictors.
lr = sm.OLS(y_train, x_train_lm).fit()

In [None]:
# Fitted coefficients of the statsmodels model.
lr.params

In [None]:
# The same regression through scikit-learn, which fits its own intercept.
lm = LinearRegression()
lm.fit(x_train, y_train)

In [None]:
print(lm.coef_)
print(lm.intercept_)

In [None]:
# Full statsmodels regression report.
lr.summary()

In [None]:
# Recursive feature elimination. The previous line used "import ... import",
# which is a SyntaxError; the correct form is "from ... import".
from sklearn.feature_selection import RFE

In [None]:
lm = LinearRegression()
rfe1 = RFE(lm, 15)

rfe1.fit(x_train, y_train)
print(rfe1,support_)
print(rfe1,ranking_)

In [None]:
col1 = x_train.columns[rfe1.support_]

In [None]:
col1

In [None]:
# Design matrix for the first reduced model: the RFE-selected columns plus an
# intercept column named 'const'. Previously the constant was added to a
# separate frame but the model was fitted on the frame without it, so the OLS
# had no intercept and the following cell's drop('const') had nothing to drop.
x_train_rfe1 = sm.add_constant(x_train[col1])
# Kept for any later cell that still refers to this name.
x_train_lm = x_train_rfe1
lm1 = sm.OLS(y_train, x_train_rfe1).fit()
lm1.summary()

In [None]:
# Variance inflation factor, used below to check the selected predictors for
# multicollinearity. Fixes the "fromm" / "varience_" typos, which made this
# cell a SyntaxError and left variance_inflation_factor undefined.
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Predictors of the first reduced model without the intercept column, for the
# VIF computation. errors='ignore' makes this work whether or not
# x_train_rfe1 currently carries a 'const' column.
a = x_train_rfe1.drop(columns=['const'], errors='ignore')

In [None]:
# VIF table for the predictors in `a`: one row per feature, VIF rounded to two
# decimals, largest (most collinear) first.
vif = (
    pd.DataFrame({
        'features': a.columns,
        'VIF': [variance_inflation_factor(a.values, position)
                for position in range(a.shape[1])],
    })
    .round({'VIF': 2})
    .sort_values(by="VIF", ascending=False)
)
vif

In [None]:
# Second recursive-feature-elimination pass.
# NOTE(review): this uses the same estimator and the same feature count (15)
# as rfe1, so it will select the same columns - confirm whether a smaller
# count was intended here.
lm = LinearRegression()
# Keyword form is required by current scikit-learn (positional is rejected).
rfe2 = RFE(lm, n_features_to_select=15)

rfe2.fit(x_train, y_train)
# Attributes of the fitted selector; "rfe2,support_" raised NameError.
print(rfe2.support_)
print(rfe2.ranking_)

In [None]:
# Columns retained by the second RFE pass.
col2 = x_train.columns[rfe2.support_]

# Design matrix for the final model: selected columns plus the 'const'
# intercept column. The model must be fitted on the frame that contains the
# constant, because later cells predict with x_train_rfe2, drop 'const' from
# it, and add a constant to the test predictors before calling lm2.predict.
x_train_rfe2 = sm.add_constant(x_train[col2])
# Kept for any later cell that still refers to this name.
x_train_lm = x_train_rfe2
lm2 = sm.OLS(y_train, x_train_rfe2).fit()
lm2.summary()

In [None]:
# VIF table for the predictors of the second reduced model. The previous
# version reused `a` (the first model's predictors), so it merely repeated the
# earlier table instead of checking the features selected by rfe2.
rfe2_features = x_train_rfe2.drop(columns=['const'], errors='ignore')
vif1 = pd.DataFrame()
vif1['features'] = rfe2_features.columns
vif1['VIF'] = [variance_inflation_factor(rfe2_features.values, i)
               for i in range(rfe2_features.shape[1])]
vif1['VIF'] = round(vif1['VIF'], 2)
vif1 = vif1.sort_values(by = "VIF", ascending = False)
vif1

In [None]:
y_train_cnt = lm2.predict(x_train_rfe2)

In [None]:
fig = plt.figure()
sns.distplot((y_train, y_train_cnt), bins=20)

In [None]:
df_test[var] = scaler.transform(df_test[var])

In [None]:
y_test = df_test.pop('cnt')
x_test = df_test.drop("rentals")


In [None]:
x_test.head()

In [None]:
c = x_train_rfe2.drop('const', axis=1)

In [None]:
col2 = c.columns

In [None]:
x_test_rfe2 = x_test[col2]

In [None]:
# Add the intercept column to the test predictors before predicting with lm2.
# NOTE(review): add_constant defaults to has_constant='skip', so no column is
# added if the test subset already contains a constant-valued column - confirm
# 'const' is present in the info() output below.
x_test_rfe2 = sm.add_constant(x_test_rfe2)

In [None]:
x_test_rfe2.info()

In [None]:
# Predictions of the final model on the test rows.
y_pred = lm2.predict(x_test_rfe2)

In [None]:
# Actual vs. predicted target values for the test rows.
plt.figure()
plt.scatter(y_test, y_pred)

In [None]:
from sklearn.metrics import r2_score
# Coefficient of determination on the test rows.
r2_score(y_test, y_pred)

In [None]:
# Correlation heatmap restricted to the columns used by the final model.
plt.figure(figsize=(8,5))

# 'datset' was a typo for 'dataset' and raised NameError.
sns.heatmap(dataset[col2].corr(), cmap = "YlGnBu", annot=True)
plt.show()