In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor




# Data Understanding


In [None]:
bike_dataSet = pd.read_csv("./data/day.csv")
bike_dataSet.head()

In [None]:
#Data Description
bike_dataSet.info()

In [None]:
bike_dataSet.describe()

In [None]:
bike_dataSet.shape

***Data Analysis:***
- Total Records are 730 x 16
- All the columns except dteday i.e. date object type are either integer or float
- There are fields that are categorical in nature, but their values are integer/float

# Data Check

In [None]:
# Percentage of missing values per column, largest first
missing_pct = bike_dataSet.isnull().sum() / len(bike_dataSet) * 100
missing_pct.round(2).sort_values(ascending=False)

In [None]:
# drop_duplicates returns a new frame, so the original data set stays untouched;
# comparing the two shapes shows whether any duplicate rows existed
bike_dataSet_dup = bike_dataSet.drop_duplicates()
bike_dataSet_dup.shape

In [None]:
bike_dataSet.shape

***Data Analysis:***
- There is no missing value in the data set
- There are no duplicate rows, as the de-duplicated data set has the same shape as the actual data set

# Data Cleaning

In [None]:
# Remove the columns that are not used as predictors of the total count
unused_columns = ['instant', 'dteday', 'casual', 'registered']
bike_dataSet = bike_dataSet.drop(columns=unused_columns)
bike_dataSet.head()

***Data Analysis and Actions:***
- "instant" column is the serial no, so we can remove this
- "dteday" column is the date type column, which is not required as we have year and month columns present
- as our target is to achieve the total number of bikes used, we can remove the "casual" and "registered" columns


In [None]:
bike_dataSet.info()

In [None]:
# Replace the integer season codes with readable labels.
# weekday and mnth are deliberately left as numeric codes, so their dummy
# columns are later named after the code (e.g. mnth_8).
season_labels = {1: 'spring', 2: 'summer', 3: 'fall', 4: 'winter'}
bike_dataSet['season'] = bike_dataSet['season'].map(season_labels)

In [None]:
bike_dataSet.head()

In [None]:
bike_dataSet.shape

# Creating Dummy Variables

There are 4 fields for which we need to create dummy variables, as they are categorical variables
- "mnth"
- "weekday"
- "season"
- "weathersit"

In [None]:
bike_dataSet.head()

In [None]:
# Cast the categorical columns to the 'category' dtype before creating the dummy variables
categorical_columns = ['season', 'weathersit', 'mnth', 'weekday']
for column in categorical_columns:
    bike_dataSet[column] = bike_dataSet[column].astype('category')
bike_dataSet.info()

In [None]:
# Create the dummy (indicator) variables for the categorical columns.
# get_dummies replaces each categorical column with its indicator columns;
# drop_first=True omits the first level of every category so that the
# remaining indicators are not perfectly collinear with the intercept.
bike_clean_dataSet = pd.get_dummies(bike_dataSet,drop_first=True)
bike_clean_dataSet.info()

# Data Visualization With Pair Plot

In [None]:
# Pair plot of the target and the continuous (non-categorical) variables
continuous_columns = ['cnt', 'temp', 'atemp', 'hum', 'windspeed']
bike_num = bike_clean_dataSet[continuous_columns]

sns.pairplot(bike_num, diag_kind='kde')
plt.show()

Analysis
- There is a linear relation between cnt and both atemp and temp

# Data Visualization With Box Plot

In [None]:
# One box plot of cnt per categorical predictor, laid out on a 2x3 grid
plt.figure(figsize=(25, 10))
categorical_predictors = ['season', 'mnth', 'weathersit', 'holiday', 'weekday', 'workingday']
for position, predictor in enumerate(categorical_predictors, start=1):
    plt.subplot(2, 3, position)
    sns.boxplot(x=predictor, y='cnt', data=bike_dataSet)
plt.show()

Analysis of Categorical data

- season can be a good predictor for our model as the season 3 has the median almost at par 5000, and season 2 and 4 also behind that with median of 4000+
- mnth also can be a good predictor as it also shows that the bike booking go up in the month of 9th followed by 6th, 8th and 10th.
- weathersit is a good predictor too as it clearly shows that the weathersit1 has the median of 5000+
- For the rest of them, we will see how the model predicts, as there is not much difference in their medians

# Splitting Data Set

## Splitting the Data into Training and Testing Sets

In [None]:
# random_state pins which rows land in each split, so the train and test data
# sets always contain the same rows on every run; np.random.seed additionally
# seeds NumPy's global random generator.
np.random.seed(0)
bike_train_set, bike_test_set = train_test_split(bike_clean_dataSet, train_size = 0.7, test_size = 0.3, random_state = 100)

In [None]:
print(bike_train_set.shape)
print(bike_test_set.shape)

## Rescaling the Features
- We need to rescale some of the data so that the variables have a comparable scale

In [None]:
scaler = MinMaxScaler()
# Scale only the continuous columns; the dummy variables and the 1/0 variables are left as they are
num_vars = ['temp', 'atemp', 'hum', 'windspeed', 'cnt']
# Fit the scaler on the training set and transform it in the same step;
# the same fitted scaler is reused (transform only) on the test set later
bike_train_set[num_vars] = scaler.fit_transform(bike_train_set[num_vars])
bike_train_set.head()

In [None]:
bike_train_set.describe()

Now the data has been rescaled: every scaled column has a max of 1 and a min of 0. This puts the variables on a comparable scale for the correlation analysis and for interpreting the model coefficients.

## Putting up a correlation matrix
- Visualizing correlation coefficients to see what variables are highly correlated

In [None]:
# Heat map of the pairwise correlation coefficients in the training set
plt.figure(figsize=(30, 25))
corr_matrix = bike_train_set.corr()
sns.heatmap(corr_matrix, annot=True, cmap="YlGnBu")
plt.show()

We see that temp and atemp are highly correlated with cnt; let's use a scatter plot to visualize the trend

## Putting up a Scatter Plot
- Visualizing linearity in the temp and atemp variables w.r.t. cnt

In [None]:
# Overlay temp and atemp against cnt. Each series is labelled and a legend is
# drawn so the two point clouds can be told apart.
plt.figure(figsize=[6, 6])
plt.scatter(bike_train_set.temp, bike_train_set.cnt, label='temp')
plt.scatter(bike_train_set.atemp, bike_train_set.cnt, label='atemp')
plt.xlabel('temp / atemp (scaled)')
plt.ylabel('cnt (scaled)')
plt.legend()
plt.show()

We clearly see atemp and temp has a clear linear line rising up with the cnt

# Feature Selection & Model Building

## Mixed Approach - Automated RFE + Manual removing feature using VIF and P-Values

**Training Set**

In [None]:
# Divide the training set into X (features) and y (target) for model building.
# Selecting/dropping 'cnt' instead of pop() leaves bike_train_set untouched, so
# this cell can be re-run without a KeyError on the already-removed column.
y_bike_train_cnt = bike_train_set['cnt']
X_bike_train_set = bike_train_set.drop(columns=['cnt'])

### Iteration 1

#### Feature Selection
- We will be using the LinearRegression function from SciKit Learn for its compatibility with RFE (which is a utility from sklearn)

In [None]:
# RFE clones and fits the estimator itself, so the LinearRegression object is
# handed over unfitted (a separate lr.fit beforehand would be discarded work)
lr = LinearRegression()

# Keep the 15 features that RFE ranks highest
rfe = RFE(estimator=lr, n_features_to_select=15)
rfe = rfe.fit(X_bike_train_set, y_bike_train_cnt)
# (column name, selected?, RFE rank) for every candidate feature
list(zip(X_bike_train_set.columns, rfe.support_, rfe.ranking_))

In [None]:
# RFE supported column
rfe_sup_col = X_bike_train_set.columns[rfe.support_]
rfe_sup_col


In [None]:
#  RFE non supporting columns
X_bike_train_set.columns[~rfe.support_]

In [None]:
# Create data frame only selecting the RFE supported variables
X_bike_train_set_rfe = X_bike_train_set[rfe_sup_col]
X_bike_train_set_rfe.head()

#### Model Building
- Building model using statsmodel, for the detailed statistics

In [None]:
# Adding the constant variable
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and the above summary, 'hum', 'temp' and 'workingday' have VIF values > 5, i.e. high multicollinearity, with 'p value' at '0.0', which is under 0.05.
- We need to remove the high VIF features.


### Iteration 2

#### Removing High VIF Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['hum'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model again with new set of features
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, 'temp' and 'workingday' has VIF value > 5 i.e. high multicollinearity with 'p value' at '0.0' which is under 0.05.
- But 'temp' has high significance with the target variable 'cnt'.
- So, We need to remove the high P-Value features.

### Iteration 3

#### Removing High P-Value Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['mnth_8'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, all feature has VIF < 5.
- Therefore removing the high P-Value feature

### Iteration 4

#### Removing High P-Value Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['mnth_6'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, all feature has VIF < 5.
- Therefore removing high P-Value

### Iteration 5

#### Removing High P-Value Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['mnth_4'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, all feature has VIF < 5.
- Therefore removing the feature with the highest P-Value

### Iteration 6

#### Removing High P-Value Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['mnth_5'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, all feature has VIF < 5.
- Therefore removing high P-Value

### Iteration 7

#### Removing High P-Value Features

In [None]:
X_bike_train_set_rfe=X_bike_train_set_rfe.drop(['mnth_3'],axis=1)
X_bike_train_set_rfe.head()

#### Model Re-Build

In [None]:
# Adding constant again
X_bike_train_lrm = sm.add_constant(X_bike_train_set_rfe)
X_bike_train_lrm.head()

In [None]:
# Running the LR Model
lrm = sm.OLS(y_bike_train_cnt,X_bike_train_lrm.astype(float)).fit()

#### Model Summary

In [None]:
#Let's see the summary of our latest LR Model
print(lrm.summary())

In [None]:
# Goodness of fit on the training set, expressed as percentages
print(f"R-Squared: {round(lrm.rsquared * 100, 1)}%")
print(f"Adj.R-Squared: {round(lrm.rsquared_adj * 100, 1)}%")

#### Model VIF

In [None]:
# VIF is computed on the predictors only, so the added const column is removed first
X_bike_train_vif = X_bike_train_lrm.drop(columns=['const'])
# Build a table with one row per feature and its variance inflation factor,
# rounded to two decimals and sorted from highest to lowest
vif_matrix = X_bike_train_vif.astype(float).values
vif_values = [variance_inflation_factor(vif_matrix, i) for i in range(X_bike_train_vif.shape[1])]
vif = pd.DataFrame({'Features': X_bike_train_vif.columns, 'VIF': vif_values})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by="VIF", ascending=False)
vif

Analysis:
- According to the VIF table and above summary, all feature has VIF < 5.
- And all the P-Values are 0
- Therefore considering this the final model

# Final Model Interpretation

## Hypothesis Testing

Hypothesis testing states that:

- H0: B1=B2=......=Bn=0
- H1: at least one Bi!=0

## Final Model Coefficient Values

- const             0.2671
- yr                0.2354
- holiday          -0.0970
- temp              0.4078
- windspeed        -0.1356
- season_spring    -0.1162
- season_winter     0.0480
- mnth_9            0.0700
- weathersit_2     -0.0786
- weathersit_3     -0.2885

## Insight
- It is evident that all our coefficients are not equal to zero, which means We **Reject The NULL Hypothesis**
- The F-Statistics value of 233 (which is greater than 1) and the p-value of '~0.0000' states that the overall model is significant

## Equation
- cnt = 0.2671+(**yr** x 0.2354)+(**holiday** x -0.0970)+(**temp** x 0.4078)+(**windspeed** x -0.1356)+(**season_spring** x -0.1162)+(**season_winter** x 0.0480)+(**mnth_9** x 0.0700)+(**weathersit_2** x -0.0786)+(**weathersit_3** x -0.2885)

## Interpretation Of Co-efficients

- **const**         : This is the intercept: when all the predictor variables are zero, the predicted (scaled) bike rental count is 0.2671 units.
- **yr**            : This indicates that a unit increase in this variable, **increases** the bike rental by 0.2354 units.
- **holiday**       : This indicates that a unit increase in this variable, **decreases** the bike rental by 0.0970 units.
- **temp**          : This indicates that a unit increase in this variable, **increases** the bike rental by 0.4078 units.
- **windspeed**     : This indicates that a unit increase in this variable, **decreases** the bike rental by 0.1356 units.
- **season_spring** : This indicates that a unit increase in this variable, **decreases** the bike rental by 0.1162 units.
- **season_winter** : This indicates that a unit increase in this variable, **increases** the bike rental by 0.0480 units.
- **mnth_9**        : This indicates that a unit increase in this variable, **increases** the bike rental by 0.0700 units.
- **weathersit_2**  : This indicates that a unit increase in this variable, **decreases** the bike rental by 0.0786 units.
- **weathersit_3**  : This indicates that a unit increase in this variable, **decreases** the bike rental by 0.2885 units.

### No Multicollinearity between the predictor variables as VIF for all the features are less than 5

# Residual Analysis Of Trained Data -- Assumptions

In [None]:
y_bike_train_cnt_pred = lrm.predict(X_bike_train_lrm)

In [None]:
# Plot the histogram of the error terms (residual = actual - predicted)
residuals = y_bike_train_cnt - y_bike_train_cnt_pred
fig = plt.figure()
# histplot replaces the deprecated sns.distplot; stat='density' together with
# kde=True reproduces distplot's density histogram plus KDE curve
sns.histplot(residuals, bins=20, kde=True, stat='density')
fig.suptitle('Error Terms', fontsize=14)                  # Plot heading
plt.xlabel('Errors', fontsize=12)
plt.show()

Analysis:
- We can observe that the errors are evenly distributed, which is a good sign that the assumptions are valid.

In [None]:
# Actual vs predicted count on the training set, to understand the spread
fig, ax = plt.subplots()
ax.scatter(y_bike_train_cnt, y_bike_train_cnt_pred)
fig.suptitle('Actual Count vs Predicted Count', fontsize=20)
ax.set_xlabel('Actual Count', fontsize=18)
ax.set_ylabel('Predicted Count', fontsize=16)

# Making Prediction

## Applying the scaling on the test sets

In [None]:
num_vars = ['temp', 'atemp', 'hum', 'windspeed', 'cnt']
# Scale the test set with the scaler that was already fitted on the training
# set (transform only, no refit)
bike_test_set[num_vars] = scaler.transform(bike_test_set[num_vars])
bike_test_set.head()

## Dividing Test Set into X and Y

In [None]:
# Divide the test set into X (features) and y (target) for model evaluation.
# Selecting/dropping 'cnt' instead of pop() leaves bike_test_set untouched, so
# this cell can be re-run without a KeyError on the already-removed column.
y_bike_test_cnt = bike_test_set['cnt']
X_bike_test_set = bike_test_set.drop(columns=['cnt'])

## Predict with the Trained Model "lrm"

In [None]:
# Now let's use our model to make predictions.
# Take the final predictor names from X_bike_train_set_rfe (the frame the last
# design matrix was built from) rather than dropping 'const' from
# X_bike_train_lrm in place, so the training design matrix stays intact and the
# residual-analysis cells can still be re-run.
final_features = X_bike_train_set_rfe.columns
# Keep only the predictors used by the final model
X_bike_test_tm = X_bike_test_set[final_features]
# Add the intercept column. has_constant='add' forces it even if some test
# column happens to be constant (the default 'skip' would then omit it), and
# astype(float) mirrors the dtype the model was fitted on.
X_bike_test_tm = sm.add_constant(X_bike_test_tm.astype(float), has_constant='add')

In [None]:
y_bike_test_cnt_pred = lrm.predict(X_bike_test_tm)

## Model Evaluation

In [None]:
# Actual vs predicted count on the test set, to understand the spread
fig, ax = plt.subplots()
ax.scatter(y_bike_test_cnt, y_bike_test_cnt_pred)
fig.suptitle('Actual Count vs Predicted Count', fontsize=20)
ax.set_xlabel('Actual Count', fontsize=18)
ax.set_ylabel('Predicted Count', fontsize=16)

## R^2 Value Of Test Set

In [None]:
# Training-set scores taken from the fitted statsmodels result
print(f"R^2 Score for Training Set : {round(lrm.rsquared * 100, 1)}%")
print(f"Adjusted R^2 Score for Training Set: {round(lrm.rsquared_adj * 100, 1)}%")

# Test-set R^2 from the actual and predicted (scaled) counts
r2 = r2_score(y_bike_test_cnt, y_bike_test_cnt_pred)
print(f'Shape of Test Set: {X_bike_test_tm.shape}')
print(f'R^2 Score for the Test Set: {round(r2*100,2)}%')

# Number of rows
n = X_bike_test_tm.shape[0]

# Number of predictors: the intercept column 'const' is not a predictor, so it
# is excluded from the count (errors='ignore' keeps this safe if it is absent)
p = X_bike_test_tm.drop(columns=['const'], errors='ignore').shape[1]

# Adjusted R-squared: 1 - (1 - R^2) * (n - 1) / (n - p - 1)
r2_adj = 1 - (1 - r2) * (n - 1) / (n - p - 1)
print(f'Adjusted R^2 Score for the Test Set: {round(r2_adj*100,2)}%')

# Final Report

As per Final Model below are the top 3 predictors which influence the bike bookings:
- **Year (yr):** The coefficient for the "yr" variable is 0.2354 with a very low p-value (p < 0.001), indicating a highly significant positive effect on bike demand. This suggests that there has been a significant increase in bike rentals over time.
- **Temperature (temp):** The coefficient for the "temp" variable is 0.4078 with a very low p-value (p < 0.001), indicating a highly significant positive effect on bike demand. This suggests that higher temperatures lead to increased bike rentals, which is intuitive as people are more likely to ride bikes in warmer weather.
- **Weather Situation (weathersit):** Both "weathersit_2" (partly cloudy) and "weathersit_3" (rain/snow/fog) variables have significant coefficients with very low p-values (p < 0.001). However, their coefficients have negative values, indicating a negative effect on bike demand. This suggests that adverse weather conditions (partly cloudy, rain, snow, fog) lead to decreased bike rentals, which is reasonable as people may be less inclined to ride bikes in such weather conditions.

Then the next considerable predictors would be as follows:

- **Season (season):** Spring season has a coefficient of **-0.1162**, which indicates that a unit increase in this variable **decreases** the bike rental by 0.1162 units. Winter season has a coefficient of **0.0480**, which indicates that a unit increase in this variable **increases** the bike rental by **0.0480** units.
- **windspeed:** This variable states that a unit increase in it will decrease the bike rental by **0.1356** units.
