<a href="https://colab.research.google.com/github/Nikhitha-S-Pavan/ML-and-DL-Regression-model-examples/blob/main/Bike_rental_count_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the dataset paths used in the cells below resolve.
# NOTE: google.colab is supplied by the Colab runtime; this cell is not expected to run elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import glob
#List what is inside the dataset folder on the mounted drive.
#The loop variable is deliberately not called `dir`: that name would shadow the builtin dir()
#for every later cell in the kernel. sorted() makes the listing order deterministic.
for entry in sorted(glob.glob("/content/drive/My Drive/regression_data_files_csv/Bike-rental-count/*")):
  print(entry)

In [None]:
import pandas as pd

In [None]:
#Read the day-level CSV from the mounted drive and preview its first 100 rows
day_csv_path="/content/drive/My Drive/regression_data_files_csv/Bike-rental-count/day.csv"
bike_df=pd.read_csv(day_csv_path)
bike_df.head(100)

In [None]:
#(number of rows, number of columns) of the frame as loaded
bike_df.shape

In [None]:
#Replace the abbreviated raw column names with descriptive ones (bike_df is modified in place)
column_renames={
    'instant':'rec_id',
    'dteday':'datetime',
    'yr':'year',
    'mnth':'month',
    'weathersit':'weather_condition',
    'hum':'humidity',
    'cnt':'total_count',
}
bike_df.rename(columns=column_renames,inplace=True)

In [None]:
#Check the renamed columns on the first 10 rows
bike_df.head(10)

In [None]:
#Parse the date column as datetime, then store the discrete-valued columns with the category dtype

bike_df['datetime']=pd.to_datetime(bike_df['datetime'])

categorical_columns=['season','year','month','holiday','weekday','workingday','weather_condition']
for column in categorical_columns:
  bike_df[column]=bike_df[column].astype('category')

In [None]:
#First 10 rows again, after the dtype conversions
bike_df.head(10)

In [None]:
#Summary statistics; which columns appear depends on pandas' default dtype selection for describe()
bike_df.describe()

In [None]:
#Number of missing (null) values in each column
bike_df.isnull().sum()

In [None]:
# Remove every row that contains at least one missing value.
# bike_df itself is modified (inplace=True), so no reassignment is needed.
bike_df.dropna(axis=0,how='any',inplace=True)

In [None]:
#load the required libraries
from sklearn import preprocessing,metrics,linear_model
from sklearn.model_selection import cross_val_score,cross_val_predict,train_test_split

In [None]:
#Column labels in positional order; the positional iloc slices in the following cells rely on this order
bike_df.columns

In [None]:
#Preview the two positional slices that the train/test split below passes to train_test_split.
#Only the last expression of a cell is rendered automatically, so the first slice is shown
#explicitly with display() (provided by the IPython/Colab kernel); otherwise it is silently discarded.
display(bike_df.iloc[:,0:-3].head())  #every column except the last three -> model inputs
bike_df.iloc[:,-1].head()             #last column only -> prediction target

In [None]:
#Hold out 30% of the rows as a test set; the fixed random_state makes the split repeatable
from sklearn.model_selection import train_test_split
feature_frame=bike_df.iloc[:,0:-3]   #all columns except the last three
target_series=bike_df.iloc[:,-1]     #last column
X_train,X_test,y_train,y_test=train_test_split(feature_frame,target_series,test_size=0.3,random_state=42)

#Renumber the rows from 0. reset_index() without drop keeps the previous row label
#as an extra 'index' column in each feature frame.
X_train=X_train.reset_index()
X_test=X_test.reset_index()

#For the targets, reset_index() turns each Series into a DataFrame
#(previous row label + target column); later cells read the target column from it by name.
y_train=y_train.reset_index()
y_test=y_test.reset_index()

print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)
print(y_train.head())
print(y_test.head())

In [None]:
#Model inputs, declared once so the train and test frames are guaranteed to have the same
#columns in the same order (they were previously written out twice, in different orders).
feature_columns=['season','month','year','weekday','holiday','workingday','weather_condition','humidity','temp','windspeed']
#Create a new dataset for train attributes
train_attributes=X_train[feature_columns]
#Create a new dataset for test attributes
test_attributes=X_test[feature_columns]
#categorical attributes (these are the columns one-hot encoded by pd.get_dummies further down)
cat_attributes=['season','holiday','workingday','weather_condition','year']
#numerical attributes
#NOTE(review): 'month' and 'weekday' were cast to category earlier but are listed here and are
#not one-hot encoded; this list is not referenced by any visible cell - confirm the intent.
num_attributes=['temp','windspeed','humidity','month','weekday']

In [None]:
#One-hot encode the categorical training features; columns not listed in cat_attributes pass through unchanged
train_encoded_attributes=pd.get_dummies(data=train_attributes,columns=cat_attributes)
encoded_shape=train_encoded_attributes.shape
print('Shape of transfomed dataframe::',encoded_shape)
train_encoded_attributes.head(n=5)

In [None]:
#Training dataset for modelling
X_train=train_encoded_attributes
#Keep only the target values as a NumPy array. After the split cell y_train is a DataFrame
#(previous row label + total_count); once converted it no longer has a total_count attribute,
#so the guard lets this cell be re-run without raising AttributeError.
if isinstance(y_train,pd.DataFrame):
  y_train=y_train.total_count.values

In [None]:
#Create an (unfitted) scikit-learn LinearRegression estimator with its default settings
lr_model=linear_model.LinearRegression()
#Display the estimator's repr as the cell output
lr_model

In [None]:
#Fit the linear model on the encoded training features and the training targets
lr_model.fit(X_train,y_train)

In [None]:
#Goodness of fit on the training data.
#LinearRegression.score returns the coefficient of determination (R^2), not a classification
#accuracy, so the printed label says so. It is measured on the same rows the model was fitted
#on, so it is an optimistic figure.
lr=lr_model.score(X_train,y_train)
print('R-squared on training data :',lr)
print('Model coefficients :',lr_model.coef_)
print('Model intercept value :',lr_model.intercept_)

In [None]:
#Cross-validated predictions for the training rows, using 3 folds
predict=cross_val_predict(estimator=lr_model,X=X_train,y=y_train,cv=3)
predict

In [None]:
import numpy as np
#Score the model with 3-fold cross-validation on the training data, then report the mean across folds
r2_scores=cross_val_score(estimator=lr_model,X=X_train,y=y_train,cv=3)
mean_r2=np.mean(r2_scores)
print('R-squared scores :',mean_r2)

## **Model performance on test dataset**

In [None]:
#One-hot encode the categorical test features the same way as the training features
test_encoded_attributes=pd.get_dummies(test_attributes,columns=cat_attributes)
#Align the test matrix to the training design matrix: same columns, same order.
#A dummy column absent from the test encoding is added as all zeros, and any column the model
#was not trained on is dropped, so predict() always receives the layout used for fit().
test_encoded_attributes=test_encoded_attributes.reindex(columns=train_encoded_attributes.columns,fill_value=0)
print('Shape of transformed dataframe :',test_encoded_attributes.shape)
test_encoded_attributes.head(5)

In [None]:
#Test dataset for prediction
X_test=test_encoded_attributes
#Keep only the target values as a NumPy array. After the split cell y_test is a DataFrame
#(previous row label + total_count); once converted it no longer has a total_count attribute,
#so the guard lets this cell be re-run without raising AttributeError.
if isinstance(y_test,pd.DataFrame):
  y_test=y_test.total_count.values

In [None]:
#Predict the target for the encoded test features with the fitted linear model
lr_pred=lr_model.predict(X_test)
#Display the predictions as the cell output
lr_pred

In [None]:
import math
#Error metrics comparing the test targets with the model's predictions
mse=metrics.mean_squared_error(y_test,lr_pred)
#Root mean square error: square root of the mean squared error
rmse=math.sqrt(mse)
#Mean absolute error
mae=metrics.mean_absolute_error(y_test,lr_pred)
print('Root mean square error :',rmse)
print('Mean absolute error :',mae)