# Regression Models using scikit-learn

## Explore different regression models and apply them using the scikit-learn library

### Importing the necessary libraries and dataset

In [1]:
import os

import pandas as pd
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR


# Location of the bike-share CSV. Set the BIKESHARE_CSV environment variable to point at
# your own copy; when it is unset the original author's local path is used as a fallback.
DATA_PATH = os.environ.get('BIKESHARE_CSV', r'C:\Users\Huawei\Desktop\Reg_BikeShareDay.csv')

# Columns used as model inputs, and the column the models are trained to predict.
FEATURE_COLUMNS = ['temp', 'atemp', 'hum', 'windspeed', 'season', 'mnth',
                   'holiday', 'weekday', 'workingday', 'weathersit']
TARGET_COLUMN = 'cnt'

data = pd.read_csv(DATA_PATH)

# Fail early with a message that names every missing column, instead of the
# less specific KeyError raised by the column selection below.
missing_columns = [col for col in FEATURE_COLUMNS + [TARGET_COLUMN] if col not in data.columns]
if missing_columns:
    raise KeyError(f"Columns missing from {DATA_PATH}: {missing_columns}")

# Features and target
X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]


### Split the data into training and testing sets

In [16]:
# Hold out part of the rows for evaluation; the fixed seed keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Here `test_size=0.2` means 80% of the data is used for training and the remaining 20% is used for testing, i.e. an 80/20 split.
### `random_state=42` means the data split is reproducible every time the code is run.

### Linear Regression 


In [27]:
# Fit ordinary least squares on the training split, then predict the held-out split.
model_lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)

# Score the predictions against the held-out targets: MSE, MAE and R-squared, in that order.
mse_lr, mae_lr, r2_lr = (
    metric(y_test, y_pred_lr)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Linear Regression MSE(MeanSquaredError): ", mse_lr)
print("Linear Regression MAE(MeanAbsoluteError):", mae_lr)
print("Linear Regression R-squared:", r2_lr)

Linear Regression MSE(MeanSquaredError):  103991.59111747658
Linear Regression MAE(MeanAbsoluteError): 240.70695019630313
Linear Regression R-squared: -0.16655410884579291


### Linear regression models the relationship between a dependent variable and one or more independent variables using a linear equation.

### Ridge Regression

In [32]:
# Ridge regression with penalty strength alpha=1.0, trained on the training split.
model_ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred_ridge = model_ridge.predict(X_test)

# Evaluate the held-out predictions with the three metrics reported below.
mse_ridge, mae_ridge, r2_ridge = (
    mean_squared_error(y_test, y_pred_ridge),
    mean_absolute_error(y_test, y_pred_ridge),
    r2_score(y_test, y_pred_ridge),
)

print("Ridge Regression MSE(MeanSquaredError): ", mse_ridge)
print("Ridge Regression MAE(MeanAbsoluteError): ", mae_ridge)
print("Ridge Regression R-squared: ", r2_ridge)

Ridge Regression MSE(MeanSquaredError):  32814.52451729854
Ridge Regression MAE(MeanAbsoluteError):  174.19557503548145
Ridge Regression R-squared:  0.6318940984157864


### Ridge regression is a technique that adds a penalty on the size of the coefficients to prevent them from getting too large.
### This helps the model reduce overfitting (i.e. the model learns the training data too well, including its noise and details, making it perform poorly on unseen data).

### Lasso Regression

In [40]:
# Lasso regression with penalty strength alpha=0.1: fit on the training rows, predict the test rows.
model_lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred_lasso = model_lasso.predict(X_test)

# Compute MSE, MAE and R-squared (in that order) on the held-out split.
mse_lasso, mae_lasso, r2_lasso = (
    score(y_test, y_pred_lasso)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Lasso Regression MSE(MeanSquaredError): ", mse_lasso)
print("Lasso Regression MAE(MeanAbsoluteError): ", mae_lasso)
print("Lasso Regression R-squared: ", r2_lasso)


Lasso Regression MSE(MeanSquaredError):  79236.1081209185
Lasso Regression MAE(MeanAbsoluteError):  201.79053781596588
Lasso Regression R-squared:  0.11114729081327723


### Lasso regression adds a penalty that can shrink some coefficients to exactly zero, effectively selecting the important features.
### This helps simplify the model and prevent overfitting.

### SGD(Stochastic Gradient Descent) Regressor

In [41]:

# random_state seeds the shuffling SGD performs on the training data, so the metrics
# printed below are the same on every run instead of drifting between executions.
# NOTE(review): SGD is sensitive to feature scale; consider standardizing the inputs
# (e.g. with StandardScaler) before fitting — not done here to keep model_sgd's inputs unchanged.
model_sgd = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
model_sgd.fit(X_train, y_train)

# Predict the held-out rows and score them against the true targets.
y_pred_sgd = model_sgd.predict(X_test)
mse_sgd = mean_squared_error(y_test, y_pred_sgd)
mae_sgd = mean_absolute_error(y_test, y_pred_sgd)
r2_sgd = r2_score(y_test, y_pred_sgd)

print("SGD Regressor MSE(MeanSquaredError): ", mse_sgd)
print("SGD Regressor MAE(MeanAbsoluteError): ", mae_sgd)
print("SGD Regressor R-squared: ", r2_sgd)

SGD Regressor MSE(MeanSquaredError):  34389.090805972286
SGD Regressor MAE(MeanAbsoluteError):  177.01891931376846
SGD Regressor R-squared:  0.6142309705228067


### SGD Regressor uses a method called Stochastic Gradient Descent to update the model step by step, making it efficient for large datasets.
### It adjusts the model gradually, aiming to minimize errors in predictions.

### Support Vector Regression (SVR)

In [46]:
# Support vector regressor with an RBF kernel, trained on the training split.
model_svr = SVR(kernel='rbf').fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred_svr = model_svr.predict(X_test)

# Held-out error metrics for the SVR predictions.
mse_svr, mae_svr, r2_svr = (
    mean_squared_error(y_test, y_pred_svr),
    mean_absolute_error(y_test, y_pred_svr),
    r2_score(y_test, y_pred_svr),
)

print("SVR MSE(MeanSquaredError): ", mse_svr)
print("SVR MAE(MeanAbsoluteError): ", mae_svr)
print("SVR R-squared: ", r2_svr)

SVR MSE(MeanSquaredError):  122014.66009289038
SVR MAE(MeanAbsoluteError):  324.72961953144505
SVR R-squared:  -0.36873281330978025


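### Support vector regression uses a flexible kernel to find the best-fitting function that keeps predictions within a margin of error.
### It is effective for complex data and can handle outliers (data points that are much different from the other data points) well.
### Note: SVR is sensitive to the scale of the features, so standardizing the inputs before fitting is usually recommended.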

### Bayesian Ridge Regression

In [47]:
# Bayesian ridge regression with default settings: fit on the training rows, predict the test rows.
model_br = BayesianRidge().fit(X_train, y_train)
y_pred_br = model_br.predict(X_test)

# MSE, MAE and R-squared (in that order) on the held-out split.
mse_br, mae_br, r2_br = (
    metric(y_test, y_pred_br)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print("Bayesian Ridge Regression MSE(MeanSquaredError): ", mse_br)
print("Bayesian Ridge Regression MAE(MeanAbsoluteError): ", mae_br)
print("Bayesian Ridge Regression R_squared: ", r2_br)

Bayesian Ridge Regression MSE(MeanSquaredError):  100709.64141231301
Bayesian Ridge Regression MAE(MeanAbsoluteError):  294.35803870620003
Bayesian Ridge Regression R_squared:  -0.12973794061100974


### Bayesian Ridge Regression estimates the best-fit line by considering uncertainties in the coefficients and adding regularization (adding a penalty on the model's complexity to prevent it from overfitting).

### Elastic Net Regression

In [48]:
# Elastic net with overall penalty alpha=1.0 and l1_ratio=0.5, trained on the training split.
model_en = ElasticNet(alpha=1.0, l1_ratio=0.5).fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred_en = model_en.predict(X_test)

# Held-out error metrics for the elastic net predictions.
mse_en, mae_en, r2_en = (
    mean_squared_error(y_test, y_pred_en),
    mean_absolute_error(y_test, y_pred_en),
    r2_score(y_test, y_pred_en),
)

print("Elastic Net Regression MSE(MeanSquaredError): ", mse_en)
print("Elastic Net Regression MAE(MeanAbsoluteError): ", mae_en)
print("Elastic Net Regression R_squared: ", r2_en)

Elastic Net Regression MSE(MeanSquaredError):  62805.426318633894
Elastic Net Regression MAE(MeanAbsoluteError):  245.87205297798158
Elastic Net Regression R_squared:  0.29546295673995915


### Elastic net regression combines the ridge and lasso penalties to balance between shrinking coefficients and selecting important features.
### It helps manage correlated features and simplifies the model.