In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter("ignore")

In [2]:
df = pd.read_excel("Taxi_cleaned.xlsx")
df

Unnamed: 0,Trip_Distance_km,Passenger_Count,Per_Km_Rate,Trip_Duration_Minutes,Trip_Price,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Day_of_Week_Weekend,Traffic_Conditions_Low,Traffic_Conditions_Medium,Weather_Rain,Weather_Snow
0,5.279810,3,0.800000,53.82,36.2624,0,1,0,0,1,0,0,0
1,7.378374,1,1.210000,37.27,52.9032,1,0,0,1,0,0,0,0
2,6.690839,4,0.510000,116.81,36.4698,1,0,0,0,1,0,0,0
3,3.253939,2,1.710000,89.33,60.2028,0,0,0,1,0,1,0,0
4,1.733422,4,1.660000,5.05,11.2645,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,2.347067,4,0.620000,58.39,34.4049,0,0,0,1,0,1,0,0
854,8.213556,4,0.610000,61.96,62.1295,0,0,1,0,0,1,0,0
855,3.009442,3,1.780000,54.18,33.1236,0,1,0,0,1,0,1,0
856,8.350267,1,0.820000,114.94,61.2090,0,1,0,0,1,0,0,0


In [3]:
X = df.drop("Trip_Price",axis = 1)
y = df["Trip_Price"]

### Train-test split

In [4]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.8,random_state=54)

**Applying hyperparameter tunning for Lasso regression**

In [5]:
from sklearn.model_selection import GridSearchCV

#model
from sklearn.linear_model import Lasso
estimator = Lasso()

#parameters and values
param_grid = {"alpha":list(range(1,100))}

#Identifying the best value of the parameter within given values for the given data
model_hp = GridSearchCV(estimator,param_grid,cv=5,scoring="r2")

model_hp.fit(X_train,y_train)

model_hp.best_params_

{'alpha': 1}

### Modelling

In [6]:
from sklearn.linear_model import Lasso
model = Lasso(alpha=1)
model.fit(X_train,y_train)

print("Intercept",model.intercept_)
print("Coefficient",model.coef_)

Intercept -58.49247681996633
Coefficient [12.5525914  -0.         21.24497012  0.28183233 -0.          0.
 -0.         -0.         -0.         -0.          0.          0.        ]


### Prediction & Evaluation

In [7]:
#Prediction
train_prediction = model.predict(X_train)
test_prediction = model.predict(X_test)

#Evaluation
print("Train r2:",model.score(X_train,y_train))
print("Test R2:",model.score(X_test,y_test))

from sklearn.model_selection import cross_val_score
print("Cross validation score:",cross_val_score(model,X,y,cv=5).mean())

Train r2: 0.6261119156712983
Test R2: 0.6233550500861724
Cross validation score: 0.5787197208544841


In [8]:
X_train

Unnamed: 0,Trip_Distance_km,Passenger_Count,Per_Km_Rate,Trip_Duration_Minutes,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Day_of_Week_Weekend,Traffic_Conditions_Low,Traffic_Conditions_Medium,Weather_Rain,Weather_Snow
711,5.816958,2,1.36,98.11,0,1,0,0,0,0,0,0
709,5.073255,1,1.61,28.92,0,0,1,0,1,0,0,0
50,4.903994,3,1.93,100.24,0,0,0,0,0,1,0,1
524,2.181305,4,0.79,58.02,0,0,0,1,1,0,1,0
456,4.230412,3,1.17,102.30,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
737,5.823616,4,1.18,100.48,0,0,0,0,0,1,0,0
23,3.429891,1,1.85,7.07,0,0,0,0,0,1,0,0
783,6.829124,1,0.88,91.70,0,0,1,1,0,1,1,0
325,7.714444,1,1.90,84.32,0,0,0,0,0,0,1,0


### Applying Final Algo

In [9]:
X_train = X_train.drop(columns=["Passenger_Count","Time_of_Day_Evening","Time_of_Day_Morning","Time_of_Day_Night","Day_of_Week_Weekend","Traffic_Conditions_Low","Traffic_Conditions_Medium","Weather_Rain","Weather_Snow"])
X_test = X_test.drop(columns=["Passenger_Count","Time_of_Day_Evening","Time_of_Day_Morning","Time_of_Day_Night","Day_of_Week_Weekend","Traffic_Conditions_Low","Traffic_Conditions_Medium","Weather_Rain","Weather_Snow"])

from sklearn.linear_model import Lasso
lasso_best = Lasso(alpha=1)
lasso_best.fit(X_train,y_train)

print("Intercept",lasso_best.intercept_)
print("Coefficient",lasso_best.coef_)

#Prediction
train_prediction = lasso_best.predict(X_train)
test_prediction = lasso_best.predict(X_test)

#Evaluation
print("Train r2:",lasso_best.score(X_train,y_train))
print("Test R2:",lasso_best.score(X_test,y_test))
print("Cross validation score:",cross_val_score(lasso_best,X,y,cv=5).mean())

Intercept -58.49247681996634
Coefficient [12.5525914  21.24497012  0.28183233]
Train r2: 0.6261119156712982
Test R2: 0.6233550500861724
Cross validation score: 0.5787197208544841
