In [1]:

"""Predict the price of the Uber ride from a given pic
 1. Pre-process the dataset.
 2. Identify outliers.
 3. Check the correlation.
 4. Implement linear regression and ridge, Lasso regres
 5. Evaluate the models and compare their respective sc"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.impute import SimpleImputer

# Load the raw ride records.
df = pd.read_csv('uber.csv')

# Drop unnecessary columns
df = df.drop(columns=['Unnamed: 0', 'key', 'pickup_datetime'])

# A row without a fare has no label to learn from or score against, so it is
# removed rather than filled with the mean fare.
df = df.dropna(subset=['fare_amount']).reset_index(drop=True)

# NOTE(review): the outlier and correlation steps listed in the docstring are
# not implemented in this cell, and the recorded run shows R2 close to 0 for
# all three models - TODO add those steps before trusting the scores.

# Split the data into features and target
feature_cols = [col for col in df.columns if col != 'fare_amount']
X_raw = df[feature_cols]
y = df['fare_amount']

# Split the data into training and testing sets *before* any statistics are
# learned, so nothing computed from the test rows reaches the models.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Handle missing values: column means come from the training rows only and are
# then applied unchanged to the test rows.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=feature_cols,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=feature_cols,
    index=X_test_raw.index,
)

# Fully imputed views of the whole dataset, kept under their original names
# (X, df_imputed) for any later cell that still refers to them.
X = pd.DataFrame(imputer.transform(X_raw), columns=feature_cols, index=X_raw.index)
df_imputed = X.assign(fare_amount=y)[list(df.columns)]

# Standardize the features (scaling); the scaler is likewise fit on train only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ordinary least-squares baseline (no regularization).
lr_model = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)

# Ridge: linear model with an L2 penalty of strength alpha.
ridge_model = Ridge(alpha=1.0).fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

# Lasso: linear model with an L1 penalty of strength alpha.
lasso_model = Lasso(alpha=0.1).fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)

# Evaluate the models
def evaluate_model(y_true, y_pred, model_name):
    """Print the R2 score and RMSE of one model's predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.
    model_name : str
        Label placed at the start of the printed line.

    Returns
    -------
    None
        The metrics are only printed (R2 to 4 decimals, RMSE to 2).
    """
    score = r2_score(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(y_true, y_pred)
    root_mse = np.sqrt(mse)
    print(f"{model_name} R2 Score: {score:.4f}, RMSE: {root_mse:.2f}")

# Report every model against the same held-out targets, in a fixed order.
for label, predictions in (
    ("Linear Regression", y_pred_lr),
    ("Ridge Regression", y_pred_ridge),
    ("Lasso Regression", y_pred_lasso),
):
    evaluate_model(y_test, predictions, label)

Linear Regression R2 Score: 0.0003, RMSE: 10.31
Ridge Regression R2 Score: 0.0003, RMSE: 10.31
Lasso Regression R2 Score: -0.0000, RMSE: 10.31
