In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt

In [9]:
# Load the CSV and derive hour / weekday features from the pickup timestamp.
df = pd.read_csv("uber.csv")
pickup_ts = pd.to_datetime(df['pickup_datetime'])  # parse once, reuse for both features
df['hour'], df['day_of_week'] = pickup_ts.dt.hour, pickup_ts.dt.dayofweek
# Drop the two identifier-like columns and the raw timestamp column;
# returns a new frame rather than mutating in place.
df = df.drop(columns=['Unnamed: 0', 'key', 'pickup_datetime'])

In [10]:
# Handle missing values without leaking test-set information.
#
# The target is never imputed: a mean-filled fare is a fabricated label, so
# rows with no `fare_amount` are removed instead.
df = df.dropna(subset=['fare_amount']).reset_index(drop=True)

# Split data (before any statistics are learned from it)
X, y = df.drop(columns=['fare_amount']), df['fare_amount']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the per-column means from the training features only, then apply the
# same fill values to both splits. Fitting on the full frame would let test
# rows influence the imputation.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test), columns=X_test.columns, index=X_test.index
)

In [14]:
# Standardize features: the scaler is fit on the training split only and the
# fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train models and evaluate
def train_and_evaluate(model, name):
    """Fit ``model`` on the scaled training data and print its test metrics.

    Relies on the notebook-level variables ``X_train_scaled``, ``y_train``,
    ``X_test_scaled`` and ``y_test`` being defined before the call.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    name : str
        Label placed at the start of the printed summary line.

    Returns
    -------
    None
        The R2 score and RMSE are printed, not returned.
    """
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)

    # RMSE is the square root of the mean squared error.
    test_mse = mean_squared_error(y_test, predictions)
    test_rmse = np.sqrt(test_mse)
    test_r2 = r2_score(y_test, predictions)

    print(f"{name} - R2 Score: {test_r2:.4f}, RMSE: {test_rmse:.2f}")

In [12]:
# Evaluate each candidate regressor in turn on the same train/test split.
candidate_models = [
    ("Linear Regression", LinearRegression()),
    ("Ridge Regression", Ridge(alpha=1.0)),
    ("Lasso Regression", Lasso(alpha=0.1)),
]
for label, estimator in candidate_models:
    train_and_evaluate(estimator, label)

Linear Regression - R2 Score: 0.0007, RMSE: 10.31
Ridge Regression - R2 Score: 0.0007, RMSE: 10.31
Lasso Regression - R2 Score: 0.0003, RMSE: 10.31
