In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

data = []

print("Generating 1000 Taxi Trips...")

for i in range(1000):
    dist_km = np.random.uniform(1, 50)
    traffic_min = np.random.uniform(5, 120)

    price = 50 + (12 * dist_km) + (2 * traffic_min) + np.random.normal(0, 15)

    data.append([dist_km, traffic_min, price])

df = pd.DataFrame(data, columns=['Distance_KM', 'Traffic_Minutes', 'Price'])
print("\nSample Data (First 5 rides):")
print(df.head())

X = df[['Distance_KM', 'Traffic_Minutes']]
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\n--- Model Performance Results ---")

model_a = LinearRegression()
model_a.fit(X_train, y_train)
acc_a = model_a.score(X_test, y_test)
print(f"Model A (Linear Regression): {acc_a*100:.2f}% Accuracy")

model_b = RandomForestRegressor(n_estimators=50)
model_b.fit(X_train, y_train)
acc_b = model_b.score(X_test, y_test)
print(f"Model B (Random Forest):     {acc_b*100:.2f}% Accuracy")

if acc_a > acc_b:
    print("\nWinner: Linear Regression is better.")
else:
    print("\nWinner: Random Forest is better.")

Generating 1000 Taxi Trips...

Sample Data (First 5 rides):
   Distance_KM  Traffic_Minutes       Price
0    28.680009        94.215851  583.187693
1    27.046961        49.753011  471.412905
2     7.071699        77.137939  283.279596
3    48.248709         5.692932  629.113811
4     2.528774        86.093623  254.784243

--- Model Performance Results ---
Model A (Linear Regression): 99.38% Accuracy
Model B (Random Forest):     99.11% Accuracy

Winner: Linear Regression is better.
