<a href="https://colab.research.google.com/github/Muskanpoddar/Linear_Regression/blob/main/Taxi_Fare_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Upload the dataset
import io

from google.colab import files
uploaded = files.upload()

# files.upload() returns a dict of {uploaded file name: file contents as bytes}.
# Build the DataFrame from those bytes rather than from a hard-coded path:
# the file may have been uploaded under a different name, and a repeated
# upload can be stored on disk under a new local name, in which case reading
# 'Taxi_Fare.csv' from disk would fail or silently load an older copy.
if uploaded:
    # Prefer the expected name; otherwise take the first file that was uploaded.
    csv_name = 'Taxi_Fare.csv' if 'Taxi_Fare.csv' in uploaded else next(iter(uploaded))
    csv_source = io.BytesIO(uploaded[csv_name])
else:
    # Nothing was uploaded in this run: fall back to a copy already on disk.
    csv_source = 'Taxi_Fare.csv'

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_source)

# Display first 5 rows to check the data
print("Dataset Preview:")
print(df.head())

# Check for missing values (count of nulls per column)
print("\nMissing Values Check:")
print(df.isnull().sum())

# Select the model inputs (X) and the value to predict (y) from the DataFrame.
# NOTE(review): Pickup_Area / Dropoff_Area are passed to the model as plain
# numeric columns; if they are area identifiers rather than quantities,
# confirm that treating them as numbers is intended.
X = df.loc[:, ['Trip_Miles', 'Trip_Seconds', 'Pickup_Area', 'Dropoff_Area']]
y = df.loc[:, 'Fare']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares linear model on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predict fares for the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test fares
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals
print("\nModel Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R² Score): {r2:.2f}")

# Put the true test-set fares side by side with the model's predictions.
# The table keeps y_test's index, so each row can be traced back to df.
comparison = y_test.to_frame(name='Actual Fare').assign(**{'Predicted Fare': y_pred})
print("\nActual vs Predicted Fares:")
print(comparison)


Saving Taxi_Fare.csv to Taxi_Fare.csv
Dataset Preview:
   Trip_Miles  Trip_Seconds  Pickup_Area  Dropoff_Area   Fare
0         2.5           600           12            15  12.50
1         1.2           300            5             8   8.75
2         3.8           900           20            22  18.40
3         0.9           240            3             4   7.20
4         5.0          1200            7             9  24.00

Missing Values Check:
Trip_Miles      0
Trip_Seconds    0
Pickup_Area     0
Dropoff_Area    0
Fare            0
dtype: int64

Model Evaluation:
Mean Squared Error (MSE): 0.94
R-squared (R² Score): 0.92

Actual vs Predicted Fares:
   Actual Fare  Predicted Fare
8        15.75       15.108290
1         8.75        7.539365
