# Taxi Trip Pricing – Basic ML Regression Project

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

## 2. Load Dataset

In [None]:
# Read the raw trip records from the CSV in the working directory and
# preview the first five rows (the default for head()).
df = pd.read_csv(filepath_or_buffer='taxi_trip_pricing.csv')
df.head(n=5)

## 3. Dataset Information

In [None]:
# info() prints column names, dtypes and non-null counts to stdout.
df.info()
# describe() returns summary statistics; as the last expression in the cell
# it is what the notebook renders as the cell output.
df.describe()

## 4. Missing Values

In [None]:
# Per-column count of missing entries (isna is the same check as isnull).
df.isna().sum()

## 5. Handle Missing Values

In [None]:
# Impute missing values by propagating the previous row's value forward.
# DataFrame.ffill() replaces fillna(method='ffill'), which has been
# deprecated since pandas 2.1.
df.ffill(inplace=True)
# Forward fill cannot fill NaNs that appear before the first valid value in
# a column, so back-fill whatever is left; the regression model fitted later
# cannot handle NaN inputs.
df.bfill(inplace=True)
# Re-check: every column should now report zero missing values
# (unless a column is entirely empty).
df.isnull().sum()

## 6. Encode Categorical Columns

In [None]:
# Replace every object-dtype column with integer codes, in place.
# A separate LabelEncoder is fitted per column and stored in `encoders`
# (keyed by column name) so the mapping for each column is retained and can
# be inverted with encoders[col].inverse_transform(...) or reused on new
# data. Refitting a single shared encoder would keep only the last mapping.
encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

df.head()

## 7. Simple EDA

In [None]:
# Histogram of the target with a KDE curve overlaid.
plt.figure()
sns.histplot(data=df, x='Trip_Price', kde=True)
plt.title('Trip Price Distribution')
plt.show()

# Scatter plot of trip distance against price.
plt.figure()
sns.scatterplot(data=df, x='Trip_Distance_km', y='Trip_Price')
plt.title('Distance vs Price')
plt.show()

## 8. Train Test Split

In [None]:
# Target is the trip price; every other column is used as a feature.
y = df['Trip_Price']
X = df.drop(columns=['Trip_Price'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 9. Train Linear Regression Model

In [None]:
# Ordinary least-squares regression with default settings, fitted on the
# training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

## 10. Model Evaluation

In [None]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# RMSE is computed as sqrt(MSE) rather than via the `squared=False` flag:
# that flag was deprecated in scikit-learn 1.4 and removed in later
# releases, while this form works on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)
print('R2 Score:', r2_score(y_test, y_pred))

## 11. Conclusion
This is a basic regression ML project suitable for beginners.