# ✈️ Flight Price Prediction (Random Forest)

**Static Notebook** — contains code and example outputs.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Load the flight dataset and discard the 'Unnamed: 0' column before modelling.
df = pd.read_csv('Clean_Dataset.csv').drop(columns=['Unnamed: 0'])

# Separate the regression target ('price') from the feature columns.
y = df['price']
X = df.drop(columns='price')

In [None]:
# Partition the feature columns by dtype: object-typed columns are collected
# in cat_cols and numeric columns in num_cols.
categorical_frame = X.select_dtypes(include='object')
numeric_frame = X.select_dtypes(include=np.number)
cat_cols = categorical_frame.columns.tolist()
num_cols = numeric_frame.columns.tolist()

In [None]:
# Column-wise preprocessing: one-hot encode the columns listed in cat_cols and
# forward the columns listed in num_cols unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
        ('num', 'passthrough', num_cols),
    ]
)

# Full modelling pipeline: preprocessing followed by a random forest regressor.
# random_state is fixed so repeated runs build the same forest.
pipeline = Pipeline(
    steps=[
        ('prep', preprocessor),
        ('model', RandomForestRegressor(random_state=42)),
    ]
)

In [None]:
# Hold out 20% of the rows for final evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyper-parameter candidates. The 'model__' prefix routes each parameter to the
# pipeline step named 'model'.
param_grid = {
    'model__n_estimators': [100],
    'model__max_depth': [None, 10],
}

# 3-fold cross-validated search scored by negative MSE, using all available cores.
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid.fit(X_train, y_train)

In [None]:
# Evaluate the tuned model on the held-out test split.
# The metrics are computed from the fitted search object instead of being
# printed as fixed literals, so the report always reflects the model that was
# actually trained.
y_pred = grid.predict(X_test)  # predicts with the best estimator refit by GridSearchCV

# RMSE is taken as the square root of the MSE.
rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
mae = float(mean_absolute_error(y_test, y_pred))
r2 = float(r2_score(y_test, y_pred))

print('✅ Best Parameters:', grid.best_params_)
print('📉 RMSE:', round(rmse, 2))
print('📊 MAE:', round(mae, 2))
print('📈 R2 Score:', round(r2, 4))