<a href="https://colab.research.google.com/github/9mithun9/Flight-Cancellation-Prediction/blob/main/Flight_Cancellation_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import Libraries**

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

**Load Data**

In [None]:
# Location of the raw flight records; kept in one place so it is easy to swap.
DATA_PATH = 'flight_data.csv'  # Replace with actual data source
data = pd.read_csv(DATA_PATH)

**Data Preprocessing**

In [None]:
# Which summary statistic replaces missing values in each column.
impute_stat_by_column = {
    'dep_time': 'median',
    'weather_temp': 'mean',
    'wind_speed': 'mean',
}
# Compute each fill value from the column's own observed (non-missing) values.
fill_values = {
    column: data[column].agg(stat)
    for column, stat in impute_stat_by_column.items()
}
# NOTE(review): these statistics are computed over the full dataset, i.e. before
# any train/test split performed later in the notebook.
data = data.fillna(value=fill_values)

**Encoding**

In [None]:
# Categorical columns expanded into indicator (dummy) columns.
categorical_columns = ['airline', 'origin', 'dest']
# drop_first=True omits the first level of each category, so k levels become
# k-1 indicator columns.
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

In [None]:
# Column the model learns to predict.
target_column = 'cancellation'
y = data[target_column]
# Features are everything except the target and 'flight_id'
# (presumably a row identifier with no predictive value -- confirm).
X = data.drop(columns=[target_column, 'flight_id'])

**Class Imbalance**

In [None]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

Spliting Data

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

Scaling Features

In [None]:
# Learn the scaling parameters from the training features only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Random Forest

In [None]:
# Hyperparameters gathered in one place; random_state fixes the forest's seed.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params)
# Train on the scaled training features.
model.fit(X_train_scaled, y_train)

Prediction & Evaluation

In [None]:
# Predict labels for the held-out (scaled) test features.
y_pred = model.predict(X_test_scaled)
# Build the per-class precision/recall/F1 summary and print it under a header.
report_text = classification_report(y_test, y_pred)
print("Classification Report:", report_text, sep="\n")

Confusion Matrix

In [None]:
# Counts of true-vs-predicted labels on the test set.
cm = confusion_matrix(y_test, y_pred)
# Draw the heatmap on an explicitly created figure/axes pair.
cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix for Flight Cancellation Prediction')
cm_ax.set_ylabel('True Label')
cm_ax.set_xlabel('Predicted Label')
# Write the figure to disk next to the notebook.
cm_fig.savefig('confusion_matrix.png')

Plots

In [None]:
# Pair each importance score from the fitted forest with its feature name.
feature_importance = pd.Series(model.feature_importances_, index=X.columns)
# Use a dedicated figure: without an explicit Axes, Series.plot draws on the
# current axes when a figure is still open (e.g. the confusion-matrix heatmap
# when the notebook is run outside the inline backend).
fi_fig, fi_ax = plt.subplots()
feature_importance.nlargest(10).plot(kind='barh', ax=fi_ax)
fi_ax.set_title('Top 10 Feature Importances')
fi_ax.set_xlabel('Importance')
# bbox_inches='tight' keeps long feature-name tick labels from being clipped
# in the saved image.
fi_fig.savefig('feature_importance.png', bbox_inches='tight')

Deployment

In [None]:
# Persist the fitted classifier together with the scaler it was trained behind;
# inference must apply the same scaler before calling the model.
# joblib is imported with the other dependencies at the top of the notebook so
# a missing package fails before training rather than after it.
# NOTE: joblib files are pickle-based -- only load them from trusted sources.
joblib.dump(model, 'flight_cancellation_model.pkl')
joblib.dump(scaler, 'scaler.pkl')