<h2 align='center'>Flight Price Prediction Model: MLflow Experiment Tracking</h2>

In [15]:
# Import Libraries
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from joblib import dump, load

import warnings
warnings.filterwarnings("ignore")

In [16]:
# Read the flights CSV from the working directory, parsing the 'date' column as datetimes
flights_dataset = pd.read_csv(
    "flights.csv",
    parse_dates=['date'],
)

In [17]:
# One-hot encode the categorical columns; drop='first' leaves out the first category
# of every encoded column
ohe = OneHotEncoder(drop='first')
encoded_var = ohe.fit_transform(flights_dataset[['from', 'to', 'flightType', 'agency']])

# Turn the encoder output into an integer DataFrame labelled with the encoder's feature names
encoded_df = pd.DataFrame(
    encoded_var.toarray(),
    columns=ohe.get_feature_names_out(),
    dtype=int,
)

In [18]:
# Original categorical columns that are replaced by their one-hot encoded counterparts
encoded_source_cols = ['from', 'to', 'flightType', 'agency']

# Guard against re-running this cell: once the original categorical columns are gone the
# merge has already been applied, and repeating it would append the encoded columns a
# second time and then fail with a KeyError while dropping the (already removed) originals.
if any(col in flights_dataset.columns for col in encoded_source_cols):
    # Drop the original categorical columns, then append the encoded columns column-wise.
    # The resulting column order matches concatenating first and dropping afterwards.
    flights_dataset = pd.concat(
        [flights_dataset.drop(columns=encoded_source_cols), encoded_df],
        axis=1,
    )

In [19]:
# Target is the 'price' column; features are every remaining column except 'date'
y = flights_dataset['price']
X = flights_dataset.drop(columns=['date', 'price'])

# Hold out 20% of the rows for testing, with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Random Forest regressor: build it with a fixed seed and train it on the training split
# (fit() returns the estimator itself, so construction and training can be chained)
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict on the held-out test features
y_pred_rf = rf_model.predict(X_test)

In [21]:
# Score the Random Forest predictions against the held-out targets
mse_rf, mae_rf, r2_rf = (
    metric(y_test, y_pred_rf)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report each metric on its own line
print("Random Forest Model Evaluation Metrics:")
for label, value in (
    ("Mean Squared Error (MSE):", mse_rf),
    ("Mean Absolute Error (MAE):", mae_rf),
    ("R-squared (R2) Score:", r2_rf),
):
    print(label, value)

# Optional persistence, disabled by default; uncomment to write the artifacts to disk
# dump(rf_model, 'rf_model.joblib') # save the rf_model model
# dump(ohe, 'cat_encoder.joblib') # save the categorical encoder

Random Forest Model Evaluation Metrics:
Mean Squared Error (MSE): 0.11533580459763008
Mean Absolute Error (MAE): 0.004339021298612483
R-squared (R2) Score: 0.9999991247135043


### Track Experiments

In [22]:
import mlflow
import mlflow.sklearn

# Connect this notebook to the DagsHub repository with its MLflow integration enabled
import dagshub

dagshub.init(
    repo_owner='Rajesh1505',
    repo_name='Flight_Price_Prediction_Model',
    mlflow=True,
)

In [23]:
# One entry per candidate model: (display name, hyperparameters to track, estimator class)
model_specs = [
    ("Decision Tree Regressor", {"random_state": 45}, DecisionTreeRegressor),
    ("Random Forest Regressor", {"random_state": 45}, RandomForestRegressor),
    ("Gradient Boosting Regressor", {"random_state": 45}, GradientBoostingRegressor),
    ("Linear Regression", {}, LinearRegression),
]

# Build each estimator from its own parameter dict, producing the
# (name, params, estimator) tuples consumed by the tracking loop
models = [
    (display_name, hyperparams, estimator_cls(**hyperparams))
    for display_name, hyperparams, estimator_cls in model_specs
]

In [24]:
# Select the MLflow experiment under which one run per model is recorded
mlflow.set_experiment("Regression Model Evaluation")

for model_name, params, model in models:
    # Apply the tracked hyperparameters and train on the training split
    # (set_params() and fit() both return the estimator, so the calls chain)
    model.set_params(**params).fit(X_train, y_train)

    # Score the held-out test split
    y_pred = model.predict(X_test)
    mse, mae, r2 = (
        metric(y_test, y_pred)
        for metric in (mean_squared_error, mean_absolute_error, r2_score)
    )

    # Echo the scores in the notebook output
    print("\n\n")
    print(f"{model_name} Evaluation Metrics:")
    print(f"Mean Squared Error (MSE): {mse}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"R-squared (R2) Score: {r2}\n")

    # Metrics payload recorded on the run
    run_metrics = {
        "mean_squared_error": mse,
        "mean_absolute_error": mae,
        "r2_score": r2,
    }

    # One MLflow run per model: hyperparameters, metrics, then the fitted estimator
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)
        mlflow.log_metrics(run_metrics)
        mlflow.sklearn.log_model(model, "model")





Decision Tree Regressor Evaluation Metrics:
Mean Squared Error (MSE): 0.3366748023097551
Mean Absolute Error (MAE): 0.003367538348791879
R-squared (R2) Score: 0.9999974449659501





🏃 View run Decision Tree Regressor at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0/runs/aba8128cbdc045cc81d67f00ccf0e7ec
🧪 View experiment at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0



Random Forest Regressor Evaluation Metrics:
Mean Squared Error (MSE): 0.13760743201018724
Mean Absolute Error (MAE): 0.003806780318806686
R-squared (R2) Score: 0.9999989556935301





🏃 View run Random Forest Regressor at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0/runs/c3264fb34a7c4fbf9743a297e7de27cf
🧪 View experiment at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0



Gradient Boosting Regressor Evaluation Metrics:
Mean Squared Error (MSE): 1900.569464298239
Mean Absolute Error (MAE): 34.498375451450144
R-squared (R2) Score: 0.9855765276698222





🏃 View run Gradient Boosting Regressor at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0/runs/6a5ef8c6103340e59d11d588ec47fabe
🧪 View experiment at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0



Linear Regression Evaluation Metrics:
Mean Squared Error (MSE): 10640.016681277286
Mean Absolute Error (MAE): 81.32658135983428
R-squared (R2) Score: 0.9192526297628911





🏃 View run Linear Regression at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0/runs/3c3c3685871a44b99aa1bd70874ae9f8
🧪 View experiment at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow/#/experiments/0


View the MLflow experiment at: https://dagshub.com/Rajesh1505/Flight_Price_Prediction_Model.mlflow