In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1)) # Increased depth for better learning
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")

# Function to recommend cheapest flights
def recommend_flights(source, destination, flight_class, days_left, df, top_n=5):
    flight_class = flight_class.capitalize()
    filtered_flights = df.loc[(df["source_city"] == source) & 
                              (df["destination_city"] == destination) & 
                              (df["class"] == flight_class)].copy()
    
    if filtered_flights.empty:
        print("No matching flights found. Try adjusting your input.")
        print("Available source cities:", df["source_city"].unique())
        print("Available destination cities:", df["destination_city"].unique())
        print("Available classes:", df["class"].unique())
        return None

    filtered_flights.loc[:, "days_diff"] = abs(filtered_flights["days_left"] - days_left)
    sorted_flights = filtered_flights.sort_values(by=["days_diff", "price"])
    top_flights = sorted_flights.head(top_n)

    
    print("Recommended Flights:")
    for index, row in top_flights.iterrows():
        print(f"Flight {row['flight']}: {row['airline']} | Price: ₹{row['price']:.2f} | Days Left: {row['days_left']}")
    
    return top_flights

source_input = "Delhi"
destination_input = "Mumbai"
class_input = "Economy"
days_left_input = 2
recommended_flights = recommend_flights(source_input, destination_input, class_input, days_left_input, df)
print(recommended_flights)

Mean Absolute Error: 2055.77
Recommended Flights:
Flight I5-747: AirAsia | Price: ₹5949.00 | Days Left: 2
Flight I5-747: AirAsia | Price: ₹5949.00 | Days Left: 2
Flight SG-8803: SpiceJet | Price: ₹5953.00 | Days Left: 2
Flight SG-8169: SpiceJet | Price: ₹5953.00 | Days Left: 2
Flight G8-334: GO FIRST | Price: ₹5954.00 | Days Left: 2
      airline   flight source_city departure_time  stops   arrival_time  \
147   AirAsia   I5-747       Delhi        Evening      1  Early Morning   
148   AirAsia   I5-747       Delhi        Evening      1        Morning   
118  SpiceJet  SG-8803       Delhi  Early Morning      0        Morning   
119  SpiceJet  SG-8169       Delhi        Evening      0          Night   
126  GO FIRST   G8-334       Delhi        Morning      0        Morning   

    destination_city    class  duration  days_left  price actual_dep_time  \
147           Mumbai  Economy     12.25          2   5949           19:00   
148           Mumbai  Economy     16.33          2   5949   

In [43]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

# Load dataset
df = pd.read_csv("Clean_flight_data.csv")

# Ensure correct data types
df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

# Selecting input features and target variable
features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target].clip(upper=30000)  # Cap prices for domestic flights

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: One-hot encoding categorical variables
categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

# Create pipeline with preprocessing and model
model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1))
])

# Train model
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")

# Save model and preprocessor
with open("flight_price_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully!")


Mean Absolute Error: 2055.77
Model saved successfully!


In [31]:
import datetime
import os.path
from datetime import datetime, timedelta
from google.auth.exceptions import RefreshError  # Import RefreshError
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

SCOPES = ["https://www.googleapis.com/auth/calendar.readonly"]


HOLIDAY_CALENDAR_ID = "en.indian#holiday@group.v.calendar.google.com"  

def get_holidays(days=364):
    """Fetch upcoming public holidays from Google Calendar."""
    creds = None
    token_file = "token.json"
    
    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, SCOPES)

    try:
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())  # Attempt to refresh the token
            else:
                raise RefreshError

    except RefreshError:
        print("Token expired or revoked. Re-authenticating...")
        flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
        creds = flow.run_local_server(port=0)

        # Save new credentials
        with open(token_file, "w") as token:
            token.write(creds.to_json())

    try:
        service = build("calendar", "v3", credentials=creds)

        # Fetch upcoming holidays within the specified range
        now = datetime.now().isoformat() + "Z"
        end_date = (datetime.now() + timedelta(days=days)).isoformat() + "Z"

        events_result = (
            service.events()
            .list(
                calendarId=HOLIDAY_CALENDAR_ID,
                timeMin=now,
                timeMax=end_date,
                maxResults=20,
                singleEvents=True,
                orderBy="startTime",
            )
            .execute()
        )

        events = events_result.get("items", [])
        holiday_list = [(event["summary"], event["start"].get("date")) for event in events]

        return holiday_list

    except HttpError as error:
        print(f"An error occurred: {error}")
        return []

# Fetch holidays for the next 364 days
holidays = get_holidays(364)
print("Upcoming Holidays:", holidays)


Upcoming Holidays: [('Mesadi', '2025-04-14'), ('Ambedkar Jayanti', '2025-04-14'), ('Bahag Bihu/Vaisakhadi', '2025-04-15'), ('Good Friday', '2025-04-18'), ('Easter Day', '2025-04-20'), ('Birthday of Rabindranath', '2025-05-09'), ('Buddha Purnima/Vesak', '2025-05-12'), ('Bakrid (tentative)', '2025-06-07'), ('Rath Yatra', '2025-06-27'), ('Muharram/Ashura (tentative)', '2025-07-06'), ('Raksha Bandhan (Rakhi)', '2025-08-09'), ('Janmashtami (Smarta)', '2025-08-15'), ('Parsi New Year', '2025-08-15'), ('Independence Day', '2025-08-15'), ('Janmashtami', '2025-08-16'), ('Ganesh Chaturthi/Vinayaka Chaturthi', '2025-08-27'), ('Milad un-Nabi/Id-e-Milad (tentative)', '2025-09-05'), ('Onam', '2025-09-05'), ('First Day of Sharad Navratri', '2025-09-22'), ('First Day of Durga Puja Festivities', '2025-09-28')]


In [32]:
def get_demand_factor(travel_date, holidays):
    """Adjust demand factor based on holidays near the travel date."""
    event_factor = 1.0  # Default factor
    
    # ✅ Define high & medium demand holidays
    high_demand_events = [
        "diwali", "new year", "christmas", "eid", "bakrid", "muharram", "gudi padwa", 
        "ugadi", "ramzan id", "eid-ul-fitar"
    ]
    
    medium_demand_events = [
        "long weekend", "festival", "holi", "raksha bandhan", "janmashtami", "good friday", 
        "republic day", "independence day", "rath yatra", "rama navami", "vaisakhi", 
        "buddha purnima", "ambedkar jayanti", "easter", "mahavir jayanti"
    ]
    
    for holiday_name, holiday_date in holidays:
        if holiday_date == travel_date:  # ✅ Exact match
            event_name = holiday_name.lower()
            
            # ✅ High-demand events (30% increase)
            if any(keyword in event_name for keyword in high_demand_events):
                event_factor = 1.5  
                break  
            
            # ✅ Medium-demand events (20% increase)
            elif any(keyword in event_name for keyword in medium_demand_events):
                event_factor = 1.99  

    return event_factor

# ✅ Updated Holiday List
holidays = [
    ("Jamat Ul-Vida (tentative)", "2025-03-28"), 
    ("Chaitra Sukhladi", "2025-03-30"), 
    ("Gudi Padwa", "2025-03-30"), 
    ("Ugadi", "2025-03-30"), 
    ("Ramzan Id/Eid-ul-Fitar (tentative)", "2025-03-31"), 
    ("Rama Navami", "2025-04-06"), 
    ("Mahavir Jayanti", "2025-04-10"), 
    ("Vaisakhi", "2025-04-13"), 
    ("Mesadi", "2025-04-14"), 
    ("Ambedkar Jayanti", "2025-04-14"), 
    ("Bahag Bihu/Vaisakhadi", "2025-04-15"), 
    ("Good Friday", "2025-04-18"), 
    ("Easter Day", "2025-04-20"), 
    ("Birthday of Rabindranath", "2025-05-09"), 
    ("Buddha Purnima/Vesak", "2025-05-12"), 
    ("Bakrid (tentative)", "2025-06-07"), 
    ("Rath Yatra", "2025-06-27"), 
    ("Muharram/Ashura (tentative)", "2025-07-06"), 
    ("Raksha Bandhan (Rakhi)", "2025-08-09"), 
    ("Janmashtami (Smarta)", "2025-08-15")
]

# ✅ Example Usage
travel_date = "2025-04-06"  # Janmashtami
demand_factor = get_demand_factor(travel_date, holidays)
print(f"Demand Factor for {travel_date}: {demand_factor}")


Demand Factor for 2025-04-06: 1.99


In [14]:
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1))
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")

# Function to recommend flights with demand impact
def recommend_flights(source, destination, flight_class, days_left, df, model, top_n=10):
    """Recommend flights based on dataset price and ML-predicted adjustments."""
    
    # Filter available flights
    filtered_flights = df[(df["source_city"] == source) & 
                          (df["destination_city"] == destination) & 
                          (df["class"] == flight_class)].copy()

    if filtered_flights.empty:
        return "No matching flights found. Try adjusting your input."

    # Predict the adjustment factor (percentage change)
    filtered_flights["price_adjustment"] = model.predict(filtered_flights[["source_city", "destination_city", "class", "days_left"]])

    # Convert percentage change to factor
    adjustment_factor = (filtered_flights["price_adjustment"] / 100) + 1  

    # Apply the ML adjustment on dataset price
    filtered_flights["final_price"] = filtered_flights["price"] * adjustment_factor  

    # Calculate price difference
    filtered_flights["price_change"] = filtered_flights["final_price"] - filtered_flights["price"]

    # Sort by best price
    sorted_flights = filtered_flights.sort_values(by=["days_left", "final_price"]).head(top_n)

    return sorted_flights[["airline", "flight", "departure_time", "stops", "arrival_time", "days_left", "price", "final_price", "price_change"]]


# **Corrected `days_left` for Janmashtami Booking**
travel_date = "2025-08-15"  # Janmashtami Date
today = datetime.today().strftime('%Y-%m-%d')  # Current Date
days_left_input = (datetime.strptime(travel_date, "%Y-%m-%d") - datetime.strptime(today, "%Y-%m-%d")).days

# Example Input
source_input = "Delhi"
destination_input = "Mumbai"
class_input = "Economy"

recommended_flights = recommend_flights(source_input, destination_input, class_input, days_left_input, df, model)
print(recommended_flights)


Mean Absolute Error: 2055.77
     airline   flight departure_time  stops   arrival_time  days_left  price  \
18   AirAsia   I5-747        Evening      1  Early Morning          1   5949   
19   AirAsia   I5-747        Evening      1        Morning          1   5949   
0   SpiceJet  SG-8709        Evening      0          Night          1   5953   
1   SpiceJet  SG-8157  Early Morning      0        Morning          1   5953   
8   GO FIRST   G8-334        Morning      0        Morning          1   5954   
9   GO FIRST   G8-336      Afternoon      0        Evening          1   5954   
10  GO FIRST   G8-392      Afternoon      0        Evening          1   5954   
11  GO FIRST   G8-338        Morning      0      Afternoon          1   5954   
20  GO FIRST   G8-266  Early Morning      1        Evening          1   5954   
21  GO FIRST   G8-101  Early Morning      1          Night          1   5954   

      final_price   price_change  
18  366474.443106  360525.443106  
19  366474.443106  3

In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from datetime import datetime,date
import pickle
df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1))
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

def recommend_flights(source, destination, flight_class, travel_date, df, model, holidays, top_n=10):
    """Recommend flights based on price and event-based demand."""

    # ✅ Step 1: Convert travel_date to YYYY-MM-DD format
    today_date = date.today()  # Get today's date
    travel_date_obj = datetime.strptime(travel_date, "%d-%m-%Y").date()  # Convert input to date object
    travel_date_str = travel_date_obj.strftime("%Y-%m-%d")  # Convert to correct format
    days_left = (travel_date_obj - today_date).days  # Calculate days left

    # ✅ Step 2: Filter flights based on days_left
    filtered_flights = df[
        (df["source_city"] == source) & 
        (df["destination_city"] == destination) & 
        (df["class"] == flight_class) & 
        (df["days_left"] == days_left)
    ].copy()

    if filtered_flights.empty:
        return "No matching flights found. Try adjusting your input."

    # ✅ Step 3: Check if travel_date matches any holiday
    event_days = {event_date: event_name for event_name, event_date in holidays}  # Ensure keys are YYYY-MM-DD

    print("Event Days:", event_days)  # Debugging
    print("User Travel Date:", travel_date_str)  # Debugging

    if travel_date_str in event_days:
        demand_factor = get_demand_factor(travel_date_str, holidays)  # Pass full list of holidays
        print(f"Demand Factor for {travel_date_str}: {demand_factor}")  # Debugging
    else:
        demand_factor = 1.0  # No price increase

    # ✅ Step 4: Apply demand factor to prices
    filtered_flights["final_price"] = filtered_flights["price"] * demand_factor
    filtered_flights["price_change"] = filtered_flights["final_price"] - filtered_flights["price"]

    # ✅ Step 5: Return sorted flights
    sorted_flights = filtered_flights.sort_values(by=["final_price", "days_left"]).head(top_n)
    return sorted_flights[["airline", "flight", "departure_time","actual_dep_time", "stops", "arrival_time","actual_arr_time", "days_left", "price", "final_price", "price_change"]]

# Example Input
source_input = "Chennai"
destination_input = "Mumbai"
class_input = "Economy"
travel_date = "11-05-2025" # User inputs this date in DD-MM-YYYY format
holidays = get_holidays()
recommended_flights = recommend_flights(source_input, destination_input, class_input, travel_date, df, model, holidays)
print(recommended_flights)

#with open("flight_price_model.pkl", "wb") as model_file:
#    pickle.dump(model, model_file)

Event Days: {'2025-04-14': 'Ambedkar Jayanti', '2025-04-15': 'Bahag Bihu/Vaisakhadi', '2025-04-18': 'Good Friday', '2025-04-20': 'Easter Day', '2025-05-09': 'Birthday of Rabindranath', '2025-05-12': 'Buddha Purnima/Vesak', '2025-06-07': 'Bakrid (tentative)', '2025-06-27': 'Rath Yatra', '2025-07-06': 'Muharram/Ashura (tentative)', '2025-08-09': 'Raksha Bandhan (Rakhi)', '2025-08-15': 'Independence Day', '2025-08-16': 'Janmashtami', '2025-08-27': 'Ganesh Chaturthi/Vinayaka Chaturthi', '2025-09-05': 'Onam', '2025-09-22': 'First Day of Sharad Navratri', '2025-09-28': 'First Day of Durga Puja Festivities'}
User Travel Date: 2025-05-11
          airline   flight departure_time actual_dep_time  stops  \
190900  Air India   AI-569  Early Morning           06:20      0   
190901  Air India   AI-672      Afternoon           14:55      0   
190902    AirAsia  I5-1229        Evening           16:05      1   
190903    AirAsia  I5-1229        Evening           16:05      1   
190904    AirAsia   I5

In [1]:
import pandas as pd
import numpy as np
import os
import joblib
from datetime import datetime, date, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Google Calendar API Setup
SCOPES = ["https://www.googleapis.com/auth/calendar.readonly"]
HOLIDAY_CALENDAR_ID = "en.indian#holiday@group.v.calendar.google.com"  

def enrich_features(df, user_date=None, holidays_map=None):

    df = df.copy()

    if user_date:  # Prediction mode
        travel_date_obj = datetime.strptime(user_date, "%d-%m-%Y").date()
        day_of_week = travel_date_obj.weekday()  # Monday=0, Sunday=6
        is_holiday = 1 if travel_date_obj.strftime("%Y-%m-%d") in holidays_map else 0

        df["day_of_week"] = day_of_week
        df["is_holiday"] = is_holiday

    else:  # Training mode — you don't have journey_date
        df["day_of_week"] = 0  # Placeholder
        df["is_holiday"] = 0   # Placeholder

    return df


def get_holidays():
    """Fetch upcoming public holidays from Google Calendar."""
    creds = None
    token_file = "token.json"

    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, SCOPES)

    if not creds or not creds.valid:
        flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
        creds = flow.run_local_server(port=0)

        with open(token_file, "w") as token:
            token.write(creds.to_json())

    try:
        service = build("calendar", "v3", credentials=creds)
        now = datetime.now().isoformat() + "Z"
        end_date = (datetime.now() + timedelta(days=365)).isoformat() + "Z"

        events_result = service.events().list(
            calendarId=HOLIDAY_CALENDAR_ID,
            timeMin=now,
            timeMax=end_date,
            maxResults=50,
            singleEvents=True,
            orderBy="startTime",
        ).execute()

        events = events_result.get("items", [])
        return {event["start"]["date"]: event["summary"] for event in events}

    except HttpError as error:
        print(f"An error occurred: {error}")
        return {}


# Load dataset
df = pd.read_csv("Clean_flight_data.csv")

# Data Preprocessing
df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

df = enrich_features(df)  # Training mode (no user_date)

features = [
    "source_city",
    "destination_city",
    "airline",
    "departure_time",
    "arrival_time",
    "stops",
    "class",
    "days_left",
    "day_of_week",
    "is_holiday"
]

target = "price"

X = df[features]
y = df[target].clip(upper=30000)


# Train Base Price Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = [
    "source_city",
    "destination_city",
    "airline",
    "departure_time",
    "arrival_time",
    "class"
]

numeric_features = ["stops", "days_left"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", StandardScaler(), numeric_features)
    ]
)

base_model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(
        n_estimators=50,
        max_depth=15,
        min_samples_split=5,
        random_state=42,
        n_jobs=-1
    ))
])

base_model.fit(X_train, y_train)
y_pred = base_model.predict(X_test)
print(f"Base Model MAE: {mean_absolute_error(y_test, y_pred):.2f}")

# Train Holiday Price Adjustment Model
holiday_prices = df.copy()
holiday_prices["holiday_increase"] = np.random.uniform(1.1, 1.5, size=len(df))  # Simulating holiday effects
holiday_rows = df[df["is_holiday"] == 1]
if not holiday_rows.empty:
    X_holiday = holiday_rows[features]
    y_holiday = holiday_prices.loc[holiday_rows.index, "holiday_increase"] * df.loc[holiday_rows.index, target]
else:
    X_holiday = df[features]
    y_holiday = holiday_prices["holiday_increase"] * df[target]

X_holiday = df[features]
y_holiday = holiday_prices["holiday_increase"] * df[target]

X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(X_holiday, y_holiday, test_size=0.2, random_state=42)

holiday_model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(
        n_estimators=60,
        max_depth=10,
        min_samples_split=5,
        random_state=42,
        n_jobs=-1
    ))
])

holiday_model.fit(X_train_h, y_train_h)
y_pred_h = holiday_model.predict(X_test_h)
print(f"Holiday Model MAE: {mean_absolute_error(y_test_h, y_pred_h):.2f}")

def recommend_flights(source, destination, flight_class, travel_date, df, base_model, holiday_model, holidays, top_n=20):
    today_date = date.today()
    travel_date_obj = datetime.strptime(travel_date, "%d-%m-%Y").date()
    days_left = (travel_date_obj - today_date).days

    # Filter matching flights
    filtered_flights = df[
        (df["source_city"] == source) &
        (df["destination_city"] == destination) &
        (df["class"] == flight_class) &
        (df["days_left"] == days_left)
    ].copy()

    if filtered_flights.empty:
        return "No matching flights found."

    # Enrich with day_of_week and is_holiday
    filtered_flights = enrich_features(filtered_flights, user_date=travel_date, holidays_map=holidays)

    # Predict base price
    filtered_flights["predicted_price"] = base_model.predict(filtered_flights[features])

    # Holiday adjustment if applicable
    if filtered_flights["is_holiday"].iloc[0] == 1:
        holiday_price_adjustment = holiday_model.predict(filtered_flights[features])
        filtered_flights["predicted_price"] = (
            filtered_flights["predicted_price"] +
            (holiday_price_adjustment - filtered_flights["predicted_price"]) * 0.75
        )

    return filtered_flights[["airline", "flight", "departure_time", "stops", "arrival_time", "days_left", "price", "predicted_price"]].sort_values(by=["predicted_price", "days_left"]).head(top_n)


# Example Input
source_input = "Chennai"
destination_input = "Mumbai"
class_input = "Economy"
travel_date = "11-05-2025"  # User input in DD-MM-YYYY format

# Fetch holidays dynamically
holidays = get_holidays()
print("Upcoming Holidays:", holidays)

# Recommend flights
recommended_flights = recommend_flights(source_input, destination_input, class_input, travel_date, df, base_model, holiday_model, holidays)
print(recommended_flights)

#joblib.dump(base_model, "base_model.pkl")
#joblib.dump(holiday_model, "holiday_model.pkl")

Base Model MAE: 724.30
Holiday Model MAE: 3955.55
Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=668567308767-lg6jpvit6ru7nkhaqmlm78gmc0i058cr.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A16408%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcalendar.readonly&state=ylGTtU7uNeBjaQl2NS7u9mAgjtqHql&access_type=offline
Upcoming Holidays: {'2025-04-15': 'Bahag Bihu/Vaisakhadi', '2025-04-18': 'Good Friday', '2025-04-20': 'Easter Day', '2025-05-09': 'Birthday of Rabindranath', '2025-05-12': 'Buddha Purnima/Vesak', '2025-06-07': 'Bakrid (tentative)', '2025-06-27': 'Rath Yatra', '2025-07-06': 'Muharram/Ashura (tentative)', '2025-08-09': 'Raksha Bandhan (Rakhi)', '2025-08-15': 'Independence Day', '2025-08-16': 'Janmashtami', '2025-08-27': 'Ganesh Chaturthi/Vinayaka Chaturthi', '2025-09-05': 'Onam', '2025-09-22': 'First Day of Sharad Navratri', '2025-09-28': 'First Day of Durga Puja Festivities', '