In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1)) # Increased depth for better learning
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")

# Function to recommend cheapest flights
def recommend_flights(source, destination, flight_class, days_left, df, top_n=5):
    flight_class = flight_class.capitalize()
    filtered_flights = df.loc[(df["source_city"] == source) & 
                              (df["destination_city"] == destination) & 
                              (df["class"] == flight_class)].copy()
    
    if filtered_flights.empty:
        print("No matching flights found. Try adjusting your input.")
        print("Available source cities:", df["source_city"].unique())
        print("Available destination cities:", df["destination_city"].unique())
        print("Available classes:", df["class"].unique())
        return None

    filtered_flights.loc[:, "days_diff"] = abs(filtered_flights["days_left"] - days_left)
    sorted_flights = filtered_flights.sort_values(by=["days_diff", "price"])
    top_flights = sorted_flights.head(top_n)

    
    print("Recommended Flights:")
    for index, row in top_flights.iterrows():
        print(f"Flight {row['flight']}: {row['airline']} | Price: ₹{row['price']:.2f} | Days Left: {row['days_left']}")
    
    return top_flights

source_input = "Delhi"
destination_input = "Mumbai"
class_input = "Economy"
days_left_input = 20
recommended_flights = recommend_flights(source_input, destination_input, class_input, days_left_input, df)
print(recommended_flights)

Mean Absolute Error: 2055.77
Recommended Flights:
Flight I5-747: AirAsia | Price: ₹2409.00 | Days Left: 20
Flight I5-773: AirAsia | Price: ₹2409.00 | Days Left: 20
Flight I5-721: AirAsia | Price: ₹2409.00 | Days Left: 20
Flight I5-1529: AirAsia | Price: ₹2409.00 | Days Left: 20
Flight I5-773: AirAsia | Price: ₹2409.00 | Days Left: 20
      airline   flight source_city departure_time  stops   arrival_time  \
3608  AirAsia   I5-747       Delhi        Evening      1     Late Night   
3609  AirAsia   I5-773       Delhi          Night      1     Late Night   
3610  AirAsia   I5-721       Delhi          Night      1  Early Morning   
3611  AirAsia  I5-1529       Delhi        Morning      1          Night   
3612  AirAsia   I5-773       Delhi          Night      1  Early Morning   

     destination_city    class  duration  days_left  price  days_diff  
3608           Mumbai  Economy      5.58         20   2409          0  
3609           Mumbai  Economy      6.42         20   2409          0

In [43]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

# Load dataset
df = pd.read_csv("Clean_flight_data.csv")

# Ensure correct data types
df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

# Selecting input features and target variable
features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target].clip(upper=30000)  # Cap prices for domestic flights

# Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: One-hot encoding categorical variables
categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

# Create pipeline with preprocessing and model
model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1))
])

# Train model
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")

# Save model and preprocessor
with open("flight_price_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully!")


Mean Absolute Error: 2055.77
Model saved successfully!


In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1)) # Increased depth for better learning
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")

# Function to recommend cheapest flights
def recommend_flights(source, destination, flight_class, days_left, df, model, top_n=10):
    """Recommend flights based on price and real-time event demand."""
    
    # Filter available flights
    filtered_flights = df[(df["source_city"] == source) & 
                          (df["destination_city"] == destination) & 
                          (df["class"] == flight_class)].copy()

    if filtered_flights.empty:
        return "No matching flights found. Try adjusting your input."

    # Predict base price using ML model
    filtered_flights["predicted_price"] = model.predict(filtered_flights[["source_city", "destination_city", "class", "days_left"]])

    # Fetch real-time event data
    events = get_upcoming_events(30)

    # Adjust prices based on demand
    filtered_flights["event_impact"] = filtered_flights["days_left"].apply(lambda x: get_demand_factor(x, events))
    filtered_flights["final_price"] = filtered_flights["predicted_price"] * filtered_flights["event_impact"]

    # Sort by best price
    sorted_flights = filtered_flights.sort_values(by=["days_left","final_price"]).head(top_n)

    return sorted_flights[["airline", "flight", "departure_time", "stops", "arrival_time","days_left", "final_price"]]

source_input = "Delhi"
destination_input = "Mumbai"
class_input = "Economy"
days_left_input = 1
recommended_flights = recommend_flights(source_input, destination_input, class_input, days_left_input, df,model)
print(recommended_flights)

Mean Absolute Error: 2055.77
    airline   flight departure_time  stops   arrival_time  days_left  \
0  SpiceJet  SG-8709        Evening      0          Night          1   
1  SpiceJet  SG-8157  Early Morning      0        Morning          1   
2   AirAsia   I5-764  Early Morning      0  Early Morning          1   
3   Vistara   UK-995        Morning      0      Afternoon          1   
4   Vistara   UK-963        Morning      0        Morning          1   
5   Vistara   UK-945        Morning      0      Afternoon          1   
6   Vistara   UK-927        Morning      0        Morning          1   
7   Vistara   UK-951      Afternoon      0        Evening          1   
8  GO FIRST   G8-334        Morning      0        Morning          1   
9  GO FIRST   G8-336      Afternoon      0        Evening          1   

   final_price  
0  6060.269677  
1  6060.269677  
2  6060.269677  
3  6060.269677  
4  6060.269677  
5  6060.269677  
6  6060.269677  
7  6060.269677  
8  6060.269677  
9  6060.

In [13]:
import datetime
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# If modifying these scopes, delete the file token.json.
SCOPES = ["https://www.googleapis.com/auth/calendar.readonly"]

# Public Holiday Calendar ID (Example: India)
HOLIDAY_CALENDAR_ID = "en.indian#holiday@group.v.calendar.google.com"  # Change as needed

def get_holidays(days=364):
    """Fetch upcoming public holidays from Google Calendar."""
    creds = None
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        
        with open("token.json", "w") as token:
            token.write(creds.to_json())

    try:
        service = build("calendar", "v3", credentials=creds)

        # Fetch upcoming holidays within the specified range
        now = datetime.datetime.now().isoformat() + "Z"
        end_date = (datetime.datetime.now() + datetime.timedelta(days=days)).isoformat() + "Z"

        events_result = (
            service.events()
            .list(
                calendarId=HOLIDAY_CALENDAR_ID,
                timeMin=now,
                timeMax=end_date,
                maxResults=20,
                singleEvents=True,
                orderBy="startTime",
            )
            .execute()
        )

        events = events_result.get("items", [])
        holiday_list = [(event["summary"], event["start"].get("date")) for event in events]

        return holiday_list

    except HttpError as error:
        print(f"An error occurred: {error}")
        return []

# Fetch holidays for the next 30 days
holidays = get_holidays(364)
print("Upcoming Holidays:", holidays)

Upcoming Holidays: [('Jamat Ul-Vida (tentative)', '2025-03-28'), ('Chaitra Sukhladi', '2025-03-30'), ('Gudi Padwa', '2025-03-30'), ('Ugadi', '2025-03-30'), ('Ramzan Id/Eid-ul-Fitar (tentative)', '2025-03-31'), ('Rama Navami', '2025-04-06'), ('Mahavir Jayanti', '2025-04-10'), ('Vaisakhi', '2025-04-13'), ('Mesadi', '2025-04-14'), ('Ambedkar Jayanti', '2025-04-14'), ('Bahag Bihu/Vaisakhadi', '2025-04-15'), ('Good Friday', '2025-04-18'), ('Easter Day', '2025-04-20'), ('Birthday of Rabindranath', '2025-05-09'), ('Buddha Purnima/Vesak', '2025-05-12'), ('Bakrid (tentative)', '2025-06-07'), ('Rath Yatra', '2025-06-27'), ('Muharram/Ashura (tentative)', '2025-07-06'), ('Raksha Bandhan (Rakhi)', '2025-08-09'), ('Janmashtami (Smarta)', '2025-08-15')]


In [15]:
def get_demand_factor(travel_date, holidays):
    """Adjust demand factor based on holidays near the travel date."""
    event_factor = 1.0  # Default factor
    
    # Define high & medium demand holidays
    high_demand_events = ["diwali", "new year", "christmas", "eid", "bakrid", "muharram","Gudi Padwa"]
    medium_demand_events = ["long weekend", "festival", "holi", "raksha bandhan", "janmashtami", 
                            "good friday", "republic day", "independence day", "rath yatra"]
    
    for holiday_name, holiday_date in holidays:
        if holiday_date == travel_date:  # Exact match
            event_name = holiday_name.lower()
            
            # High-demand events (30% increase)
            if any(keyword in event_name for keyword in high_demand_events):
                event_factor = 1.3  
                break  # No need to check further
            
            # Medium-demand events (20% increase)
            elif any(keyword in event_name for keyword in medium_demand_events):
                event_factor = 1.2  

    return event_factor

# ✅ Fetch upcoming holidays
holidays = get_holidays(300)  # Fetch for the next 300 days

# Example Usage
travel_date = "2025-08-15"  # Janmashtami
demand_factor = get_demand_factor(travel_date, holidays)
print(f"Demand Factor for {travel_date}: {demand_factor}")


Demand Factor for 2025-08-15: 1.2


In [16]:
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error

df = pd.read_csv("Clean_flight_data.csv")

df["days_left"] = df["days_left"].astype(int)
df["class"] = df["class"].str.capitalize()

features = ["source_city", "destination_city", "class", "days_left"]
target = "price"

X = df[features]
y = df[target]

y = y.clip(upper=30000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_features = ["source_city", "destination_city", "class"]

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
    ]
)

model = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=5, random_state=42, n_jobs=-1))
])

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"Mean Absolute Error: {mae:.2f}")

# Function to recommend flights with demand impact
def recommend_flights(source, destination, flight_class, days_left, df, model, top_n=10):
    """Recommend flights based on price and real-time event demand."""
    
    filtered_flights = df[(df["source_city"] == source) & 
                          (df["destination_city"] == destination) & 
                          (df["class"] == flight_class)].copy()

    if filtered_flights.empty:
        return "No matching flights found. Try adjusting your input."

    filtered_flights["predicted_price"] = model.predict(filtered_flights[["source_city", "destination_city", "class", "days_left"]])

    upcoming_holidays = get_holidays(300)

    filtered_flights["event_impact"] = filtered_flights["days_left"].apply(lambda x: get_demand_factor(x, upcoming_holidays))
    filtered_flights["final_price"] = filtered_flights["predicted_price"] * filtered_flights["event_impact"]
    
    filtered_flights["price_change"] = filtered_flights["final_price"] - filtered_flights["predicted_price"]

    sorted_flights = filtered_flights.sort_values(by=["days_left", "final_price"]).head(top_n)

    return sorted_flights[["airline", "flight", "departure_time", "stops", "arrival_time", "days_left", "predicted_price", "final_price", "price_change"]]

# **Corrected `days_left` for Janmashtami Booking**
travel_date = "2025-08-15"  # Janmashtami Date
today = datetime.today().strftime('%Y-%m-%d')  # Current Date
days_left_input = (datetime.strptime(travel_date, "%Y-%m-%d") - datetime.strptime(today, "%Y-%m-%d")).days

# Example Input
source_input = "Delhi"
destination_input = "Mumbai"
class_input = "Economy"

recommended_flights = recommend_flights(source_input, destination_input, class_input, days_left_input, df, model)
print(recommended_flights)


Mean Absolute Error: 2055.77


AttributeError: type object 'datetime.datetime' has no attribute 'datetime'