In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
full_fare_df = pd.read_csv('full_fare.csv')

In [18]:
full_fare_df.head()

Unnamed: 0,Route,Airline,Year,Direction,Price
0,Delhi ↔ Mumbai,IndiGo,2022,→,4473.74
1,Delhi ↔ Mumbai,IndiGo,2022,←,3935.42
2,Delhi ↔ Bangalore,IndiGo,2022,→,6084.36
3,Delhi ↔ Bangalore,IndiGo,2022,←,6273.5
4,Delhi ↔ Hyderabad,IndiGo,2022,→,4820.53


In [12]:
corrected_index_by_year = {
    2016: 100,
    2017: 97,
    2018: 93,
    2019: 90,
    2020: 65,
    2021: 82,
    2022: 88,
    2023: 85,
    2024: 78
}

In [14]:
def interpolate_fares(df, index_map):
    results = []
    grouped = df.groupby(["Route", "Airline", "Direction"])
    
    for (route, airline, direction), group in grouped:
        years_available = group["Year"].tolist()
        prices = group.set_index("Year")["Price"].to_dict()

        # Use average of 2022 and 2023 if both are present
        if 2022 in prices and 2023 in prices:
            base_year = "avg"
            base_index = (index_map[2022] + index_map[2023]) / 2
            base_price = (prices[2022] + prices[2023]) / 2
        elif 2022 in prices:
            base_year = 2022
            base_index = index_map[2022]
            base_price = prices[2022]
        elif 2023 in prices:
            base_year = 2023
            base_index = index_map[2023]
            base_price = prices[2023]
        else:
            continue  # Skip if no base year

        for year, index_val in index_map.items():
            if year not in years_available:
                scaled_price = round(base_price * (index_val / base_index), 2)
                results.append({
                    "Route": route,
                    "Airline": airline,
                    "Direction": direction,
                    "Year": year,
                    "Price": scaled_price,
                    "Estimated": True,
                    "Base_Used": base_year
                })

    return pd.DataFrame(results)

In [16]:
interpolated_df = interpolate_fares(full_fare_df, corrected_index_by_year)
interpolated_df.head()

Unnamed: 0,Route,Airline,Direction,Year,Price,Estimated,Base_Used
0,Bangalore ↔ Hyderabad,Air India,←,2016,8795.45,True,avg
1,Bangalore ↔ Hyderabad,Air India,←,2017,8531.59,True,avg
2,Bangalore ↔ Hyderabad,Air India,←,2018,8179.77,True,avg
3,Bangalore ↔ Hyderabad,Air India,←,2019,7915.91,True,avg
4,Bangalore ↔ Hyderabad,Air India,←,2020,5717.04,True,avg


In [20]:
# Mark the original rows as actual
full_fare_df["Estimated"] = False
full_fare_df["Base_Used"] = None

# Combine original and interpolated data
complete_fare_df = pd.concat([full_fare_df, interpolated_df], ignore_index=True)

# Optional: sort for better readability
complete_fare_df = complete_fare_df.sort_values(by=["Route", "Airline", "Direction", "Year"]).reset_index(drop=True)

# Preview
complete_fare_df.head()

Unnamed: 0,Route,Airline,Year,Direction,Price,Estimated,Base_Used
0,Bangalore ↔ Hyderabad,Air India,2016,←,8795.45,True,avg
1,Bangalore ↔ Hyderabad,Air India,2017,←,8531.59,True,avg
2,Bangalore ↔ Hyderabad,Air India,2018,←,8179.77,True,avg
3,Bangalore ↔ Hyderabad,Air India,2019,←,7915.91,True,avg
4,Bangalore ↔ Hyderabad,Air India,2020,←,5717.04,True,avg


In [22]:
airline_years = {
    "IndiGo": (2006, 2024),
    "Vistara": (2015, 2023),
    "Air India": (2000, 2024),
    "SpiceJet": (2005, 2024),
    "AirAsia": (2014, 2023),
    "Akasa": (2022, 2024),
    "Go First": (2005, 2023),
    "Jet Airways": (1993, 2019),
    "Trujet": (2015, 2022)
}

In [24]:
# Apply existence filter
interpolated_df = interpolated_df[
    interpolated_df.apply(lambda row: airline_years.get(row['Airline'], (0, 9999))[0] <= row['Year'] <= airline_years.get(row['Airline'], (0, 9999))[1], axis=1)
]

In [26]:
complete_fare_df = pd.concat([full_fare_df.assign(Estimated=False, Base_Used=None), interpolated_df], ignore_index=True)
complete_fare_df = complete_fare_df.sort_values(by=["Route", "Airline", "Direction", "Year"]).reset_index(drop=True)

In [28]:
complete_fare_df.info

<bound method DataFrame.info of                      Route    Airline  Year Direction    Price  Estimated  \
0    Bangalore ↔ Hyderabad  Air India  2016         ←  8795.45       True   
1    Bangalore ↔ Hyderabad  Air India  2017         ←  8531.59       True   
2    Bangalore ↔ Hyderabad  Air India  2018         ←  8179.77       True   
3    Bangalore ↔ Hyderabad  Air India  2019         ←  7915.91       True   
4    Bangalore ↔ Hyderabad  Air India  2020         ←  5717.04       True   
..                     ...        ...   ...       ...      ...        ...   
484     Mumbai ↔ Hyderabad    Vistara  2019         →  6516.07       True   
485     Mumbai ↔ Hyderabad    Vistara  2020         →  4706.05       True   
486     Mumbai ↔ Hyderabad    Vistara  2021         →  5936.86       True   
487     Mumbai ↔ Hyderabad    Vistara  2022         →  6231.47      False   
488     Mumbai ↔ Hyderabad    Vistara  2023         →  6293.86      False   

    Base_Used  
0         avg  
1         a

In [32]:
complete_fare_df.to_csv("complete_fare_data.csv", index=False, encoding='utf-8', sep=',')