In [None]:
import pandas as pd
import numpy as np

In [None]:
import pandas as pd

# Load the volume dataset (hard-coded Colab path; point it at the actual input file)
df = pd.read_csv("/content/new_to_look.csv")

# 1. EXPORT SHIPMENT DATA
# Columns carried forward into shipment_data.csv.
shipment_data_cols = [
    'Origin Branch',
    'Origin Hub',
    'Origin Hub City',
    'Origin Airport City',
    'Destination Airport City',
    'Destination Hub City',
    'Destination Hub',
    'Destination Branch',
    'New Product GCP',
    'Weights',
    'Conno',
    'Flight_Mode',
    'Product_Path',
]
shipment_data = df[shipment_data_cols]
shipment_data.to_csv("shipment_data.csv", index=False)
print("✅ Saved: shipment_data.csv")

# 2. UNIQUE NO FLIGHT OD PAIRS
# Rows whose Flight_Mode is exactly 'NO_FLIGHT'.
no_flight_df = df[df['Flight_Mode'].eq('NO_FLIGHT')]

# One row per distinct hub-city pair (city columns, not branch codes),
# relabelled as plain origin / destination cities.
no_flight_od_pairs = (
    no_flight_df[['Origin Hub City', 'Destination Hub City']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={
        'Origin Hub City': 'Origin City',
        'Destination Hub City': 'Destination City',
    })
)

# The OD-pair export below is currently commented out:
# no_flight_od_pairs.to_csv("no_flight_OD_pairs.csv", index=False)
# print("✅ Saved: no_flight_OD_pairs.csv")

✅ Saved: shipment_data.csv


In [None]:
# Preview up to the first 40 rows of the exported shipment columns.
shipment_data.head(40)

Unnamed: 0,Origin Branch,Origin Hub,Origin Hub City,Origin Airport City,Destination Airport City,Destination Hub City,Destination Hub,Destination Branch,New Product GCP,Weights,Conno,Flight_Mode,Product_Path
0,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,EP,266.57376,49.0,NO_FLIGHT,ALT_REQUIRED
1,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,ES,62.72,31.0,NO_FLIGHT,ALT_REQUIRED
2,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,BARODA,BARODA,A17-BARODA SCS APEX,A02-BARODA BRANCH,EP,0.48,2.0,NO_FLIGHT,ALT_REQUIRED
3,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,BARODA,BARODA,A17-BARODA SCS APEX,A02-BARODA BRANCH,ES,1.614,85.0,NO_FLIGHT,ALT_REQUIRED
4,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,EP,1428.48,128.0,NO_FLIGHT,ALT_REQUIRED
5,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,ES,3.6136,116.0,NO_FLIGHT,ALT_REQUIRED
6,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A04-SURAT APEX,EP,4.6,5.0,NO_FLIGHT,ALT_REQUIRED
7,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A04-SURAT APEX,ES,0.548,25.0,NO_FLIGHT,ALT_REQUIRED
8,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A05-VAPI BRANCH,ES,4.30208,94.0,NO_FLIGHT,ALT_REQUIRED
9,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A07-ANKLESHWAR BRANCH,ES,0.42552,50.0,NO_FLIGHT,ALT_REQUIRED


In [None]:
import pandas as pd

# STEP 1: Load shipment dataset
df = pd.read_csv("shipment_data.csv")

# STEP 2: Clean and prepare data
# Missing values in the two numeric columns are replaced with 0.
for numeric_col in ("Weights", "Conno"):
    df[numeric_col] = df[numeric_col].fillna(0)

# STEP 3: Normalise the Flight_Mode column
def get_mode(row):
    """Return the row's Flight_Mode if it is a recognised label, else "UNKNOWN".

    `row` is any mapping-like object supporting row["Flight_Mode"]
    (here: a DataFrame row passed by df.apply(..., axis=1)).
    """
    mode = row["Flight_Mode"]
    # Checked in the same order as the labels are listed; anything else
    # (including missing values) collapses to "UNKNOWN".
    for known_mode in ("NO_FLIGHT", "PRIME+GCR", "PRIME", "GCR"):
        if mode == known_mode:
            return known_mode
    return "UNKNOWN"

# Overwrite Flight_Mode with its normalised label, one row at a time.
df["Flight_Mode"] = df.apply(get_mode, axis=1)

# STEP 4: Save records with NO_FLIGHT mode (ODs that need alternate routing)
# Note: this writes every NO_FLIGHT shipment row with all columns,
# not a de-duplicated list of OD pairs.
no_flight_df = df.loc[df["Flight_Mode"] == "NO_FLIGHT"]
no_flight_df.to_csv("no_flight_OD_pairs.csv", index=False)
print("✅ Saved OD pairs with NO_FLIGHT to 'no_flight_OD_pairs.csv'")

# STEP 5: Aggregate volume at airport level
# NOTE(review): Total_ChargedWeight and Total_Shipments are both the sum of
# "Conno", so the two columns are always identical — confirm which source
# column Total_ChargedWeight was meant to use.
airport_volume = (
    df.groupby(["Origin Airport City", "Destination Airport City"])
    .agg(
        Total_Weight=("Weights", "sum"),
        Total_ChargedWeight=("Conno", "sum"),
        Total_Shipments=("Conno", "sum"),
        Unique_Products=("New Product GCP", "nunique"),
        Num_OD_Branches=("Destination Branch", "nunique"),
        # Distinct flight modes seen on the lane, alphabetically, comma-separated.
        Modes=("Flight_Mode", lambda modes: ", ".join(sorted(set(modes)))),
    )
    .reset_index()
)

# STEP 6: Save airport-to-airport volume matrix
airport_volume.to_csv("airport_to_airport_volume.csv", index=False)
print("✅ Aggregated airport volumes saved to 'airport_to_airport_volume.csv'")

✅ Saved OD pairs with NO_FLIGHT to 'no_flight_OD_pairs.csv'
✅ Aggregated airport volumes saved to 'airport_to_airport_volume.csv'


In [None]:
# Preview up to the first 40 airport-to-airport aggregate rows.
airport_volume.head(40)

Unnamed: 0,Origin Airport City,Destination Airport City,Total_Weight,Total_ChargedWeight,Total_Shipments,Unique_Products,Num_OD_Branches,Modes
0,AGARTALA,AGARTALA,135.29476,377.0,377.0,3,5,NO_FLIGHT
1,AGARTALA,AGRA,0.31552,13.0,13.0,2,4,NO_FLIGHT
2,AGARTALA,AHMEDABAD,1.45456,55.0,55.0,2,10,NO_FLIGHT
3,AGARTALA,BANGALORE,58.88232,318.0,318.0,2,21,GCR
4,AGARTALA,BARODA,0.24772,14.0,14.0,2,3,NO_FLIGHT
5,AGARTALA,BHUBANESHWAR,0.89832,31.0,31.0,2,5,NO_FLIGHT
6,AGARTALA,CHANDIGARH,3.0316,51.0,51.0,2,18,NO_FLIGHT
7,AGARTALA,CHENNAI,1.4912,65.0,65.0,2,15,GCR
8,AGARTALA,COCHIN,0.76208,23.0,23.0,2,10,NO_FLIGHT
9,AGARTALA,COIMBATORE,0.63068,6.0,6.0,2,3,NO_FLIGHT


In [10]:
# Drop the other aggregate columns from airport_volume, leaving the airport
# pair and Total_Weight.
airport_volume1 = airport_volume.drop(
    ['Total_ChargedWeight', 'Total_Shipments', 'Unique_Products', 'Num_OD_Branches', 'Modes'],
    axis='columns',
)

In [11]:
# Preview up to the first 30 rows of the slimmed-down volume table.
airport_volume1.head(30)

Unnamed: 0,Origin Airport City,Destination Airport City,Total_Weight
0,AGARTALA,AGARTALA,135.29476
1,AGARTALA,AGRA,0.31552
2,AGARTALA,AHMEDABAD,1.45456
3,AGARTALA,BANGALORE,58.88232
4,AGARTALA,BARODA,0.24772
5,AGARTALA,BHUBANESHWAR,0.89832
6,AGARTALA,CHANDIGARH,3.0316
7,AGARTALA,CHENNAI,1.4912
8,AGARTALA,COCHIN,0.76208
9,AGARTALA,COIMBATORE,0.63068


In [12]:
# Rename Total_Weight to "Weight", the column name the route-suggestion cells read.
airport_volume1 = airport_volume1.rename({'Total_Weight': 'Weight'}, axis='columns')
airport_volume1.head(30)

Unnamed: 0,Origin Airport City,Destination Airport City,Weight
0,AGARTALA,AGARTALA,135.29476
1,AGARTALA,AGRA,0.31552
2,AGARTALA,AHMEDABAD,1.45456
3,AGARTALA,BANGALORE,58.88232
4,AGARTALA,BARODA,0.24772
5,AGARTALA,BHUBANESHWAR,0.89832
6,AGARTALA,CHANDIGARH,3.0316
7,AGARTALA,CHENNAI,1.4912
8,AGARTALA,COCHIN,0.76208
9,AGARTALA,COIMBATORE,0.63068


In [None]:
# Preview up to the first 40 NO_FLIGHT shipment rows.
no_flight_df.head(40)

Unnamed: 0,Origin Branch,Origin Hub,Origin Hub City,Origin Airport City,Destination Airport City,Destination Hub City,Destination Hub,Destination Branch,New Product GCP,Weights,Conno,Flight_Mode,Product_Path
0,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,EP,266.57376,49.0,NO_FLIGHT,ALT_REQUIRED
1,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,ES,62.72,31.0,NO_FLIGHT,ALT_REQUIRED
2,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,BARODA,BARODA,A17-BARODA SCS APEX,A02-BARODA BRANCH,EP,0.48,2.0,NO_FLIGHT,ALT_REQUIRED
3,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,BARODA,BARODA,A17-BARODA SCS APEX,A02-BARODA BRANCH,ES,1.614,85.0,NO_FLIGHT,ALT_REQUIRED
4,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,EP,1428.48,128.0,NO_FLIGHT,ALT_REQUIRED
5,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,AHMEDABAD,AHMEDABAD,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,ES,3.6136,116.0,NO_FLIGHT,ALT_REQUIRED
6,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A04-SURAT APEX,EP,4.6,5.0,NO_FLIGHT,ALT_REQUIRED
7,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A04-SURAT APEX,ES,0.548,25.0,NO_FLIGHT,ALT_REQUIRED
8,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A05-VAPI BRANCH,ES,4.30208,94.0,NO_FLIGHT,ALT_REQUIRED
9,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,SURAT,SURAT,A04-SURAT APEX,A07-ANKLESHWAR BRANCH,ES,0.42552,50.0,NO_FLIGHT,ALT_REQUIRED


In [13]:
# Branch-level view of the NO_FLIGHT rows: the two branches, the two airport
# cities and the shipment weight. .copy() gives an independent frame.
no_flight_subset_df = no_flight_df.loc[
    :,
    ['Origin Branch', 'Destination Branch', 'Origin Airport City', 'Destination Airport City', 'Weights'],
].copy()

no_flight_subset_df.head(30)

Unnamed: 0,Origin Branch,Destination Branch,Origin Airport City,Destination Airport City,Weights
0,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,266.57376
1,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,62.72
2,A01-AHMEDABAD APEX,A02-BARODA BRANCH,AHMEDABAD,BARODA,0.48
3,A01-AHMEDABAD APEX,A02-BARODA BRANCH,AHMEDABAD,BARODA,1.614
4,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,AHMEDABAD,AHMEDABAD,1428.48
5,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,AHMEDABAD,AHMEDABAD,3.6136
6,A01-AHMEDABAD APEX,A04-SURAT APEX,AHMEDABAD,SURAT,4.6
7,A01-AHMEDABAD APEX,A04-SURAT APEX,AHMEDABAD,SURAT,0.548
8,A01-AHMEDABAD APEX,A05-VAPI BRANCH,AHMEDABAD,SURAT,4.30208
9,A01-AHMEDABAD APEX,A07-ANKLESHWAR BRANCH,AHMEDABAD,SURAT,0.42552


In [15]:
# Rename "Weights" to "Volume" in no_flight_subset_df (the column the
# route-suggestion loops read as row['Volume']).
no_flight_subset_df = no_flight_subset_df.rename({'Weights': 'Volume'}, axis='columns')

In [16]:
# Preview up to the first 40 rows of the renamed NO_FLIGHT subset.
no_flight_subset_df.head(40)

Unnamed: 0,Origin Branch,Destination Branch,Origin Airport City,Destination Airport City,Volume
0,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,266.57376
1,A01-AHMEDABAD APEX,A01-AHMEDABAD APEX,AHMEDABAD,AHMEDABAD,62.72
2,A01-AHMEDABAD APEX,A02-BARODA BRANCH,AHMEDABAD,BARODA,0.48
3,A01-AHMEDABAD APEX,A02-BARODA BRANCH,AHMEDABAD,BARODA,1.614
4,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,AHMEDABAD,AHMEDABAD,1428.48
5,A01-AHMEDABAD APEX,A03-RAJKOT BRANCH,AHMEDABAD,AHMEDABAD,3.6136
6,A01-AHMEDABAD APEX,A04-SURAT APEX,AHMEDABAD,SURAT,4.6
7,A01-AHMEDABAD APEX,A04-SURAT APEX,AHMEDABAD,SURAT,0.548
8,A01-AHMEDABAD APEX,A05-VAPI BRANCH,AHMEDABAD,SURAT,4.30208
9,A01-AHMEDABAD APEX,A07-ANKLESHWAR BRANCH,AHMEDABAD,SURAT,0.42552


In [22]:
import pandas as pd

# === CONFIG ===
# Hours added to every flight leg by get_flight_time.
AIRPORT_BUFFER_HOURS = 4

# === STEP 1: LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1  # Origin Airport City, Destination Airport City, Weight
no_flight_pairs = no_flight_subset_df  # Origin Branch, Destination Branch, Origin Airport City, Destination Airport City, Volume

# Reference tables read from disk (columns listed are the ones this cell reads)
air_routes = pd.read_csv("/content/flight_connection.csv")  # origin_city, destination_city
road_matrix = pd.read_csv("/content/city_distances_google.csv")  # origin, destination, duration_hr
city_coordinates = pd.read_csv("/content/city_coordinate.csv")  # City

# === STEP 2: DEFINE FLIGHT TIME ===
flight_times = pd.read_csv('/content/flight_time_matrix.csv')  # Origin, Destination, Estimated_Flight_Time_Minutes

# === STEP 3: UTILITY FUNCTIONS ===
# (origin, destination) tuples for constant-time "does this flight exist?" checks.
air_routes_set = set(
    air_routes[['origin_city', 'destination_city']].itertuples(index=False, name=None)
)

def get_road_time(origin, destination):
    """Return the road duration (duration_hr) for origin -> destination.

    Looks the pair up in the module-level `road_matrix` frame and returns the
    first matching value as a float, or None when the pair is not listed.
    """
    is_pair = (road_matrix['origin'] == origin) & (road_matrix['destination'] == destination)
    durations = road_matrix.loc[is_pair, 'duration_hr']
    if durations.empty:
        return None
    return float(durations.iloc[0])

def get_flight_time(origin, destination):
    """Return the buffered flight time in hours for origin -> destination.

    Reads the first matching row of the module-level `flight_times` frame
    (columns 'Origin', 'Destination', 'Estimated_Flight_Time_Minutes'),
    converts minutes to hours and adds AIRPORT_BUFFER_HOURS.
    Returns None when the pair is not listed.
    """
    same_origin = flight_times['Origin'] == origin
    same_destination = flight_times['Destination'] == destination
    minutes = flight_times.loc[same_origin & same_destination, 'Estimated_Flight_Time_Minutes']
    if minutes.empty:
        return None
    return (float(minutes.iloc[0]) / 60) + AIRPORT_BUFFER_HOURS


def get_existing_volume(o_air, d_air):
    """Return the summed 'Weight' of `shipment_data` rows for the lane o_air -> d_air.

    The sum over no matching rows is 0.
    """
    on_lane = (
        (shipment_data['Origin Airport City'] == o_air)
        & (shipment_data['Destination Airport City'] == d_air)
    )
    return shipment_data.loc[on_lane, 'Weight'].sum()


# === STEP 4: SUGGEST INDIRECT PATHS ===
# For each NO_FLIGHT row, try every city in city_coordinates as a one-stop
# connection; keep the ones where both flight legs exist and have a known time.
# Every helper call below scans a whole DataFrame, so this loop is slow on
# large inputs.
suggestions = []

for _, row in no_flight_pairs.iterrows():
    o_air, d_air = row['Origin Airport City'], row['Destination Airport City']
    origin_branch, dest_branch = row['Origin Branch'], row['Destination Branch']
    new_vol = row['Volume']

    # Direct road time is the benchmark; None when the pair is not in road_matrix.
    road_time = get_road_time(o_air, d_air)

    for mid_air in city_coordinates['City'].unique():
        # The connecting airport must differ from both endpoints.
        if mid_air in (o_air, d_air):
            continue

        # Both flight legs must be listed routes.
        if (o_air, mid_air) not in air_routes_set or (mid_air, d_air) not in air_routes_set:
            continue

        ft1 = get_flight_time(o_air, mid_air)
        ft2 = get_flight_time(mid_air, d_air)
        if ft1 is None or ft2 is None:
            continue

        total_time = ft1 + ft2
        total_volume = get_existing_volume(o_air, mid_air) + get_existing_volume(mid_air, d_air) + new_vol
        if road_time is None:
            time_saved = "N/A"
        else:
            time_saved = round(road_time - total_time, 2)

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Mid Airport": mid_air,
            "Suggested Route": f"{o_air} \u2192 {mid_air} \u2192 {d_air}",
            "Updated Volume": total_volume,
            "Time Saved (hrs)": time_saved
        })

# === STEP 5: EXPORT RESULTS ===
# Largest combined volume first.
result_df = pd.DataFrame(suggestions)
result_df = result_df.sort_values(by="Updated Volume", ascending=False)
result_df.to_csv("indirect_air_path_suggestions.csv", index=False)
print("\u2705 Suggestion file saved: indirect_air_path_suggestions.csv")

KeyboardInterrupt: 

In [23]:
import pandas as pd

# === CONFIG ===
# Hours added to every flight leg when building flight_time_dict.
AIRPORT_BUFFER_HOURS = 4

# === STEP 1: LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1
no_flight_pairs = no_flight_subset_df
# Reference tables read from disk
air_routes = pd.read_csv("/content/flight_connection.csv")
road_matrix = pd.read_csv("/content/city_distances_google.csv")
city_coordinates = pd.read_csv("/content/city_coordinate.csv")
flight_times = pd.read_csv('/content/flight_time_matrix.csv')

# === STEP 2: PREPROCESS ===
# Every lookup becomes a plain set / dict keyed by (origin, destination), so the
# main loop never has to filter a DataFrame.

# Valid air routes
air_routes_set = set(zip(air_routes['origin_city'], air_routes['destination_city']))

# Flight time in hours, buffer included
flight_time_dict = {
    (origin, destination): (minutes / 60 + AIRPORT_BUFFER_HOURS)
    for origin, destination, minutes in zip(
        flight_times['Origin'],
        flight_times['Destination'],
        flight_times['Estimated_Flight_Time_Minutes'],
    )
}

# Total existing weight per airport pair
volume_dict = (
    shipment_data
    .groupby(['Origin Airport City', 'Destination Airport City'])['Weight']
    .sum()
    .to_dict()
)

# Road duration per city pair
road_time_dict = {
    (origin, destination): hours
    for origin, destination, hours in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['duration_hr']
    )
}

# === STEP 3: LOGIC ===
# For each NO_FLIGHT row, try every city in city_coordinates as a one-stop
# connection; keep the ones where both flight legs exist and have a known time.
suggestions = []
mid_airports = set(city_coordinates['City'].unique())

for _, row in no_flight_pairs.iterrows():
    o_air, d_air = row['Origin Airport City'], row['Destination Airport City']
    origin_branch, dest_branch = row['Origin Branch'], row['Destination Branch']
    new_vol = row['Volume']
    # None when the direct pair has no road-matrix entry.
    road_time = road_time_dict.get((o_air, d_air), None)

    for mid_air in mid_airports:
        # The connecting airport must differ from both endpoints.
        if mid_air in (o_air, d_air):
            continue

        # Both flight legs must be listed routes.
        if (o_air, mid_air) not in air_routes_set or (mid_air, d_air) not in air_routes_set:
            continue

        ft1 = flight_time_dict.get((o_air, mid_air))
        ft2 = flight_time_dict.get((mid_air, d_air))
        if ft1 is None or ft2 is None:
            continue

        total_time = ft1 + ft2
        vol1 = volume_dict.get((o_air, mid_air), 0)
        vol2 = volume_dict.get((mid_air, d_air), 0)
        total_volume = vol1 + vol2 + new_vol
        if road_time is None:
            time_saved = "N/A"
        else:
            time_saved = round(road_time - total_time, 2)

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Mid Airport": mid_air,
            "Suggested Route": f"{o_air} → {mid_air} → {d_air}",
            "Updated Volume": total_volume,
            "Time Saved (hrs)": time_saved
        })

# === STEP 4: EXPORT RESULTS ===
# Largest combined volume first.
result_df = pd.DataFrame(suggestions)
result_df = result_df.sort_values(by="Updated Volume", ascending=False)
result_df.to_csv("indirect_air_path_suggestions.csv", index=False)
print("✅ Suggestion file saved: indirect_air_path_suggestions.csv")

✅ Suggestion file saved: indirect_air_path_suggestions.csv


In [24]:
import pandas as pd

# === CONFIG ===
# Hours added to every flight leg when building flight_time_dict.
AIRPORT_BUFFER_HOURS = 4

# === LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1
no_flight_pairs = no_flight_subset_df
# Reference tables read from disk
air_routes = pd.read_csv("/content/flight_connection.csv")
road_matrix = pd.read_csv("/content/city_distances_google.csv")
city_coordinates = pd.read_csv("/content/city_coordinate.csv")
flight_times = pd.read_csv('/content/flight_time_matrix.csv')

# === PREPROCESS LOOKUPS ===
# Everything below is keyed by an (origin, destination) tuple.
air_routes_set = set(zip(air_routes['origin_city'], air_routes['destination_city']))

# Flight time in hours, buffer included
flight_time_dict = {
    (origin, destination): (minutes / 60 + AIRPORT_BUFFER_HOURS)
    for origin, destination, minutes in zip(
        flight_times['Origin'],
        flight_times['Destination'],
        flight_times['Estimated_Flight_Time_Minutes'],
    )
}

# Total existing weight per airport pair
volume_dict = (
    shipment_data
    .groupby(['Origin Airport City', 'Destination Airport City'])['Weight']
    .sum()
    .to_dict()
)

# Road duration per city pair
road_time_dict = {
    (origin, destination): hours
    for origin, destination, hours in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['duration_hr']
    )
}

# Candidate connecting cities
mid_airports = set(city_coordinates['City'].unique())

# === FIND INDIRECT PATHS ===
# Same search as before, but only rows with a known road benchmark are
# considered and only connections strictly faster than road are kept.
suggestions = []

for _, row in no_flight_pairs.iterrows():
    o_air, d_air = row['Origin Airport City'], row['Destination Airport City']
    origin_branch, dest_branch = row['Origin Branch'], row['Destination Branch']
    new_vol = row['Volume']

    # Skip same origin & destination (airport city or branch)
    if o_air == d_air or origin_branch == dest_branch:
        continue

    # Road time between the airport cities; rows without one are skipped
    road_time = road_time_dict.get((o_air, d_air), None)
    if road_time is None:
        continue

    for mid_air in mid_airports:
        # The connecting airport must differ from both endpoints.
        if mid_air in (o_air, d_air):
            continue

        # Both flight legs must be listed routes.
        if (o_air, mid_air) not in air_routes_set or (mid_air, d_air) not in air_routes_set:
            continue

        ft1 = flight_time_dict.get((o_air, mid_air))
        ft2 = flight_time_dict.get((mid_air, d_air))
        if ft1 is None or ft2 is None:
            continue

        total_time = ft1 + ft2

        # ✅ Skip if total air time is not shorter than road time
        if total_time >= road_time:
            continue

        vol1 = volume_dict.get((o_air, mid_air), 0)
        vol2 = volume_dict.get((mid_air, d_air), 0)
        total_volume = vol1 + vol2 + new_vol
        time_saved = round(road_time - total_time, 2)

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Mid Airport": mid_air,
            "Suggested Route": f"{o_air} → {mid_air} → {d_air}",
            "Updated Volume": total_volume,
            "Time Saved (hrs)": time_saved
        })

# === EXPORT ===
# Largest combined volume first.
result_df = pd.DataFrame(suggestions)
result_df = result_df.sort_values(by="Updated Volume", ascending=False)
result_df.to_csv("indirect_air_path_suggestions.csv", index=False)
print("✅ Cleaned suggestion file saved: indirect_air_path_suggestions.csv")

✅ Cleaned suggestion file saved: indirect_air_path_suggestions.csv


In [27]:
import pandas as pd

# === CONFIG ===
# Hours added to every flight leg when building flight_time_map.
AIRPORT_BUFFER_HOURS = 4
# Weights of the volume, time and distance terms of the composite score.
W_VOLUME = 0.3
W_TIME = 0.4
W_DISTANCE = 0.3

# === LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1  # Origin Airport City, Destination Airport City, Weight
no_flight_pairs = no_flight_subset_df  # Origin Branch, Destination Branch, Origin Airport City, Destination Airport City, Volume
# Reference tables read from disk
air_routes = pd.read_csv("/content/flight_connection.csv")  # origin_city, destination_city
road_matrix = pd.read_csv("/content/city_distances_google.csv")  # origin, destination, distance_km, duration_hr
city_coordinates = pd.read_csv("/content/city_coordinate.csv")  # City
flight_times = pd.read_csv("/content/flight_time_matrix.csv")  # Origin, Destination, Estimated_Flight_Time_Minutes

# === PREP ===
air_routes_set = set(zip(air_routes['origin_city'], air_routes['destination_city']))
city_list = city_coordinates['City'].unique()

# Road duration and distance, each keyed by (origin, destination)
road_time_map = {
    (origin, destination): hours
    for origin, destination, hours in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['duration_hr']
    )
}
road_dist_map = {
    (origin, destination): kilometres
    for origin, destination, kilometres in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['distance_km']
    )
}

# Flight time in hours (buffer included), keyed by (origin, destination)
flight_time_map = {
    (origin, destination): (minutes / 60) + AIRPORT_BUFFER_HOURS
    for origin, destination, minutes in zip(
        flight_times['Origin'],
        flight_times['Destination'],
        flight_times['Estimated_Flight_Time_Minutes'],
    )
}

# Total existing weight per airport pair
vol_map = (
    shipment_data
    .groupby(['Origin Airport City', 'Destination Airport City'])['Weight']
    .sum()
    .to_dict()
)

# === SUGGEST INDIRECT PATHS ===
# For each NO_FLIGHT row, try every candidate city as a one-stop air
# connection, drop connections that are not faster than the direct road
# time (when one is known), and score the remaining ones.
suggestions = []

for _, row in no_flight_pairs.iterrows():
    o_air, d_air = row['Origin Airport City'], row['Destination Airport City']
    origin_branch, dest_branch, new_vol = row['Origin Branch'], row['Destination Branch'], row['Volume']

    # Skip same-city paths
    if o_air == d_air:
        continue

    # Direct road benchmark; either value is None when the pair is not in road_matrix.
    road_time = road_time_map.get((o_air, d_air))
    road_dist = road_dist_map.get((o_air, d_air))

    for mid_air in city_list:
        if mid_air in (o_air, d_air):
            continue

        # Both flight legs must be listed routes.
        if (o_air, mid_air) not in air_routes_set or (mid_air, d_air) not in air_routes_set:
            continue

        ft1 = flight_time_map.get((o_air, mid_air))
        ft2 = flight_time_map.get((mid_air, d_air))
        if ft1 is None or ft2 is None:
            continue

        total_air_time = ft1 + ft2
        if road_time is not None and total_air_time >= road_time:
            continue  # Skip if flight path is slower

        # Volumes: existing weight on both legs plus this shipment row
        vol1 = vol_map.get((o_air, mid_air), 0)
        vol2 = vol_map.get((mid_air, d_air), 0)
        total_volume = vol1 + vol2 + new_vol

        # Time saved versus road; None without a road benchmark
        time_saved = round(road_time - total_air_time, 2) if road_time else None

        # Distance saved uses the road distances of the two legs as a stand-in
        # for the air route. A leg with no road-distance entry makes the saving
        # unknown; treating it as 0 km would overstate the saving.
        dist1 = road_dist_map.get((o_air, mid_air))
        dist2 = road_dist_map.get((mid_air, d_air))
        if road_dist and dist1 is not None and dist2 is not None:
            total_air_dist = dist1 + dist2
            dist_saved = round(road_dist - total_air_dist, 2)
        else:
            total_air_dist = None
            dist_saved = None

        # Scores: volume is scaled down by 1e6; time and distance are expressed
        # as a fraction of the direct road value (0 when unknown).
        vol_score = total_volume / 1_000_000
        time_score = (time_saved / road_time) if road_time and time_saved else 0
        dist_score = (dist_saved / road_dist) if road_dist and dist_saved else 0

        composite_score = round(
            (W_VOLUME * vol_score) +
            (W_TIME * time_score) +
            (W_DISTANCE * dist_score), 4
        )

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Mid Airport": mid_air,
            "Suggested Route": f"{o_air} → {mid_air} → {d_air}",
            "Updated Volume": round(total_volume, 2),
            "Time Saved (hrs)": time_saved,
            "Distance Saved (km)": dist_saved,
            "Composite Score": composite_score
        })

# === EXPORT ===
# Passing the column list keeps the frame well-formed (and sortable) even when
# no suggestion was produced.
result_columns = [
    "Origin Branch", "Destination Branch", "Mid Airport", "Suggested Route",
    "Updated Volume", "Time Saved (hrs)", "Distance Saved (km)", "Composite Score",
]
result_df = pd.DataFrame(suggestions, columns=result_columns)
result_df = result_df.sort_values(by="Composite Score", ascending=False)

# The message is built from the same path that is written, so the two cannot drift apart.
output_path = "indirect_air_path_suggestions_new.csv"
result_df.to_csv(output_path, index=False)
print(f"✅ Final suggestions saved to '{output_path}'")

✅ Final suggestions saved to 'indirect_air_path_suggestions.csv'


In [30]:
import pandas as pd

# === CONFIGURATION ===
# Hours added to every flight leg when building flight_time_map.
AIRPORT_BUFFER_HOURS = 4
# Weights of the volume, time-saving and distance-saving terms of the score.
VOLUME_WEIGHT = 0.4
TIME_SAVING_WEIGHT = 0.5
DIST_SAVING_WEIGHT = 0.1

# === LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1  # Origin Airport City, Destination Airport City, Weight
no_flight_pairs = no_flight_subset_df  # Origin Branch, Destination Branch, Origin Airport City, Destination Airport City, Volume
# Reference tables read from disk
air_routes = pd.read_csv("/content/flight_connection.csv")  # origin_city, destination_city
road_matrix = pd.read_csv("/content/city_distances_google.csv")  # origin, destination, distance_km, duration_hr
flight_times = pd.read_csv('/content/flight_time_matrix.csv')  # Origin, Destination, Estimated_Flight_Time_Minutes

# === PREPROCESSING ===
air_routes_set = set(zip(air_routes['origin_city'], air_routes['destination_city']))

# Flight time mapping (in hours, buffer included)
flight_time_map = {
    (origin, destination): (minutes / 60) + AIRPORT_BUFFER_HOURS
    for origin, destination, minutes in zip(
        flight_times['Origin'],
        flight_times['Destination'],
        flight_times['Estimated_Flight_Time_Minutes'],
    )
}

# Road time and distance mapping, values coerced to float
road_time_map = {
    (origin, destination): float(hours)
    for origin, destination, hours in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['duration_hr']
    )
}
road_dist_map = {
    (origin, destination): float(kilometres)
    for origin, destination, kilometres in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['distance_km']
    )
}

def get_existing_volume(o_air, d_air):
    """Return the summed 'Weight' of `shipment_data` rows for the lane o_air -> d_air.

    The sum over no matching rows is 0.
    """
    origins = shipment_data['Origin Airport City']
    destinations = shipment_data['Destination Airport City']
    lane_rows = shipment_data[(origins == o_air) & (destinations == d_air)]
    return lane_rows['Weight'].sum()

# === MAIN COMPUTATION ===
# For each NO_FLIGHT row, look for a city that can be reached by one flight
# from the origin and then by road to the destination, and keep the
# combinations that beat the direct road time.
suggestions = []

all_cities = set(road_matrix['origin']) | set(road_matrix['destination'])

# get_existing_volume filters the whole shipment frame on every call, and the
# same (origin, mid) lane recurs across many rows, so each answer is computed once.
volume_cache = {}

for _, row in no_flight_pairs.iterrows():
    o_air = row['Origin Airport City']
    d_air = row['Destination Airport City']
    origin_branch = row['Origin Branch']
    dest_branch = row['Destination Branch']
    new_vol = row['Volume']

    # Direct road route (benchmark); rows without a road time are skipped
    road_only_time = road_time_map.get((o_air, d_air))
    road_only_dist = road_dist_map.get((o_air, d_air))
    if road_only_time is None:
        continue

    for mid_air in all_cities:
        if mid_air in (o_air, d_air):
            continue

        # Flight leg: origin -> mid must be a listed route
        if (o_air, mid_air) not in air_routes_set:
            continue

        # Road leg: mid -> destination must have a known road time
        road_leg_time = road_time_map.get((mid_air, d_air))
        road_leg_dist = road_dist_map.get((mid_air, d_air))
        if road_leg_time is None:
            continue

        flight_leg_time = flight_time_map.get((o_air, mid_air))
        if flight_leg_time is None:
            continue

        total_time = flight_leg_time + road_leg_time
        if total_time >= road_only_time:
            continue

        lane = (o_air, mid_air)
        if lane not in volume_cache:
            volume_cache[lane] = get_existing_volume(o_air, mid_air)
        existing_volume = volume_cache[lane]
        total_volume = existing_volume + new_vol

        time_saved = round(road_only_time - total_time, 2)
        # Road kilometres avoided: direct road distance minus the remaining road leg
        dist_saved = round(road_only_dist - road_leg_dist, 2) if road_leg_dist and road_only_dist else 0

        # NOTE(review): the three terms are in different units (weight, hours,
        # km) and are not normalised before weighting — confirm this is intended.
        score = (
            VOLUME_WEIGHT * total_volume +
            TIME_SAVING_WEIGHT * time_saved +
            DIST_SAVING_WEIGHT * dist_saved
        )

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Middle Airport": mid_air,
            "Suggested Route": f"{o_air} → {mid_air} (✈️) → {d_air} (🚛)",
            "Air Time (hr)": round(flight_leg_time, 2),
            "Road Time (hr)": round(road_leg_time, 2),
            "Total Time (hr)": round(total_time, 2),
            "Road Only Time (hr)": round(road_only_time, 2),
            "Time Saved (hr)": time_saved,
            "Volume Used (kg)": round(total_volume, 2),
            "Composite Score": round(score, 2)
        })

# === SAVE RESULTS ===
# Passing the column list keeps the frame well-formed (and sortable) even when
# no suggestion was produced.
result_columns = [
    "Origin Branch", "Destination Branch", "Middle Airport", "Suggested Route",
    "Air Time (hr)", "Road Time (hr)", "Total Time (hr)", "Road Only Time (hr)",
    "Time Saved (hr)", "Volume Used (kg)", "Composite Score",
]
result_df = pd.DataFrame(suggestions, columns=result_columns)
result_df = result_df.sort_values(by="Composite Score", ascending=False)

output_path = "indirect_path_single_flight.csv"
result_df.to_csv(output_path, index=False)
print(f"✅ Suggestions saved to: {output_path}")

✅ Suggestions saved to: indirect_path_single_flight.csv


In [31]:
# Re-export the airport-to-airport aggregates. index=False matches the earlier
# write of this same file and avoids adding the row index as an unnamed column.
airport_volume.to_csv('airport_to_airport_volume.csv', index=False)

In [32]:
# Overwrite no_flight_OD_pairs.csv with the five-column NO_FLIGHT subset.
# index=False keeps the leftover row labels of the filtered frame out of the
# file, consistent with the earlier write of this filename.
no_flight_subset_df.to_csv('no_flight_OD_pairs.csv', index=False)

In [34]:
import pandas as pd

# === CONFIGURATION ===
# Hours added to every flight leg when building flight_time_map.
AIRPORT_BUFFER_HOURS = 4
# Weights of the volume, time-saving and distance-saving terms of the score.
VOLUME_WEIGHT = 0.4
TIME_SAVING_WEIGHT = 0.5
DIST_SAVING_WEIGHT = 0.1

# === LOAD DATA ===
# Frames built by earlier cells
shipment_data = airport_volume1  # Origin Airport City, Destination Airport City, Weight
no_flight_pairs = no_flight_subset_df  # Origin Branch, Destination Branch, Origin Airport City, Destination Airport City, Volume
# Reference tables read from disk
air_routes = pd.read_csv("/content/flight_connection.csv")  # origin_city, destination_city
road_matrix = pd.read_csv("/content/city_distances_google.csv")  # origin, destination, distance_km, duration_hr
flight_times = pd.read_csv('/content/flight_time_matrix.csv')  # Origin, Destination, Estimated_Flight_Time_Minutes

# === PREPROCESSING ===
air_routes_set = set(zip(air_routes['origin_city'], air_routes['destination_city']))

# Flight time mapping (in hours, buffer included)
flight_time_map = {
    (origin, destination): (minutes / 60) + AIRPORT_BUFFER_HOURS
    for origin, destination, minutes in zip(
        flight_times['Origin'],
        flight_times['Destination'],
        flight_times['Estimated_Flight_Time_Minutes'],
    )
}

# Road time and distance mapping, values coerced to float
road_time_map = {
    (origin, destination): float(hours)
    for origin, destination, hours in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['duration_hr']
    )
}
road_dist_map = {
    (origin, destination): float(kilometres)
    for origin, destination, kilometres in zip(
        road_matrix['origin'], road_matrix['destination'], road_matrix['distance_km']
    )
}

def get_existing_volume(o_air, d_air):
    """Return the summed 'Weight' of `shipment_data` rows for the lane o_air -> d_air.

    The sum over no matching rows is 0.
    """
    lane_mask = (
        (shipment_data['Origin Airport City'] == o_air)
        & (shipment_data['Destination Airport City'] == d_air)
    )
    return shipment_data.loc[lane_mask, 'Weight'].sum()

# === MAIN COMPUTATION ===
# For each NO_FLIGHT row, look for a city that can be reached by one flight
# from the origin and then by road to the destination, and keep the
# combinations that beat the direct road time.
suggestions = []

all_cities = set(road_matrix['origin']) | set(road_matrix['destination'])

# get_existing_volume filters the whole shipment frame on every call, and the
# same (origin, mid) lane recurs across many rows, so each answer is computed once.
volume_cache = {}

for _, row in no_flight_pairs.iterrows():
    o_air = row['Origin Airport City']
    d_air = row['Destination Airport City']
    origin_branch = row['Origin Branch']
    dest_branch = row['Destination Branch']
    new_vol = row['Volume']

    # Direct road route (benchmark); rows without a road time are skipped
    road_only_time = road_time_map.get((o_air, d_air))
    road_only_dist = road_dist_map.get((o_air, d_air))
    if road_only_time is None:
        continue

    for mid_air in all_cities:
        if mid_air in (o_air, d_air):
            continue

        # Flight leg: origin -> mid must be a listed route
        if (o_air, mid_air) not in air_routes_set:
            continue

        # Road leg: mid -> destination must have a known road time
        road_leg_time = road_time_map.get((mid_air, d_air))
        road_leg_dist = road_dist_map.get((mid_air, d_air))
        if road_leg_time is None:
            continue

        flight_leg_time = flight_time_map.get((o_air, mid_air))
        if flight_leg_time is None:
            continue

        total_time = flight_leg_time + road_leg_time
        if total_time >= road_only_time:
            continue

        lane = (o_air, mid_air)
        if lane not in volume_cache:
            volume_cache[lane] = get_existing_volume(o_air, mid_air)
        existing_volume = volume_cache[lane]
        total_volume = existing_volume + new_vol

        time_saved = round(road_only_time - total_time, 2)
        # Road kilometres avoided: direct road distance minus the remaining road leg
        dist_saved = round(road_only_dist - road_leg_dist, 2) if road_leg_dist and road_only_dist else 0

        # NOTE(review): the three terms are in different units (weight, hours,
        # km) and are not normalised before weighting — confirm this is intended.
        score = (
            VOLUME_WEIGHT * total_volume +
            TIME_SAVING_WEIGHT * time_saved +
            DIST_SAVING_WEIGHT * dist_saved
        )

        suggestions.append({
            "Origin Branch": origin_branch,
            "Destination Branch": dest_branch,
            "Middle Airport": mid_air,
            "Suggested Route": f"{o_air} → {mid_air} (✈️) → {d_air} (🚛)",
            "Air Time (hr)": round(flight_leg_time, 2),
            "Road Time (hr)": round(road_leg_time, 2),
            "Total Time (hr)": round(total_time, 2),
            "Road Only Time (hr)": round(road_only_time, 2),
            "Time Saved (hr)": time_saved,
            "Volume Used (kg)": round(total_volume, 2),
            "Composite Score": round(score, 2)
        })

# === SAVE RESULTS ===
# Passing the column list keeps the frame well-formed (and sortable) even when
# no suggestion was produced.
result_columns = [
    "Origin Branch", "Destination Branch", "Middle Airport", "Suggested Route",
    "Air Time (hr)", "Road Time (hr)", "Total Time (hr)", "Road Only Time (hr)",
    "Time Saved (hr)", "Volume Used (kg)", "Composite Score",
]
result_df = pd.DataFrame(suggestions, columns=result_columns)
result_df = result_df.sort_values(by="Composite Score", ascending=False)

# The message is built from the same path that is written, so it always names
# the file actually produced.
output_path = "indirect_path_single_flight1.csv"
result_df.to_csv(output_path, index=False)
print(f"✅ Suggestions saved to: {output_path}")

✅ Suggestions saved to: indirect_path_single_flight.csv


In [None]:
# Diagnostic Version - Find out what's wrong with the data

import pandas as pd
import numpy as np
from collections import defaultdict
import warnings
import os
# Suppress all warnings for the rest of this diagnostic run.
warnings.filterwarnings('ignore')

# ===== STEP 1: Set Your File Paths =====
# Logical dataset name -> location of the CSV file to inspect.
FILE_PATHS = dict(
    shipment_data='/content/shipment_data.csv',
    airport_volume='/content/airport_to_airport_volume.csv',
    air_routes='/content/air_routes.csv',
    road_matrix='/content/road_matrix.csv',
    no_flight_pairs='/content/no_flight_OD_pairs.csv',
)

# Banner: title line followed by a 60-character rule.
print("🔍 DIAGNOSTIC MODE - Let's find out what's wrong!", "=" * 60, sep="\n")

# ===== STEP 2: Load and Examine Each File =====
def examine_file(file_path, file_name):
    """Print a quick health report for one CSV file and return its contents.

    The report lists the frame's shape, column names, dtypes, its first three
    rows and the columns that contain null values (if any).

    Args:
        file_path: Location of the CSV file to read.
        file_name: Label shown in the report header.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the path does not
        exist or when reading/reporting raised an exception.
    """
    print(f"\n📋 EXAMINING: {file_name}")
    print("-" * 40)

    if not os.path.exists(file_path):
        print(f"❌ FILE NOT FOUND: {file_path}")
        return None

    try:
        frame = pd.read_csv(file_path)

        # Structural overview.
        print(f"✅ Shape: {frame.shape}")
        print(f"📊 Columns: {list(frame.columns)}")
        print(f"🔢 Data types:\n{frame.dtypes}")

        # Small preview of the contents.
        print("\n📄 First 3 rows:")
        print(frame.head(3).to_string())

        # Per-column null counts, restricted to the columns that have any.
        nulls_per_column = frame.isna().sum()
        columns_with_nulls = nulls_per_column[nulls_per_column > 0]
        if columns_with_nulls.empty:
            print("\n✅ No null values")
        else:
            print("\n⚠️  Null values found:")
            print(columns_with_nulls)

        return frame

    except Exception as exc:
        # Best-effort diagnostic: report the failure and let the caller carry on.
        print(f"❌ Error loading file: {exc}")
        return None

# Load all files: run the report for every configured path and keep each
# result under the same key it has in FILE_PATHS.
dfs = {name: examine_file(location, name) for name, location in FILE_PATHS.items()}

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 60, "🔍 DETAILED ANALYSIS", "=" * 60, sep="\n")

# ===== STEP 3: Analyze Air Routes =====
# Guess which columns of the air-routes file hold the origin and destination,
# then print the distinct values and a few sample routes.
if dfs['air_routes'] is not None:
    print(f"\n🛩️  AIR ROUTES ANALYSIS:")
    air_df = dfs['air_routes']

    # Try to identify origin/destination columns by name. str() keeps this
    # working when a column label is not a string.
    possible_origin_cols = [col for col in air_df.columns if 'origin' in str(col).lower()]
    possible_dest_cols = [
        col for col in air_df.columns
        if any(word in str(col).lower() for word in ['dest', 'to'])
    ]
    # 'to' is a very loose substring (it also matches e.g. "Total", "Operator",
    # "Sector"), and the first candidate is the one that gets used below.
    # Rank explicit 'dest' matches first and origin-looking columns last; the
    # sort is stable, so file order is kept within each group and the set of
    # candidates itself is unchanged.
    possible_dest_cols.sort(
        key=lambda col: ('dest' not in str(col).lower(), col in possible_origin_cols)
    )

    print(f"Possible origin columns: {possible_origin_cols}")
    print(f"Possible destination columns: {possible_dest_cols}")

    if possible_origin_cols and possible_dest_cols:
        origin_col = possible_origin_cols[0]
        dest_col = possible_dest_cols[0]

        print(f"Using: {origin_col} -> {dest_col}")

        # Show unique cities
        origin_cities = air_df[origin_col].unique()
        dest_cities = air_df[dest_col].unique()

        print(f"Origin cities ({len(origin_cities)}): {origin_cities[:10]}")
        print(f"Destination cities ({len(dest_cities)}): {dest_cities[:10]}")

        # Show sample routes (first five rows, read column-wise)
        print(f"\nSample air routes:")
        for origin_city, dest_city in zip(air_df[origin_col].head(5), air_df[dest_col].head(5)):
            print(f"  {origin_city} -> {dest_city}")

# ===== STEP 4: Analyze Road Matrix =====
# Verify the road matrix exposes the expected columns; when it does, list the
# distinct origins/destinations, otherwise point at similarly named columns.
if dfs['road_matrix'] is not None:
    print("\n🛣️  ROAD MATRIX ANALYSIS:")
    road_df = dfs['road_matrix']

    # Columns this diagnostic expects to find in the road matrix.
    required_road_cols = ['Origin', 'Destination', 'Distance_km', 'Duration_hr']
    missing_road_cols = [name for name in required_road_cols if name not in road_df.columns]

    if not missing_road_cols:
        print("✅ All required columns present")

        # Distinct endpoints; only the first ten of each are printed.
        origin_cities = road_df['Origin'].unique()
        dest_cities = road_df['Destination'].unique()

        print(f"Road origin cities ({len(origin_cities)}): {origin_cities[:10]}")
        print(f"Road destination cities ({len(dest_cities)}): {dest_cities[:10]}")
    else:
        print(f"❌ Missing required columns: {missing_road_cols}")
        print(f"Available columns: {list(road_df.columns)}")

        # Suggest existing columns whose name contains the missing one
        # (case-insensitive), e.g. a differently cased or suffixed variant.
        for missing_col in missing_road_cols:
            wanted = missing_col.lower()
            similar_cols = [name for name in road_df.columns if wanted in name.lower()]
            if similar_cols:
                print(f"Similar to '{missing_col}': {similar_cols}")

# ===== STEP 5: Analyze Airport Volume =====
# Verify the airport-volume file exposes the expected columns and, when it
# does, summarise the weight column.
if dfs['airport_volume'] is not None:
    print("\n✈️  AIRPORT VOLUME ANALYSIS:")
    volume_df = dfs['airport_volume']

    # Columns this diagnostic expects to find in the volume file.
    required_volume_cols = ['Origin Airport City', 'Destination Airport City', 'Total_Weight']
    missing_volume_cols = [name for name in required_volume_cols if name not in volume_df.columns]

    if not missing_volume_cols:
        print("✅ All required columns present")

        # Descriptive statistics of the weight column.
        print("Total weight stats:")
        print(volume_df['Total_Weight'].describe())
    else:
        print(f"❌ Missing required columns: {missing_volume_cols}")
        print(f"Available columns: {list(volume_df.columns)}")

# ===== STEP 6: Analyze No Flight Pairs =====
# Count same-branch pairs in the no-flight file and show a few of the
# remaining pairs. The columns are checked first (as in Steps 4 and 5) so a
# schema mismatch is reported instead of aborting the cell with KeyError.
if dfs['no_flight_pairs'] is not None:
    print(f"\n🚫 NO FLIGHT PAIRS ANALYSIS:")
    no_flight_df = dfs['no_flight_pairs']

    # Check required columns
    required_no_flight_cols = [
        'Origin Branch', 'Destination Branch',
        'Origin Airport City', 'Destination Airport City'
    ]
    missing_no_flight_cols = [col for col in required_no_flight_cols if col not in no_flight_df.columns]

    if missing_no_flight_cols:
        print(f"❌ Missing required columns: {missing_no_flight_cols}")
        print(f"Available columns: {list(no_flight_df.columns)}")
    else:
        print(f"✅ All required columns present")

        # Check for same branch pairs
        same_branch = no_flight_df[no_flight_df['Origin Branch'] == no_flight_df['Destination Branch']]
        print(f"Same branch pairs: {len(same_branch)} (will be filtered out)")
        print(f"Unique OD pairs after filtering: {len(no_flight_df) - len(same_branch)}")

        # Show sample pairs
        different_branch = no_flight_df[no_flight_df['Origin Branch'] != no_flight_df['Destination Branch']]
        if len(different_branch) > 0:
            print(f"\nSample no-flight pairs:")
            sample_cols = required_no_flight_cols
            print(different_branch[sample_cols].head(3).to_string(index=False))

# ===== STEP 7: Cross-Reference Analysis =====
# Compare the values used in the no-flight file against the air-routes file
# and the road matrix. Each comparison is skipped with an explanation when a
# column it needs is absent, rather than raising KeyError or staying silent.
print(f"\n🔗 CROSS-REFERENCE ANALYSIS:")
print("-" * 40)

if dfs['air_routes'] is not None and dfs['no_flight_pairs'] is not None:
    air_df = dfs['air_routes']
    no_flight_df = dfs['no_flight_pairs']

    if 'Origin Airport City' not in no_flight_df.columns:
        print("⚠️  Skipping airport cross-check: no_flight_pairs has no 'Origin Airport City' column")
    else:
        # Get origin airports from no_flight pairs
        origin_airports = set(no_flight_df['Origin Airport City'].unique())
        print(f"Origin airports in no_flight_pairs: {len(origin_airports)}")
        print(f"Sample: {list(origin_airports)[:5]}")

        # Check if these airports exist in air routes
        # (possible_origin_cols is the candidate list built in Step 3).
        if possible_origin_cols:
            air_origins = set(air_df[possible_origin_cols[0]].unique())
            common_origins = origin_airports.intersection(air_origins)
            print(f"Origin airports also in air_routes: {len(common_origins)}")
            print(f"Missing from air_routes: {origin_airports - air_origins}")

if dfs['road_matrix'] is not None and dfs['no_flight_pairs'] is not None:
    road_df = dfs['road_matrix']
    no_flight_df = dfs['no_flight_pairs']

    if 'Origin' not in road_df.columns:
        print("\n⚠️  Skipping branch cross-check: road_matrix has no 'Origin' column")
    elif 'Origin Branch' not in no_flight_df.columns:
        print("\n⚠️  Skipping branch cross-check: no_flight_pairs has no 'Origin Branch' column")
    else:
        # Check branches in road matrix
        road_origins = set(road_df['Origin'].unique())
        no_flight_branches = set(no_flight_df['Origin Branch'].unique())

        common_branches = no_flight_branches.intersection(road_origins)
        print(f"\nBranches in both no_flight and road_matrix: {len(common_branches)}")
        print(f"Missing branches from road_matrix: {no_flight_branches - road_origins}")

# ===== STEP 8: Recommendations =====
# Turn the findings into a numbered to-do list. Every configured file is
# covered, so the "all files loaded correctly" message is only printed when
# that is actually true, and each column check tests every column that its
# message names.
print(f"\n💡 RECOMMENDATIONS:")
print("=" * 40)

recommendations = []

if dfs['air_routes'] is None:
    recommendations.append("❌ Fix air_routes file path")
elif not possible_origin_cols or not possible_dest_cols:
    # Candidate lists come from Step 3, which runs whenever air_routes loaded.
    recommendations.append("❌ Air routes file needs proper origin/destination columns")

if dfs['road_matrix'] is None:
    recommendations.append("❌ Fix road_matrix file path")
elif not {'Origin', 'Destination', 'Distance_km', 'Duration_hr'}.issubset(dfs['road_matrix'].columns):
    recommendations.append("❌ Road matrix needs 'Origin', 'Destination', 'Distance_km', 'Duration_hr' columns")

if dfs['airport_volume'] is None:
    recommendations.append("❌ Fix airport_volume file path")
elif not {'Origin Airport City', 'Destination Airport City', 'Total_Weight'}.issubset(dfs['airport_volume'].columns):
    recommendations.append("❌ Airport volume needs 'Origin Airport City', 'Destination Airport City', 'Total_Weight' columns")

if dfs['no_flight_pairs'] is None:
    recommendations.append("❌ Fix no_flight_pairs file path")
elif not {'Origin Branch', 'Destination Branch', 'Origin Airport City', 'Destination Airport City'}.issubset(dfs['no_flight_pairs'].columns):
    recommendations.append("❌ No-flight pairs file needs 'Origin Branch', 'Destination Branch', 'Origin Airport City', 'Destination Airport City' columns")

if dfs['shipment_data'] is None:
    recommendations.append("❌ Fix shipment_data file path")

if len(recommendations) == 0:
    recommendations.append("✅ All files loaded correctly - issue might be in data content or city name mismatches")
    recommendations.append("🔍 Check if city names match exactly between files (case sensitive)")
    recommendations.append("🔍 Verify that origin airports in no_flight_pairs have outgoing air routes")

for i, rec in enumerate(recommendations, 1):
    print(f"{i}. {rec}")

print(f"\n🎯 Next steps:")
print("1. Fix any file/column issues identified above")
print("2. Check for exact city name matches between files")
print("3. Verify data quality and completeness")