In [2]:
### TEST PLOTS
import ast
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import pandas as pd
DIVIDER = "----------------"

In [3]:
df = pd.read_csv('result.csv')
print(df.keys()) 

Index(['pnr', 'pnr_score', 'canclled_flight', 'canclled_class',
       'canclled_flight_departure', 'canclled_flight_arrival', 'canclled_src',
       'canclled_dst', 'allocated_src', 'allocated_dst', 'allocated_flights',
       'allocated_flights_departure', 'allocated_flights_arrival',
       'allocated_classes', 'allocated_flights_score'],
      dtype='object')


In [4]:
## STATIC REPOPRT
mean_score = df["pnr_score"].mean()
std_dev_score = df["pnr_score"].std()
print("PNR Score Stats")
print("Mean PNR Score: ", mean_score)
print("Std Dev. PNR Score: ", std_dev_score)
print("Max PNR Score: ", df["pnr_score"].max())
print("Min PNR Score: ", df["pnr_score"].min())
print(DIVIDER)

mean_score = df["allocated_flights_score"].mean()
std_dev_score = df["allocated_flights_score"].std()
print("PNR-FLIGHT Score Stats")
print("Mean PNR-FLIGHT Score: ", mean_score)
print("Std Dev. PNR-FLIGHT Score: ", std_dev_score)
print("Max PNR-FLIGHT Score: ", df["allocated_flights_score"].max())
print("Min PNR-FLIGHT Score: ", df["allocated_flights_score"].min())
print(DIVIDER)

print("Allocation Stats")
total = df["pnr"].count()
allocated = df["allocated_src"].count()
print("Total pnr: ", total)
print("Total allocated pnr: ", allocated)
print("Total unallocated pnr: ", total - allocated)

print(DIVIDER)
print("Connection Stats")

conn = [0, 0, 0, 0]
for x in df["allocated_flights"]:
    # laod to a list
    x = ast.literal_eval(x)
    conn[len(x)] += 1

print("Unallocated : ", conn[0])
print("Direct : ", conn[1])
print("One Stop : ", conn[2])
print("Two Stop : ", conn[3])

print(DIVIDER)
class_map = {"F": 0, "B": 1, "P": 2, "E": 3}
flight_stats = {}
for _, row in df.iterrows():
    r_flight = ast.literal_eval(row["canclled_flight"])[0]
    if r_flight not in flight_stats:
        flight_stats[r_flight] = {
            "total_pnr": 0,
            "allocated_pnr": 0,
            "unallocated_pnr": 0,
            "upgraded_pnr": 0,
            "samestate_pnr": 0,
            "downgraded_pnr": 0,
            "allocated_flight": {},
            "default_flight": None,
            "default_allocation": 0
        }

    flight_stats[r_flight]["total_pnr"] += 1
    if type(row["allocated_src"]) == str:
        flight_stats[r_flight]["allocated_pnr"] += 1
    else:
        flight_stats[r_flight]["unallocated_pnr"] += 1


    if type(row["allocated_src"]) == str:
        r_class_score = float(class_map[row["canclled_class"]])
        a_class_score = sum([class_map[x] for x in ast.literal_eval(row["allocated_classes"])]) / max(
            len(ast.literal_eval(row["allocated_classes"])), 1
        )
        # print(r_class_score, a_class_score, row["canclled_class"], ast.literal_eval(row["allocated_classes"]))
        if r_class_score > a_class_score:
            flight_stats[r_flight]["upgraded_pnr"] += 1
        elif r_class_score < a_class_score:
            flight_stats[r_flight]["downgraded_pnr"] += 1
        else:
            flight_stats[r_flight]["samestate_pnr"] += 1

        k = tuple(ast.literal_eval(row["allocated_flights"]))
        if flight_stats[r_flight]["allocated_flight"].get(k) is None:
            flight_stats[r_flight]["allocated_flight"][k] = 0
        flight_stats[r_flight]["allocated_flight"][k] += 1

for flight, details in flight_stats.items():
    max_count = 0
    max_flight = None
    for k, v in details["allocated_flight"].items():
        if v > max_count:
            max_count = v
            max_flight = k
    flight_stats[flight]["default_flight"] = max_flight
    flight_stats[flight]["default_allocation"] = max_count


print("Cancelled Flight Level Stats")
print("Total Cancelled Flights: ", len(flight_stats))
print(
    "Flight id\tTotal PNR\tReAllocated PNR\tUnallocated PNR\tUpgraded PNR\tSameState PNR\tDowngraded PNR\tDefault Flight\tDefault Allocation"
)
for k, v in flight_stats.items():
    print(
        f"{k}\t{v['total_pnr']}\t{v['allocated_pnr']}\t{v['unallocated_pnr']}\t{v['upgraded_pnr']}\t{v['samestate_pnr']}\t{v['downgraded_pnr']}\t{v['default_flight']}\t{v['default_allocation']}"
    )


PNR Score Stats
Mean PNR Score:  6965.384615384615
Std Dev. PNR Score:  4643.073332769441
Max PNR Score:  21700
Min PNR Score:  600
----------------
PNR-FLIGHT Score Stats
Mean PNR-FLIGHT Score:  5859553.777698683
Std Dev. PNR-FLIGHT Score:  22560676.985620238
Max PNR-FLIGHT Score:  223253492.0081906
Min PNR-FLIGHT Score:  -1.0
----------------
Allocation Stats
Total pnr:  494
Total allocated pnr:  330
Total unallocated pnr:  164
----------------
Connection Stats
Unallocated :  164
Direct :  143
One Stop :  152
Two Stop :  35
----------------
Cancelled Flight Level Stats
Total Cancelled Flights:  24
Flight id	Total PNR	ReAllocated PNR	Unallocated PNR	Upgraded PNR	SameState PNR	Downgraded PNR	Default Flight	Default Allocation
INV-ZZ-3577191	35	35	0	18	4	13	('INV-ZZ-7674145', 'INV-ZZ-1413506')	35
INV-ZZ-2502887	16	16	0	3	6	7	('INV-ZZ-9639797', 'INV-ZZ-9399965', 'INV-ZZ-3591392')	7
INV-ZZ-1594157	50	0	50	0	0	0	None	0
INV-ZZ-9711495	13	0	13	0	0	0	None	0
INV-ZZ-7226365	21	21	0	7	7	7	('INV-Z

In [5]:
# plot for pnr_score distribution of cancelled flights-pnrs
fig = px.bar(
    df, x="pnr", y="pnr_score", title="PNR Scores", labels={"pnr_score": "PNR Score"}
)
# Add mean and standard deviation annotations
fig.add_shape(
    type="line",
    x0=-0.5,
    x1=len(df) - 0.5,
    y0=mean_score,
    y1=mean_score,
    line=dict(color="red", width=2),
    name="Mean",
)
# Show the chart
print(mean_score, std_dev_score)
fig.show()

5859553.777698683 22560676.985620238


In [6]:
# plot for class distribution for cancelled flights
fig = px.pie(df, names='allocated_classes', title='Class Distribution for All PNRS')

# plot for class distribution for cancelled flights
fig2 = px.pie(df, names='canclled_class', title='Class Distribution for Cancelled PNRS')
fig.show()
fig2.show()



In [7]:
#Pnr vs delay
# Create a scatter plot for PNR vs delay
df.sort_values(by=['allocated_flights_score'], inplace=True)
df['canclled_flight_arrival'] = pd.to_datetime(df['canclled_flight_arrival'])
df['allocated_flights_arrival'] = pd.to_datetime(df['allocated_flights_arrival'])

# Calculate the delay
df['delay'] = (df['allocated_flights_arrival'] - df['canclled_flight_arrival']).dt.total_seconds() / 3600

# Create a scatter plot for PNR vs delay
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1, subplot_titles=["Scores", "PNR vs Delay", "Pnr Scores"])

# Plot PNR Scores
fig.add_trace(
    px.bar(df, x="pnr", y="allocated_flights_score").update_traces(marker_color='blue').data[0],
    row=1, col=1
)

# Plot PNR vs Delay
fig.add_trace(
    px.scatter(df, x='pnr', y='delay').data[0],
    row=1, col=1
)

fig.add_trace(
    px.scatter(df, x='pnr', y='delay').data[0],
    row=2, col=1
)

fig.add_trace(
    px.bar(df, x="pnr", y="pnr_score").update_traces(marker_color='green').data[0],
    row=2, col=1
)

# Calculate and print the mean delay
mean_delay = df['delay'].mean()
print(f'Mean Delay: {mean_delay} hours, std_dev: {df["delay"].std()}')

# Show the plot
fig.show()

Mean Delay: 36.05383838383838 hours, std_dev: 18.872059477531337


In [8]:
import ast

# map class with numbers
class_mapper = {'F': 0, 'B': 1 , 'P': 2, 'E': 3}

# find {class_cancelled - class_allocated} -> positive: upgrade, negetive: downgrade, zero: same class
# print(df.head())
list_up_down = []
for index, row in df.iterrows():
    # print(row['canclled_class'], row['allocated_classes'])
    cancelled_class = row['canclled_class']
    allocated_classes = ast.literal_eval(row['allocated_classes'])
    up_down = list(map(lambda x: class_mapper[x] - class_mapper[cancelled_class], allocated_classes))
    list_up_down += up_down
df_up_down = pd.DataFrame(list_up_down, columns=['up_down'])
fig = px.pie(df_up_down, names='up_down', title='Class Upgradation/Downgradation for Cancelled PNRS')
fig.show()

In [9]:
# print(df.columns)
total_allocated = 0
count_same_arr = 0
count_same_dep = 0
count_diff_arrNdep = 0

for index, row in df.iterrows():
    if(len(row['allocated_classes'])!=2):
        total_allocated += 1
        count_same_arr += row['allocated_dst']==row['canclled_dst']
        count_same_dep += row['allocated_src']==row['canclled_src']
        count_diff_arrNdep += row['allocated_dst']!=row['canclled_dst'] and row['allocated_src']!=row['canclled_src']
        
df_allocated = pd.DataFrame({
    'type': ['allocated PNRs']*2 + ['have same arrival airports']*2 + ['have same departure airports']*2 + ['both arrival and departure airports are different']*2,
    'y/n': ['yes', 'no']*4,
    'count': [total_allocated, len(df)-total_allocated, count_same_arr, total_allocated-count_same_arr, count_same_dep, total_allocated-count_same_dep, count_diff_arrNdep, total_allocated-count_diff_arrNdep]
})
df_allocated['percentage'] = df_allocated.groupby('type')['count'].transform(lambda x: (x / x.sum()) * 100)
percentage_list = df_allocated['percentage'].tolist()
print(df_allocated)
fig = px.bar(df_allocated, x="type", y="count", color="y/n", title="Allocated PNR Stats", text="percentage")
fig.update_traces(texttemplate="%{text:.2f}%", textposition='outside')
# texttemplate="%{y} x %{width} =<br>%{customdata[1]}",
fig.show()

                                                type  y/n  count  percentage
0                                     allocated PNRs  yes    330   66.801619
1                                     allocated PNRs   no    164   33.198381
2                         have same arrival airports  yes    187   56.666667
3                         have same arrival airports   no    143   43.333333
4                       have same departure airports  yes    187   56.666667
5                       have same departure airports   no    143   43.333333
6  both arrival and departure airports are different  yes    143   43.333333
7  both arrival and departure airports are different   no    187   56.666667


In [10]:
df_confusion_matrix = pd.DataFrame([[0]*4]*4, index = ['F', 'B', 'P', 'E'], columns=['F', 'B', 'P', 'E'])
for index, row in df.iterrows():
    cancelled_class = row['canclled_class']
    allocated_classes = ast.literal_eval(row['allocated_classes'])
    for allocated_class in allocated_classes:
        df_confusion_matrix[cancelled_class][allocated_class] += 1 
print(df_confusion_matrix)
fig = px.imshow(df_confusion_matrix, title="Confusion Matrix")
fig.update_xaxes(title_text='Cancelled Class')
fig.update_yaxes(title_text='Allocated Class')
fig.show()

     F  B  P    E
F   17  0  0    1
B  189  0  0    4
P   21  0  0  151
E    2  0  0  167


In [12]:
class_mapper = {'F': 0, 'B': 1, 'P': 2, 'E': 3}
total_allocated = 0
count_same_arr = 0
count_same_dst = 0
count_diff_arrNdep = 0
count_upgrade = 0
count_downgrade = 0
count_same = 0
mean_allocated_pnr_score = 0
mean_unallocated_pnr_score = 0

for index, row in df.iterrows():
    if len(row['allocated_classes']) != 2:
        total_allocated += 1
        count_same_arr += row['allocated_dst'] == row['canclled_dst']
        count_same_dst += row['allocated_src'] == row['canclled_src']
        count_diff_arrNdep += row['allocated_dst'] != row['canclled_dst'] and row['allocated_src'] != row['canclled_src']
        allocated_classes = ast.literal_eval(row['allocated_classes'])
        mean_allocated_pnr_score += row["pnr_score"]
        for allocated_class in allocated_classes:
            cancelled_class = row['canclled_class']
            count_upgrade += class_mapper[allocated_class] - class_mapper[cancelled_class] > 0
            count_downgrade += class_mapper[allocated_class] - class_mapper[cancelled_class] < 0
            count_same += class_mapper[allocated_class] - class_mapper[cancelled_class] == 0
    else:
        mean_unallocated_pnr_score += row["pnr_score"]

mean_allocated_pnr_score /= total_allocated
mean_unallocated_pnr_score /= len(df) - total_allocated

# plot allocated pi chart

fig1 = go.Figure()
fig1.add_trace(go.Pie(labels=["Allocated PNRs", "Unallocated PNRs"], values=[total_allocated, len(df) - total_allocated], name="Allocation", title="Allocation"))
fig1.show()

fig2 = go.Figure()
fig2.add_trace(go.Pie(labels=["Mean Allocated PNR score", "Mean Unallocated PNR score"], values=[mean_allocated_pnr_score, mean_unallocated_pnr_score], name="Mean PNR Scores", title="Mean PNR Scores"))
fig2.show()

fig3 = go.Figure()
fig3.add_trace(go.Pie(labels=["Upgraded Class", "Downgraded Class", "Same Class"], values=[count_upgrade, count_downgrade, count_same], name="Upgradation/Downgradation", title="Upgradation/Downgradation"))
fig3.show()

fig4 = go.Figure()
fig4.add_trace(go.Pie(labels=["Departure airports are same", "Departure airports are different"], values=[count_same_arr, total_allocated - count_same_arr], name="Departure Airports", title="Departure Airports"))
fig4.show()

fig5 = go.Figure()
fig5.add_trace(go.Pie(labels=["Arrival airports are same", "Arrival airports are different"], values=[count_same_dst, total_allocated - count_same_dst], name="Arrival Airports", title="Arrival Airports"))
fig5.show()

fig6 = go.Figure()
fig6.add_trace(go.Pie(labels=["Arrival and departure airports are both different", "Either arrival and/or departure airports are same"], values=[count_diff_arrNdep, total_allocated - count_diff_arrNdep], name="Arrival and Departure Airports", title="Arrival and Departure Airports"))
fig6.show()