In [355]:
### TEST PLOTS
import ast
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import pandas as pd
DIVIDER = "----------------"

In [356]:
df = pd.read_csv('result_2q.csv')
print(df.keys()) 

Index(['pnr', 'pnr_score', 'canclled_flight', 'canclled_class',
       'canclled_flight_departure', 'canclled_flight_arrival', 'canclled_src',
       'canclled_dst', 'allocated_src', 'allocated_dst', 'allocated_flights',
       'allocated_flights_departure', 'allocated_flights_arrival',
       'allocated_classes', 'flight_time', 'allocated_flights_score'],
      dtype='object')


In [357]:
## STATIC REPOPRT
mean_score = df["pnr_score"].mean()
std_dev_score = df["pnr_score"].std()
print("PNR Score Stats")
print("Mean PNR Score: ", mean_score)
print("Std Dev. PNR Score: ", std_dev_score)
print("Max PNR Score: ", df["pnr_score"].max())
print("Min PNR Score: ", df["pnr_score"].min())
print(DIVIDER)

mean_score = df["allocated_flights_score"].mean()
std_dev_score = df["allocated_flights_score"].std()
print("PNR-FLIGHT Score Stats")
print("Mean PNR-FLIGHT Score: ", mean_score)
print("Std Dev. PNR-FLIGHT Score: ", std_dev_score)
print("Max PNR-FLIGHT Score: ", df["allocated_flights_score"].max())
print("Min PNR-FLIGHT Score: ", df["allocated_flights_score"].min())
print(DIVIDER)

print("Allocation Stats")
total = df["pnr"].count()
allocated = df["allocated_src"].count()
print("Total pnr: ", total)
print("Total allocated pnr: ", allocated)
print("Total unallocated pnr: ", total - allocated)

print(DIVIDER)
print("Connection Stats")

conn = [0, 0, 0, 0]
for x in df["allocated_flights"]:
    # laod to a list
    x = ast.literal_eval(x)
    conn[len(x)] += 1

print("Unallocated : ", conn[0])
print("Direct : ", conn[1])
print("One Stop : ", conn[2])
print("Two Stop : ", conn[3])

print(DIVIDER)
class_map = {"F": 0, "B": 1, "P": 2, "E": 3}
flight_stats = {}
for _, row in df.iterrows():
    r_flight = ast.literal_eval(row["canclled_flight"])[0]
    if r_flight not in flight_stats:
        flight_stats[r_flight] = {
            "total_pnr": 0,
            "allocated_pnr": 0,
            "unallocated_pnr": 0,
            "upgraded_pnr": 0,
            "samestate_pnr": 0,
            "downgraded_pnr": 0,
            "allocated_flight": {},
            "default_flight": None,
            "default_allocation": 0
        }

    flight_stats[r_flight]["total_pnr"] += 1
    if type(row["allocated_src"]) == str:
        flight_stats[r_flight]["allocated_pnr"] += 1
    else:
        flight_stats[r_flight]["unallocated_pnr"] += 1


    if type(row["allocated_src"]) == str:
        r_class_score = float(class_map[row["canclled_class"]])
        a_class_score = sum([class_map[x] for x in ast.literal_eval(row["allocated_classes"])]) / max(
            len(ast.literal_eval(row["allocated_classes"])), 1
        )
        # print(r_class_score, a_class_score, row["canclled_class"], ast.literal_eval(row["allocated_classes"]))
        if r_class_score > a_class_score:
            flight_stats[r_flight]["upgraded_pnr"] += 1
        elif r_class_score < a_class_score:
            flight_stats[r_flight]["downgraded_pnr"] += 1
        else:
            flight_stats[r_flight]["samestate_pnr"] += 1

        k = tuple(ast.literal_eval(row["allocated_flights"]))
        if flight_stats[r_flight]["allocated_flight"].get(k) is None:
            flight_stats[r_flight]["allocated_flight"][k] = 0
        flight_stats[r_flight]["allocated_flight"][k] += 1

for flight, details in flight_stats.items():
    max_count = 0
    max_flight = None
    for k, v in details["allocated_flight"].items():
        if v > max_count:
            max_count = v
            max_flight = k
    flight_stats[flight]["default_flight"] = max_flight
    flight_stats[flight]["default_allocation"] = max_count


print("Cancelled Flight Level Stats")
print("Total Cancelled Flights: ", len(flight_stats))
print(
    "Flight id\tTotal PNR\tReAllocated PNR\tUnallocated PNR\tUpgraded PNR\tSameState PNR\tDowngraded PNR\tDefault Flight\tDefault Allocation"
)
for k, v in flight_stats.items():
    print(
        f"{k}\t{v['total_pnr']}\t{v['allocated_pnr']}\t{v['unallocated_pnr']}\t{v['upgraded_pnr']}\t{v['samestate_pnr']}\t{v['downgraded_pnr']}\t{v['default_flight']}\t{v['default_allocation']}"
    )


PNR Score Stats
Mean PNR Score:  6842.622950819672
Std Dev. PNR Score:  4487.443927835142
Max PNR Score:  18400
Min PNR Score:  600
----------------
PNR-FLIGHT Score Stats
Mean PNR-FLIGHT Score:  2282.836902809843
Std Dev. PNR-FLIGHT Score:  7402.477447139404
Max PNR-FLIGHT Score:  42361.702317758536
Min PNR-FLIGHT Score:  -1.0
----------------
Allocation Stats
Total pnr:  61
Total allocated pnr:  38
Total unallocated pnr:  23
----------------
Connection Stats
Unallocated :  23
Direct :  16
One Stop :  1
Two Stop :  21
----------------
Cancelled Flight Level Stats
Total Cancelled Flights:  3
Flight id	Total PNR	ReAllocated PNR	Unallocated PNR	Upgraded PNR	SameState PNR	Downgraded PNR	Default Flight	Default Allocation
INV-ZZ-6335749	36	36	0	1	26	9	('INV-ZZ-8591127',)	16
INV-ZZ-3076605	23	0	23	0	0	0	None	0
INV-ZZ-1828959	2	2	0	1	0	1	('INV-ZZ-1129237', 'INV-ZZ-8660705', 'INV-ZZ-5340091')	2


In [358]:
# plot for pnr_score distribution of cancelled flights-pnrs
fig = px.bar(
    df, x="pnr", y="pnr_score", title="PNR Scores", labels={"pnr_score": "PNR Score"}
)
# Add mean and standard deviation annotations
fig.add_shape(
    type="line",
    x0=-0.5,
    x1=len(df) - 0.5,
    y0=mean_score,
    y1=mean_score,
    line=dict(color="red", width=2),
    name="Mean",
)
# Show the chart
print(mean_score, std_dev_score)
fig.show()

2282.836902809843 7402.477447139404


In [359]:
# plot for class distribution for cancelled flights
fig = px.pie(df, names='allocated_classes', title='Class Distribution for All PNRS')

# plot for class distribution for cancelled flights
fig2 = px.pie(df, names='canclled_class', title='Class Distribution for Cancelled PNRS')
fig.show()
fig2.show()



In [360]:
#Pnr vs delay
# Create a scatter plot for PNR vs delay
df.sort_values(by=['allocated_flights_score'], inplace=True)
df['canclled_flight_arrival'] = pd.to_datetime(df['canclled_flight_arrival'])
df['allocated_flights_arrival'] = pd.to_datetime(df['allocated_flights_arrival'])

# Calculate the delay
df['delay'] = (df['allocated_flights_arrival'] - df['canclled_flight_arrival']).dt.total_seconds() / 3600

# Create a scatter plot for PNR vs delay
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1, subplot_titles=["Scores", "PNR vs Delay", "Pnr Scores"])

# Plot PNR Scores
fig.add_trace(
    px.bar(df, x="pnr", y="allocated_flights_score").update_traces(marker_color='blue').data[0],
    row=1, col=1
)

# Plot PNR vs Delay
fig.add_trace(
    px.scatter(df, x='pnr', y='delay').data[0],
    row=1, col=1
)

fig.add_trace(
    px.scatter(df, x='pnr', y='delay').data[0],
    row=2, col=1
)

fig.add_trace(
    px.bar(df, x="pnr", y="pnr_score").update_traces(marker_color='green').data[0],
    row=2, col=1
)

# Calculate and print the mean delay
mean_delay = df['delay'].mean()
print(f'Mean Delay: {mean_delay} hours, std_dev: {df["delay"].std()}')

# Show the plot
fig.show()

Mean Delay: 35.56929824561403 hours, std_dev: 29.538830128834093
