In [None]:
import pandas as pd
import sqlite3

# Open the processed SQLite database; `conn` is reused by every query cell.
db_path = "../data/processed/flight_customer.db"
conn = sqlite3.connect(db_path)

# Preview the first five rows of the table to check its columns and values.
preview_sql = "SELECT * FROM all_customers_loyalty LIMIT 5"
pd.read_sql(preview_sql, conn)


Unnamed: 0,id,gender,customer_type,age,type_of_travel,class,flight_distance,inflight_wifi_service,departure/arrival_time_convenient,ease_of_online_booking,...,inflight_entertainment,on_board_service,leg_room_service,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,satisfaction
0,19556,Female,Loyal Customer,52,Business travel,Eco,160,5,4,3,...,5,5,5,5,2,5,5,50,44.0,satisfied
1,90035,Female,Loyal Customer,36,Business travel,Business,2863,1,1,3,...,4,4,4,4,3,4,5,0,0.0,satisfied
2,12360,Male,disloyal Customer,20,Business travel,Eco,192,2,0,2,...,2,4,1,3,2,2,2,0,0.0,neutral or dissatisfied
3,77959,Male,Loyal Customer,44,Business travel,Business,3377,0,0,0,...,1,1,1,1,3,1,4,0,6.0,satisfied
4,36875,Female,Loyal Customer,49,Business travel,Eco,1182,2,3,4,...,2,2,2,2,4,2,4,0,20.0,satisfied


### Which service ratings correlate most strongly with satisfaction?

In [None]:
# Average the four service ratings (on-board, leg room, inflight, check-in)
# from all_customers_loyalty for rows where satisfaction = 'satisfied'.
# NOTE(review): the WHERE filter leaves a single satisfaction group, so this
# result alone cannot show how ratings differ between satisfaction levels.
ss = """ 
SELECT 
     satisfaction, 
     AVG(on_board_service) AS on_board_service, 
     AVG(leg_room_service) AS leg_room_service, 
     AVG(inflight_service) AS inflight_service, 
     AVG(checkin_service) AS checkin_service
FROM 
    all_customers_loyalty
WHERE 
    satisfaction = 'satisfied'
GROUP BY 
    satisfaction
"""
# Run the query through the open connection and keep the result as a DataFrame.
strongest_service = pd.read_sql(ss, conn)
# Bare expression: displayed as the cell output.
strongest_service

Unnamed: 0,satisfaction,on_board_service,leg_room_service,inflight_service,checkin_service
0,satisfied,3.856475,3.820376,3.971277,3.648786


### Analysis

Among satisfied customers, inflight service has the highest average rating (3.97) and check-in service has the lowest (3.65).

We have to come up with a way to make checkin easier. 

---

### Are delays or service scores stronger predictors of satisfaction?

In [None]:
# For each satisfaction group, average the departure delay, the arrival delay
# and the four service ratings, so both kinds of measure appear side by side.
dss = """ 
SELECT 
    satisfaction,
    AVG(departure_delay_in_minutes) AS departure_delay, 
    AVG(arrival_delay_in_minutes) AS arrival_delay, 
    AVG(on_board_service) AS on_board_service, 
    AVG(leg_room_service) AS leg_room_service, 
    AVG(inflight_service) AS inflight_service, 
    AVG(checkin_service) AS checkin_service
FROM 
    all_customers_loyalty
GROUP BY 
    satisfaction
"""
# Run the query through the open connection and keep the result as a DataFrame.
delays_services_satisfaction = pd.read_sql(dss, conn)
# Bare expression: displayed as the cell output.
delays_services_satisfaction

Unnamed: 0,satisfaction,departure_delay,arrival_delay,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,16.340184,17.055992,3.01957,2.990495,3.389662,3.043045
1,satisfied,12.435001,12.533859,3.856475,3.820376,3.971277,3.648786


### Analysis 

Both service scores and delays are associated with satisfaction, but the gap between the two groups appears larger for service ratings than for delays.

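Average service ratings differ by roughly 0.6–0.8 points between the groups (on-board 0.84, leg room 0.83, check-in 0.61, inflight 0.58), while average delays differ by only about 4–4.5 minutes.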

---

### Does class (Eco vs Business) contribute to satisfaction differences?

In [None]:
# Compare satisfaction across travel classes.
# A raw count of satisfied customers mostly reflects how many customers travel
# in each class, so the query also returns the class size and the share of
# satisfied customers, which can be compared between classes.
# `class`, `satisfaction` and `total_satisfaction` keep their original meaning;
# `total_customers` and `satisfaction_rate_pct` are added columns.
# A class with no satisfied customers now appears with a count of 0 instead of
# being omitted.
ecbs = """
SELECT
    c.class AS class,
    'satisfied' AS satisfaction,
    COUNT(CASE WHEN c.satisfaction = 'satisfied' THEN c.class END) AS total_satisfaction,
    COUNT(*) AS total_customers,
    ROUND(
        100.0 * COUNT(CASE WHEN c.satisfaction = 'satisfied' THEN c.class END) / COUNT(*),
        2
    ) AS satisfaction_rate_pct
FROM
    all_customers_loyalty AS c
GROUP BY
    c.class
ORDER BY
    c.class
"""
# Run the query through the open connection and keep the result as a DataFrame.
eco_vs_business_satisfaction = pd.read_sql(ecbs, conn)
# Bare expression: displayed as the cell output.
eco_vs_business_satisfaction

Unnamed: 0,class,satisfaction,total_satisfaction
0,Business,satisfied,43050
1,Eco,satisfied,10902
2,Eco Plus,satisfied,2310


### Analysis

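Yes, class appears to contribute to satisfaction differences. Business class has about 18.6 times as many satisfied customers as Eco Plus (43,050 vs. 2,310). Note that these are raw counts, so part of the gap may simply reflect how many customers travel in each class; comparing the share of satisfied customers within each class would confirm the effect.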

---

### How do ratings differ between satisfied vs dissatisfied customers?

In [None]:
# Average the four service ratings (on-board, leg room, inflight, check-in)
# separately for each satisfaction group so the groups can be compared.
sdc = """ 
SELECT 
    satisfaction, 
    AVG(on_board_service) AS on_board_service, 
    AVG(leg_room_service) AS leg_room_service, 
    AVG(inflight_service) AS inflight_service, 
    AVG(checkin_service) AS checkin_service
FROM 
    all_customers_loyalty
GROUP BY 
    satisfaction

"""
# Run the query through the open connection and keep the result as a DataFrame.
satisfied_vs_dissatisfied_customers = pd.read_sql(sdc, conn)
# Bare expression: displayed as the cell output.
satisfied_vs_dissatisfied_customers

Unnamed: 0,satisfaction,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,3.01957,2.990495,3.389662,3.043045
1,satisfied,3.856475,3.820376,3.971277,3.648786


### Analysis 

Satisfied & dissatisfied customers are both most satisfied with the inflight services.

They differ in their lowest-rated service: satisfied customers rate the check-in process lowest (3.65), while dissatisfied customers rate leg room service lowest (2.99).

---

### Which service categories are most critical to fix?

In [None]:
# Average the four service ratings per satisfaction group; the lowest averages
# and the largest gaps between groups point at the categories to fix.
# NOTE(review): this is the same aggregation as the `sdc` query earlier in the
# notebook, so both cells produce the same table.
fsc = """ 
SELECT 
    satisfaction, 
    AVG(on_board_service) AS on_board_service, 
    AVG(leg_room_service) AS leg_room_service, 
    AVG(inflight_service) AS inflight_service, 
    AVG(checkin_service) AS checkin_service
FROM 
    all_customers_loyalty
GROUP BY 
    satisfaction
"""

# Run the query through the open connection and keep the result as a DataFrame.
fix_service_category = pd.read_sql(fsc, conn)
# Bare expression: displayed as the cell output.
fix_service_category

Unnamed: 0,satisfaction,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,3.01957,2.990495,3.389662,3.043045
1,satisfied,3.856475,3.820376,3.971277,3.648786


### Analysis

The category that needs the most fixing is the leg room service to bring dissatisfied customers rating up and checkin services to bring up the satisfied customer rating. 

Leg room satisfaction gap = 0.83 between groups

---

### What is the satisfaction drop when delays exceed 20 minutes?

In [None]:
# Service ratings and delay sizes, per satisfaction group, for flights where
# BOTH the departure delay and the arrival delay are at least 20 minutes.
# The two delay columns are averaged. Selecting them bare next to
# GROUP BY satisfaction makes SQLite return the value from one arbitrary row
# of each group, which does not describe the group.
# Output column names are unchanged; the table alias keeps the WHERE clause
# bound to the table columns rather than to the same-named output aliases.
# NOTE(review): the heading asks about delays that "exceed" 20 minutes while
# the filter is >= 20; the threshold is left as it was. Confirm which is meant.
sdd = """
SELECT
    c.satisfaction AS satisfaction,
    AVG(c.departure_delay_in_minutes) AS departure_delay_in_minutes,
    AVG(c.arrival_delay_in_minutes) AS arrival_delay_in_minutes,
    AVG(c.on_board_service) AS on_board_service,
    AVG(c.leg_room_service) AS leg_room_service,
    AVG(c.inflight_service) AS inflight_service,
    AVG(c.checkin_service) AS checkin_service
FROM
    all_customers_loyalty AS c
WHERE
    c.departure_delay_in_minutes >= 20
    AND c.arrival_delay_in_minutes >= 20
GROUP BY
    c.satisfaction
"""

# Run the query through the open connection and keep the result as a DataFrame.
satisfaction_drop_delay = pd.read_sql(sdd, conn)
# Bare expression: displayed as the cell output.
satisfaction_drop_delay

Unnamed: 0,satisfaction,departure_delay_in_minutes,arrival_delay_in_minutes,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,40,68.0,2.986573,3.048096,3.295327,2.955621
1,satisfied,50,44.0,3.823569,3.863165,3.952189,3.726061


### Analysis 

When both the departure and arrival delays are at least 20 minutes, inflight and on-board service ratings drop for both satisfied and dissatisfied customers, and check-in ratings also drop for dissatisfied customers.

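Within the dissatisfied group, inflight service (3.39 → 3.30) and check-in (3.04 → 2.96) drop the most, by about 0.09 each. The larger 3.64 → 2.95 (−0.69) check-in gap compares satisfied customers overall with dissatisfied customers on delayed flights, so it mixes the two groups rather than measuring the effect of the delay.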

---

### Is arrival delay more impactful than departure delay?

In [None]:
# Service ratings, per satisfaction group, for flights whose arrival delay is
# larger than their departure delay.
# arrival_delay is averaged per group. Selecting the column bare next to
# GROUP BY satisfaction makes SQLite return the value from one arbitrary row
# of each group, which does not describe the group.
ad = """
SELECT
    satisfaction,
    AVG(arrival_delay_in_minutes) AS arrival_delay,
    AVG(on_board_service) AS on_board_service,
    AVG(leg_room_service) AS leg_room_service,
    AVG(inflight_service) AS inflight_service,
    AVG(checkin_service) AS checkin_service
FROM
    all_customers_loyalty
WHERE
    arrival_delay_in_minutes > departure_delay_in_minutes
GROUP BY
    satisfaction
"""
# Run the query through the open connection and keep the result as a DataFrame.
arrival_delay = pd.read_sql(ad, conn)
# Bare expression: displayed as the cell output.
arrival_delay

Unnamed: 0,satisfaction,arrival_delay,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,68.0,2.986493,2.98764,3.344668,2.990352
1,satisfied,6.0,3.847953,3.836893,3.956015,3.675053


In [None]:
# Service ratings, per satisfaction group, for flights whose departure delay
# is larger than their arrival delay.
# departure_delay is averaged per group. Selecting the column bare next to
# GROUP BY satisfaction makes SQLite return the value from one arbitrary row
# of each group, which does not describe the group.
dd = """
SELECT
    satisfaction,
    AVG(departure_delay_in_minutes) AS departure_delay,
    AVG(on_board_service) AS on_board_service,
    AVG(leg_room_service) AS leg_room_service,
    AVG(inflight_service) AS inflight_service,
    AVG(checkin_service) AS checkin_service
FROM
    all_customers_loyalty
WHERE
    arrival_delay_in_minutes < departure_delay_in_minutes
GROUP BY
    satisfaction
"""
# Run the query through the open connection and keep the result as a DataFrame.
departure_delay = pd.read_sql(dd, conn)
# Bare expression: displayed as the cell output.
departure_delay


Unnamed: 0,satisfaction,departure_delay,on_board_service,leg_room_service,inflight_service,checkin_service
0,neutral or dissatisfied,18,3.017701,2.997188,3.383424,3.031668
1,satisfied,50,3.893605,3.854916,4.017499,3.704613


### Analysis 

Flights where the departure delay is larger than the arrival delay still tend to receive slightly better service ratings than flights where the arrival delay is larger.

Arrival delays appear to create a more negative customer perception, possibly because customers remember the end of their experience more than the beginning; the query results alone do not establish the cause.