In [None]:
import pandas as pd 
import sqlite3 

df1 = pd.read_csv("../data/processed/customer_info_cleaned.csv")
df2 = pd.read_csv("../data/processed/more_customers_cleaned.csv")

full_df = pd.concat([df1, df2], ignore_index=True)

conn = sqlite3.connect("../data/processed/flight_customer.db")

full_df.to_sql("all_customers_loyalty", conn, if_exists="replace", index=False)

conn.close

pd.read_sql("SELECT name FROM sqlite_master WHERE type='table';", conn)

Unnamed: 0,name
0,customer_info_cleaned
1,more_customers_cleaned
2,all_customers_loyalty


In [None]:
tables = ["more_customers_cleaned", "customer_info_cleaned", "all_customers_loyalty"]

for t in tables: 
    count = pd.read_sql(f"SELECT COUNT (*) AS rows FROM {t};", conn)
    print(t)
    display(count)


more_customers_cleaned


Unnamed: 0,rows
0,103594


customer_info_cleaned


Unnamed: 0,rows
0,25893


all_customers_loyalty


Unnamed: 0,rows
0,129487


### Are certain age groups more likely to be loyal customers?

In [None]:
lag = """
SELECT 
    age AS Age, 
    customer_type,
    COUNT(age) as age_loyalty
FROM
    all_customers_loyalty
WHERE
    customer_type = 'Loyal Customer'
GROUP BY
    AGE
ORDER BY 
    age_loyalty DESC
"""

loyal_age_group = pd.read_sql(lag, conn)
loyal_age_group

Unnamed: 0,Age,customer_type,age_loyalty
0,39,Loyal Customer,2937
1,41,Loyal Customer,2851
2,40,Loyal Customer,2763
3,44,Loyal Customer,2743
4,47,Loyal Customer,2693
...,...,...,...
70,76,Loyal Customer,60
71,74,Loyal Customer,56
72,79,Loyal Customer,43
73,78,Loyal Customer,42


### Analysis 

The late 30's early 40's seem to be the most loyal age group of customers.

---

### What is the age distribution of customers?

In [None]:
ad = """
SELECT 
    age AS Age
FROM 
    all_customers_loyalty
ORDER BY
    Age DESC
"""

age_distribution = pd.read_sql(ad, conn)
age_distribution

Unnamed: 0,Age
0,85
1,85
2,85
3,85
4,85
...,...
129482,7
129483,7
129484,7
129485,7


### Analysis

There is a very wide age distribution of customers, rangin from 85 years old to 7 years old.

---

### What is the gender split, and does gender impact satisfaction?

In [None]:
gs = """
SELECT 
    gender AS Gender,
    COUNT(Satisfaction) AS total_satisfied
FROM 
    all_customers_loyalty
WHERE 
    satisfaction = 'satisfied'
GROUP BY 
    Gender
ORDER BY
    Gender
"""

gender_split = pd.read_sql(gs, conn)
gender_split

Unnamed: 0,Gender,total_satisfied
0,Female,28179
1,Male,28083


### Analysis

There is a difference of 96 females more satisfied than males. Although 96 more were satisfied, out of the large sample size we have, it is not a big discrepancy. 

---

### How many customers are “Loyal” vs “disloyal” (customer_type)?

In [None]:
lc_vs_dc = """
SELECT 
    customer_type,
    COUNT(CASE WHEN customer_type = 'disloyal Customer' THEN 1.0 ELSE 0 END) AS total_customers
FROM 
    all_customers_loyalty
GROUP BY 
    customer_type
"""

loyal_vs_disloyal_customers = pd.read_sql(lc_vs_dc, conn)
loyal_vs_disloyal_customers 

Unnamed: 0,customer_type,total_customers
0,Loyal Customer,105773
1,disloyal Customer,23714


### Analysis

We as a company have way more loyal customers than disloyal customers.

---

### Do business travelers have higher satisfaction than personal travelers?

In [None]:
bt_vs_pt = """
SELECT
    type_of_travel,
    COUNT(CASE WHEN type_of_travel = 'Business Travel' THEN 1.0 ELSE 0 END) AS total_satisfaction_by_type_of_travel
  
FROM
    all_customers_loyalty
WHERE 
    satisfaction = 'satisfied'
GROUP BY 
    type_of_travel
"""

business_vs_personal_travel = pd.read_sql(bt_vs_pt, conn)
business_vs_personal_travel

Unnamed: 0,type_of_travel,total_satisfaction_by_type_of_travel
0,Business travel,52207
1,Personal Travel,4055


### Analysis

Our business traveling customers seem to be a lot more satisfied than our personal travelers.

---

### How does flight distance differ by travel type?

In [None]:
fdtt = """
SELECT
    type_of_travel,
    AVG(flight_distance) AS avg_distance,
    MIN(flight_distance),
    MAX(flight_distance)
FROM all_customers_loyalty
GROUP BY type_of_travel;
"""
flight_distance_travel_type = pd.read_sql(fdtt, conn)
flight_distance_travel_type

Unnamed: 0,type_of_travel,avg_distance,MIN(flight_distance),MAX(flight_distance)
0,Business travel,1368.462396,31,4983
1,Personal Travel,792.035588,31,4983


### Analysis

Our business travelers tend to fly a lot farther on average than our personal travelers.

Maybe we could give more incentives or one time perks for further flights for personal travelers.

---

### Does class (Eco/Biz) affect service ratings?

In [None]:
evb = """
SELECT 
    class,
    AVG(on_board_service) AS on_board_service,
    AVG(leg_room_service) AS leg_room_service,
    AVG(checkin_service) AS checkin_services,
    AVG(inflight_service) AS inflight_services
FROM 
    all_customers_loyalty
GROUP BY    
    class
    """
eco_vs_biz = pd.read_sql(evb, conn)
eco_vs_biz

Unnamed: 0,class,on_board_service,leg_room_service,checkin_services,inflight_services
0,Business,3.682529,3.646169,3.520745,3.846007
1,Eco,3.120171,3.083848,3.124507,3.467144
2,Eco Plus,3.034755,3.05661,3.014606,3.382303


### Analysis

Our "Business" class is doing a lot better than our "Eco" and "Eco Plus". 

People pay more for Business class, but we still need to give Eco and Eco Plus the feeling of satisfaction for what they paid for.

---

### Which service categories have the lowest ratings?
### Which service categories have the highest ratings?

In [None]:
cr = """
SELECT 
    AVG(on_board_service) AS on_board_service,
    AVG(leg_room_service) AS leg_room_service,
    AVG(checkin_service) AS checkin_services,
    AVG(inflight_service) AS inflight_services
FROM 
    all_customers_loyalty
    """

category_ratings = pd.read_sql(cr, conn)
category_ratings

Unnamed: 0,on_board_service,leg_room_service,checkin_services,inflight_services
0,3.383204,3.351078,3.306239,3.642373


### Analysis

Our lowest service right now is by far the checkin services. 

We should find a way to make checking in for their flights easier. 

Also our highest service is our onboarding service so people are enjoying the onboarding process.

---

### Are service weaknesses consistent across customer types?

In [None]:
ctr = """
SELECT 
    customer_type,
    AVG(on_board_service) AS on_board_service,
    AVG(leg_room_service) AS leg_room_service,
    AVG(checkin_service) AS checkin_services,
    AVG(inflight_service) AS inflight_services
FROM 
    all_customers_loyalty
GROUP BY 
    customer_type
    """
customer_type_ratings = pd.read_sql(ctr, conn)
customer_type_ratings

Unnamed: 0,customer_type,on_board_service,leg_room_service,checkin_services,inflight_services
0,Loyal Customer,3.416136,3.380296,3.324979,3.629244
1,disloyal Customer,3.236316,3.220756,3.222653,3.700936


### Analysis

It appears as though the loyal customers tend to be higher(as they should be) except for inflight services. Maybe because they expect more due to them being loyal.

Meanwhile disloyal customers are not enjoying the other services and that needs to be corrected. 

---

### What is the average departure delay?

In [None]:
avgdd = """
SELECT 
    AVG(departure_delay_in_minutes) AS departure_delay
FROM 
    all_customers_loyalty
"""
avg_departure_delay = pd.read_sql(avgdd, conn)
avg_departure_delay

Unnamed: 0,departure_delay
0,14.643385


### Analysis

The average departure delay is 15 minutes.

---

### What is the average arrival delay?

In [None]:
avgad = """
SELECT 
    AVG(arrival_delay_in_minutes) AS arrival_delay
FROM 
    all_customers_loyalty
"""

avg_arrival_delay = pd.read_sql(avgad, conn)
avg_arrival_delay

Unnamed: 0,arrival_delay
0,15.091129


### Analysis

The average arrival delay is 15 minutes as well.

---

### Do longer delays lead to lower satisfaction?

In [None]:
ldls = """
SELECT
    satisfaction,
    AVG(arrival_delay_in_minutes) AS arrival_delay,
    AVG(departure_delay_in_minutes) AS departure_delay
FROM
    all_customers_loyalty
GROUP BY 
    satisfaction
"""
long_delay_lower_satisfaction = pd.read_sql(ldls, conn)
long_delay_lower_satisfaction

Unnamed: 0,satisfaction,arrival_delay,departure_delay
0,neutral or dissatisfied,17.055992,16.340184
1,satisfied,12.533859,12.435001


### Analysis

There is a clear correlation between longer delays and dissatisfied customers. 

One of the clear better ways to gain more satisfaction is to reduce delay times, if not eliminate them.

---

### Are loyal customers more tolerant of delays compared to disloyal?

In [None]:
lcs = """
SELECT
    satisfaction,
    customer_type,
    AVG(arrival_delay_in_minutes) AS arrival_delay,
    AVG(departure_delay_in_minutes) AS departure_delay,
    COUNT(CASE WHEN satisfaction = 'satisfied' THEN 1.0 ELSE 0 END) as total_satisfaction
FROM
    all_customers_loyalty
GROUP BY 
    customer_type
"""
loyal_vs_disloyal_customers_satisfaction_by_delay = pd.read_sql(lcs, conn)
loyal_vs_disloyal_customers_satisfaction_by_delay

Unnamed: 0,satisfaction,customer_type,arrival_delay,departure_delay,total_satisfaction
0,satisfied,Loyal Customer,15.004973,14.569181,105773
1,neutral or dissatisfied,disloyal Customer,15.475415,14.974361,23714


### Analysis

Loyal Customers are more satisfied than disloyal customers with the same average time delays but it can still be cut down to gain satisfaction from the disloyal customers. 

---

### What percentage of customers are satisfied vs dissatisfied?

In [None]:
svd = """
SELECT 
    satisfaction, 
    COUNT(CASE WHEN satisfaction = 'satisfied' THEN 1.0 ELSE 0 END) AS total_satisfied
FROM 
    all_customers_loyalty
GROUP BY
    satisfaction
"""
satisfied_vs_dissatisfied = pd.read_sql(svd, conn)
satisfied_vs_dissatisfied

Unnamed: 0,satisfaction,total_satisfied
0,neutral or dissatisfied,73225
1,satisfied,56262


### Analysis

Most of our customers seem to be dissatisfied with their puchase and we need to be able to fix that. 

---

### How do satisfaction levels differ by: 

- Class
- Customer Type
- Type of Travel
- Gender

In [None]:
sd = """
SELECT 
    class,
    type_of_travel,
    gender, 
    customer_type,
    satisfaction,
    COUNT(CASE WHEN satisfaction = 'satisfied' THEN 1.0 ELSE 0 END) AS total_satisfied
FROM 
    all_customers_loyalty
GROUP BY 
    gender, 
    class, 
    customer_type, 
    type_of_travel, 
    satisfaction
ORDER BY 
    total_satisfied DESC
LIMIT 10
"""
satisfaction_difference = pd.read_sql(sd, conn)
satisfaction_difference

Unnamed: 0,class,type_of_travel,gender,customer_type,satisfaction,total_satisfied
0,Business,Business travel,Male,Loyal Customer,satisfied,19567
1,Business,Business travel,Female,Loyal Customer,satisfied,19515
2,Eco,Personal Travel,Male,Loyal Customer,neutral or dissatisfied,14703
3,Eco,Personal Travel,Female,Loyal Customer,neutral or dissatisfied,14680
4,Eco,Business travel,Female,disloyal Customer,neutral or dissatisfied,6242
5,Business,Business travel,Female,Loyal Customer,neutral or dissatisfied,5593
6,Business,Business travel,Male,Loyal Customer,neutral or dissatisfied,5453
7,Eco,Business travel,Male,disloyal Customer,neutral or dissatisfied,5238
8,Eco,Business travel,Female,Loyal Customer,neutral or dissatisfied,3100
9,Eco,Business travel,Male,Loyal Customer,neutral or dissatisfied,3100


### Analysis 

Our most satisfied customers are loyal customers flying business with males at 19,567 and females at 19,515. 

Our lower end satisfied customers are males traveling "Eco" Business travel at 3,100.