In [434]:
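# https://preppindata.blogspot.com/2021/06/2021-week-22-answer-smash.html
# NOTE(review): the link above is the Week 22 challenge, but the input/output files below are for Week 23 - confirm the URL.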

import pandas as pd
import numpy as np

### Input the data

In [435]:
# Both sheets come from the same workbook. A forward-slash relative path is used
# because a raw 'data\...' string only resolves on Windows, while '/' works on
# Windows, macOS and Linux (and matches the style of the output path).
INPUT_PATH = 'data/PD 2021 Wk 23 input.xlsx'
df_o_airlines = pd.read_excel(INPUT_PATH, sheet_name='Airlines')
df_PrepAir = pd.read_excel(INPUT_PATH, sheet_name='Prep Air')

### Combine Prep Air dataset with other airlines

In [436]:
# Stack the other airlines and Prep Air into one frame. ignore_index=True gives
# every row a unique label; without it each sheet's own 0..n labels are kept and
# repeat, so label-based lookups (e.g. df_main.loc[0]) would match several rows.
df_main = pd.concat([df_o_airlines, df_PrepAir], ignore_index=True)
df_main

Unnamed: 0,Airline,CustomerID,How likely are you to recommend this airline?
0,"Schmeler, Schimmel and Collier",013d950,6
1,"Schmeler, Schimmel and Collier",0d25185,10
2,"Schmeler, Schimmel and Collier",a1b541d,10
3,"Schmeler, Schimmel and Collier",6b24ea8,9
4,"Schmeler, Schimmel and Collier",d5f96ab,7
...,...,...,...
179,Prep Air,afa8aec,5
180,Prep Air,63e95db,8
181,Prep Air,e4e2548,7
182,Prep Air,b9467ee,7


### Exclude any airlines that have had fewer than 50 customers respond

In [437]:
# Keep only airlines with at least MIN_RESPONSES survey responses
# (i.e. drop every airline that has fewer than 50 rows).
MIN_RESPONSES = 50
df_main = df_main.groupby('Airline').filter(lambda grp: len(grp) >= MIN_RESPONSES)
df_main

Unnamed: 0,Airline,CustomerID,How likely are you to recommend this airline?
0,"Schmeler, Schimmel and Collier",013d950,6
1,"Schmeler, Schimmel and Collier",0d25185,10
2,"Schmeler, Schimmel and Collier",a1b541d,10
3,"Schmeler, Schimmel and Collier",6b24ea8,9
4,"Schmeler, Schimmel and Collier",d5f96ab,7
...,...,...,...
179,Prep Air,afa8aec,5
180,Prep Air,63e95db,8
181,Prep Air,e4e2548,7
182,Prep Air,b9467ee,7


### Classify customer responses to the question in the following way:
- 0-6 = Detractors
- 7-8 = Passive
- 9-10 = Promoters

In [438]:
# Score bands handed to pd.cut: 0-6 / 7-8 / 9-10. include_lowest=True closes the
# first band on the left so that a score of 0 is classified as well.
score_col = 'How likely are you to recommend this airline?'
bins = [0, 6, 8, 10]
labels = ['Detractors', 'Passive', 'Promoters']

# Attach each response's band as a 'Classification' column
df_main = df_main.assign(
    Classification=pd.cut(df_main[score_col], bins=bins, labels=labels, include_lowest=True)
)
df_main

Unnamed: 0,Airline,CustomerID,How likely are you to recommend this airline?,Classification
0,"Schmeler, Schimmel and Collier",013d950,6,Detractors
1,"Schmeler, Schimmel and Collier",0d25185,10,Promoters
2,"Schmeler, Schimmel and Collier",a1b541d,10,Promoters
3,"Schmeler, Schimmel and Collier",6b24ea8,9,Promoters
4,"Schmeler, Schimmel and Collier",d5f96ab,7,Passive
...,...,...,...,...
179,Prep Air,afa8aec,5,Detractors
180,Prep Air,63e95db,8,Passive
181,Prep Air,e4e2548,7,Passive
182,Prep Air,b9467ee,7,Passive


### Calculate the NPS for each airline
- NPS = % Promoters - % Detractors
- Note: I rounded the %s down to the nearest whole number, so if your answer differs slightly from mine then this could be why! 

In [439]:
# Count responses per (airline, classification). 'Classification' is categorical,
# so observed=False is spelled out rather than left to the pandas default (which
# is deprecated and due to change): every airline keeps a row for all three
# classes, with size 0 when a class is empty, instead of the row going missing
# and turning into NaN when the counts are pivoted.
df_nps = df_main.groupby(['Airline', 'Classification'], as_index=False, observed=False).size()
df_nps[df_nps['Airline'] == 'Prep Air']

Unnamed: 0,Airline,Classification,size
126,Prep Air,Detractors,35
127,Prep Air,Passive,75
128,Prep Air,Promoters,74


In [440]:
# Reshape to one row per airline with one count column per classification
df_nps = (
    df_nps
    .pivot(index='Airline', columns='Classification', values='size')
    .reset_index()
    .rename_axis(None, axis=1)
)
df_nps['Total'] = df_nps['Detractors'] + df_nps['Passive'] + df_nps['Promoters']

# Percentages are rounded DOWN to whole numbers. Floor division on count * 100 is
# used instead of np.floor(count / total * 100): the float form can land just
# below an exact integer (29 / 100 * 100 == 28.999999999999996 -> 28, not 29).
# The columns are cast to float so they keep the dtype they had with np.floor.
df_nps['Promoters%'] = (df_nps['Promoters'] * 100 // df_nps['Total']).astype(float)
df_nps['Detractors%'] = (df_nps['Detractors'] * 100 // df_nps['Total']).astype(float)

# NPS = % Promoters - % Detractors, stored as a whole number
df_nps['NPS'] = (df_nps['Promoters%'] - df_nps['Detractors%']).astype(int)

df_nps

Unnamed: 0,Airline,Detractors,Passive,Promoters,Total,Promoters%,Detractors%,NPS
0,"Abbott, Boyle and Morar",15,22,23,60,38.0,25.0,13
1,"Abbott, Gutkowski and Cummings",29,41,31,101,30.0,28.0,2
2,Abshire Group,24,32,35,91,38.0,26.0,12
3,Bayer-Collier,23,37,28,88,31.0,26.0,5
4,"Bernhard, Ernser and Toy",20,32,30,82,36.0,24.0,12
5,Blick Inc,31,31,31,93,33.0,33.0,0
6,Bogan and Sons,32,53,27,112,24.0,28.0,-4
7,"Bogan, Barrows and Morar",21,31,31,83,37.0,25.0,12
8,Brakus-Weissnat,14,21,17,52,32.0,26.0,6
9,Brekke Inc,17,29,18,64,28.0,26.0,2


### Calculate the average and standard deviation of the dataset

In [441]:
# Mean and standard deviation (pandas' default .std()) of NPS across all
# remaining airlines, relabelled with the names used further down.
df_stat = (
    df_nps['NPS']
    .agg(['mean', 'std'])
    .rename({'mean': 'average', 'std': 'standard deviation'})
)
df_stat

average               10.152542
standard deviation     9.648680
Name: NPS, dtype: float64

### Take each airline's NPS and subtract the average, then divide this by the standard deviation


In [442]:
# Standardise each airline's NPS: distance from the average, measured in
# standard deviations.
nps_mean = df_stat['average']
nps_std = df_stat['standard deviation']
df_nps['Z-Score'] = df_nps['NPS'].sub(nps_mean).div(nps_std)
df_nps

Unnamed: 0,Airline,Detractors,Passive,Promoters,Total,Promoters%,Detractors%,NPS,Z-Score
0,"Abbott, Boyle and Morar",15,22,23,60,38.0,25.0,13,0.295114
1,"Abbott, Gutkowski and Cummings",29,41,31,101,30.0,28.0,2,-0.844939
2,Abshire Group,24,32,35,91,38.0,26.0,12,0.191473
3,Bayer-Collier,23,37,28,88,31.0,26.0,5,-0.534015
4,"Bernhard, Ernser and Toy",20,32,30,82,36.0,24.0,12,0.191473
5,Blick Inc,31,31,31,93,33.0,33.0,0,-1.052221
6,Bogan and Sons,32,53,27,112,24.0,28.0,-4,-1.466785
7,"Bogan, Barrows and Morar",21,31,31,83,37.0,25.0,12,0.191473
8,Brakus-Weissnat,14,21,17,52,32.0,26.0,6,-0.430374
9,Brekke Inc,17,29,18,64,28.0,26.0,2,-0.844939


### Filter to just show Prep Air's NPS along with their Z-Score

In [443]:
# Select the Prep Air row and only the three columns wanted in the output
is_prep_air = df_nps['Airline'] == 'Prep Air'
dt_output = df_nps.loc[is_prep_air, ['Airline', 'NPS', 'Z-Score']]
dt_output

Unnamed: 0,Airline,NPS,Z-Score
42,Prep Air,21,1.124243


### Output the data

In [444]:
# index=False: the row label is just the airline's position in the pivoted table,
# not data, so it should not be written as an unnamed first CSV column.
dt_output.to_csv(r'output/2021-week23-output.csv', index=False)