# Problema de Negócio
📝Identificar fatores que influenciam a satisfação dos passageiros.

## Libs

In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

## Carregando o DataSet

In [16]:
# Load the raw survey data from a CSV expected in the notebook's working directory.
# NOTE(review): the info() output lists an `Unnamed: 0` column, which looks like a
# saved row index — consider `index_col=0` or dropping it; confirm before changing.
nps = pd.read_csv('NPS.csv')

## Explorando o DataSet

In [17]:
# (rows, columns) of the frame as loaded.
nps.shape

(103904, 25)

In [18]:
# Column names, non-null counts and dtypes: a first look at types and missing data.
nps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103904 entries, 0 to 103903
Data columns (total 25 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   Unnamed: 0                         103904 non-null  int64  
 1   id                                 103904 non-null  int64  
 2   Gender                             103904 non-null  object 
 3   Customer Type                      103904 non-null  object 
 4   Age                                103904 non-null  int64  
 5   Type of Travel                     103904 non-null  object 
 6   Class                              103904 non-null  object 
 7   Flight Distance                    103904 non-null  int64  
 8   Inflight wifi service              103904 non-null  int64  
 9   Departure/Arrival time convenient  103904 non-null  int64  
 10  Ease of Online booking             103904 non-null  int64  
 11  Gate location                      1039

In [19]:
# Peek at 10 random rows. A fixed random_state makes the same rows come back on
# every Restart & Run All, so the displayed sample stays reproducible.
nps.sample(10, random_state=42)

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
7718,7718,47865,Male,Loyal Customer,22,Business travel,Business,2042,2,1,...,2,1,1,3,4,3,2,0,0.0,neutral or dissatisfied
14103,14103,60174,Female,Loyal Customer,37,Personal Travel,Eco,1120,2,4,...,4,3,5,5,5,5,4,2,2.0,neutral or dissatisfied
81508,81508,117836,Male,Loyal Customer,40,Business travel,Business,2878,5,5,...,5,5,5,5,4,5,5,0,0.0,satisfied
59910,59910,8800,Male,Loyal Customer,60,Business travel,Business,2823,1,1,...,5,5,5,5,3,5,4,0,0.0,satisfied
45277,45277,40601,Male,Loyal Customer,67,Personal Travel,Eco,391,5,4,...,3,3,5,4,3,4,3,0,0.0,satisfied
90497,90497,8339,Female,Loyal Customer,41,Business travel,Business,2812,3,3,...,3,3,3,3,2,3,4,0,0.0,neutral or dissatisfied
75413,75413,88805,Female,Loyal Customer,60,Business travel,Business,3641,1,1,...,4,4,4,4,4,4,5,0,0.0,satisfied
15557,15557,82297,Female,Loyal Customer,52,Business travel,Business,986,5,5,...,4,4,4,4,3,4,5,0,0.0,satisfied
86120,86120,45958,Male,Loyal Customer,56,Business travel,Eco Plus,409,3,5,...,3,1,5,3,3,4,3,6,6.0,neutral or dissatisfied
20411,20411,70562,Male,Loyal Customer,52,Business travel,Eco,858,4,4,...,5,1,2,2,4,4,5,0,0.0,satisfied


## Explorando o DataFrame de trabalho (cópia de `nps`)

In [20]:
# Work on a copy so that later changes to `df_nps` do not modify the loaded `nps`.
df_nps = nps.copy()

In [21]:
# (rows, columns) of the working copy; should match `nps`.
df_nps.shape

(103904, 25)

In [22]:
# Column names, non-null counts and dtypes of the working copy.
df_nps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103904 entries, 0 to 103903
Data columns (total 25 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   Unnamed: 0                         103904 non-null  int64  
 1   id                                 103904 non-null  int64  
 2   Gender                             103904 non-null  object 
 3   Customer Type                      103904 non-null  object 
 4   Age                                103904 non-null  int64  
 5   Type of Travel                     103904 non-null  object 
 6   Class                              103904 non-null  object 
 7   Flight Distance                    103904 non-null  int64  
 8   Inflight wifi service              103904 non-null  int64  
 9   Departure/Arrival time convenient  103904 non-null  int64  
 10  Ease of Online booking             103904 non-null  int64  
 11  Gate location                      1039

In [23]:
# Peek at 20 random rows of the working copy. A fixed random_state keeps the
# displayed sample identical across re-runs.
df_nps.sample(20, random_state=42)

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
19933,19933,123341,Male,Loyal Customer,55,Personal Travel,Eco Plus,631,3,2,...,3,1,4,1,2,2,3,0,0.0,neutral or dissatisfied
18838,18838,14499,Male,Loyal Customer,28,Business travel,Eco,402,5,5,...,5,1,2,4,3,2,5,0,0.0,satisfied
62506,62506,44863,Female,Loyal Customer,51,Business travel,Eco Plus,693,5,1,...,5,5,5,5,4,5,2,0,57.0,satisfied
89677,89677,62423,Female,Loyal Customer,50,Business travel,Business,1911,3,3,...,4,4,4,4,5,4,3,0,0.0,satisfied
72839,72839,129344,Male,Loyal Customer,64,Personal Travel,Eco,2586,2,4,...,1,1,1,3,4,4,1,8,1.0,neutral or dissatisfied
48998,48998,81479,Male,Loyal Customer,41,Business travel,Business,528,3,1,...,3,3,3,3,4,3,3,0,0.0,neutral or dissatisfied
79277,79277,14383,Female,Loyal Customer,51,Business travel,Business,3319,2,1,...,2,2,3,2,4,2,4,57,69.0,neutral or dissatisfied
12469,12469,110628,Female,Loyal Customer,32,Personal Travel,Eco,727,4,2,...,4,2,1,3,4,3,4,0,0.0,neutral or dissatisfied
82476,82476,74070,Male,Loyal Customer,26,Business travel,Business,2329,4,4,...,4,4,2,5,5,4,4,22,5.0,satisfied
24239,24239,94949,Female,Loyal Customer,38,Business travel,Business,248,3,4,...,3,3,3,3,4,3,1,27,23.0,neutral or dissatisfied


🔎Verificando valores ausentes

In [24]:
# Missing-value count per column, sorted so the affected columns appear first.
df_nps.isna().sum().sort_values(ascending=False)

Arrival Delay in Minutes             310
Unnamed: 0                             0
Online boarding                        0
Departure Delay in Minutes             0
Cleanliness                            0
Inflight service                       0
Checkin service                        0
Baggage handling                       0
Leg room service                       0
On-board service                       0
Inflight entertainment                 0
Seat comfort                           0
Food and drink                         0
id                                     0
Gate location                          0
Ease of Online booking                 0
Departure/Arrival time convenient      0
Inflight wifi service                  0
Flight Distance                        0
Class                                  0
Type of Travel                         0
Age                                    0
Customer Type                          0
Gender                                 0
satisfaction    

In [26]:
# Missing-value count per column, ordered from most to least affected.
valores_ausentes = df_nps.isna().sum().sort_values(ascending=False)

# Keep only the columns that have gaps and express each count as a fraction
# of the total number of rows.
valores_ausentes_percent = (
    valores_ausentes
    .loc[valores_ausentes.gt(0)]
    .div(len(df_nps))
)

# Report each affected column as a percentage with two decimal places.
for coluna, valor in valores_ausentes_percent.items():
    print(f'{coluna}: {valor * 100:.2f}%')

Arrival Delay in Minutes: 0.30%


🔎Verificando valores duplicados

In [27]:
# Count rows that repeat an earlier row across all columns.
# NOTE(review): the comparison includes `Unnamed: 0` and `id`; if those are unique
# per row (in the samples above `Unnamed: 0` equals the row index), this check can
# never flag a duplicate — consider repeating it with those columns excluded.
df_nps.duplicated().sum()

0

🔎Verificando Outliers