# Entrenamiento y Evaluación de Modelos

## Trabajo Práctico Nro. 2 - Grupo 3

#### Integrantes:
* Ignacio Busso
* Lucas Copes
* Jesica Heit

#### Dataset: https://www.kaggle.com/datasets/teejmahal20/airline-passenger-satisfaction
* Detalle: contiene datos de la satisfacción de los pasajeros de diferentes vuelos tomando en cuenta multiples aspectos (calidad del servicio, comodidad, limpieza, etc.)
* Target: columna 'satisfaction', para determinar la satisfacción de un pasajero respecto a un vuelo.
* Dimensiones: 25 columnas x 129.880 filas.

In [92]:
%matplotlib inline

import warnings
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

pd.options.display.max_columns = 0

#Cambios en el estilo de los graficos
plt.style.use('fast')
plt.rcParams.update({
    "font.family": ["serif"],
    "font.sans-serif": ["Roboto"],
    "font.size": 9,
    "axes.labelsize": 11,
    "axes.titlesize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    'figure.figsize': (11.0, 5.0),
    'axes.grid': True,
    'axes.spines.left': True,
    'axes.spines.right': True,
    'axes.spines.top': True,
    'axes.spines.bottom': True,
})

np.set_printoptions(suppress=True)

warnings.filterwarnings('ignore')

In [93]:
# Lectura y concatenación de los .csv
train = pd.read_csv('data/train.csv', index_col=[0])
test = pd.read_csv('data/test.csv', index_col=[0])
full = pd.concat([train, test], sort=False)

# Asignamos nuevos nombres a algunas de las columnas
new_column_names = {
    'Gender': 'gender',
    'Customer Type': 'customer_type',
    'Age': 'age',
    'Type of Travel': 'business_travel',
    'Class': 'ticket_class',
    'Flight Distance': 'flight_distance',
    'Inflight wifi service': 'wifi_service',
    'Departure/Arrival time convenient': 'departure_arrival_time_convenient',
    'Ease of Online booking': 'online_booking',
    'Gate location': 'gate_location',
    'Food and drink': 'food_and_drink',
    'Online boarding': 'online_boarding',
    'Seat comfort': 'seat_comfort',
    'Inflight entertainment': 'inflight_entertainment',
    'On-board service': 'onboard_service',
    'Leg room service': 'leg_room',
    'Baggage handling': 'baggage_handling',
    'Checkin service': 'checkin',
    'Inflight service': 'inflight_service',
    'Cleanliness': 'cleanliness',
    'Departure Delay in Minutes': 'departure_delay',
    'Arrival Delay in Minutes': 'arrival_delay',
}

full.rename(columns=new_column_names, inplace=True)
full.set_index('id', inplace=True)

In [101]:
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer

# Conversión a variables booleanas
full['gender'] = full['gender'].replace(['Male','Female'],['0','1'])
full['customer_type'] = full['customer_type'].replace(['disloyal Customer','Loyal Customer'],['0','1'])
full['business_travel'] = full['business_travel'].replace(['Personal Travel','Business travel'],['0','1'])
full['satisfaction'] = full['satisfaction'].replace(['neutral or dissatisfied','satisfied'],['0','1'])


# One-hot Encoder en variable 'ticket_class'
full['ticket_class'] = full['ticket_class'].replace(['Business','Eco Plus','Eco'],['business','economy_plus','economy'])
transformer = make_column_transformer(
    (OneHotEncoder(), ['ticket_class']),
    remainder='passthrough')

transformed = transformer.fit_transform(full)

new_column_names = transformer.get_feature_names()
new_column_names[0] = 'ticket_business'
new_column_names[1] = 'ticket_economy'
new_column_names[2] = 'ticket_economy_plus'

transformed_df = pd.DataFrame(
    transformed, 
    columns = new_column_names
    
)

transformed_df

Unnamed: 0,ticket_business,ticket_economy,ticket_economy_plus,gender,customer_type,age,business_travel,flight_distance,wifi_service,departure_arrival_time_convenient,online_booking,gate_location,food_and_drink,online_boarding,seat_comfort,inflight_entertainment,onboard_service,leg_room,baggage_handling,checkin,inflight_service,cleanliness,departure_delay,arrival_delay,satisfaction
0,0.0,0.0,1.0,0,1,13,0,460,3,4,3,1,5,3,5,5,4,3,4,4,5,5,25,18.0,0
1,1.0,0.0,0.0,0,0,25,1,235,3,2,3,3,1,3,1,1,1,5,3,1,4,1,1,6.0,0
2,1.0,0.0,0.0,1,1,26,1,1142,2,2,2,2,5,5,5,5,4,3,4,4,4,5,0,0.0,1
3,1.0,0.0,0.0,1,1,25,1,562,2,5,5,5,2,2,2,2,2,5,3,1,4,2,11,9.0,0
4,1.0,0.0,0.0,0,1,61,1,214,3,3,3,3,4,5,5,3,3,4,4,3,3,3,0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129875,1.0,0.0,0.0,0,0,34,1,526,3,3,3,1,4,3,4,4,3,2,4,4,5,4,0,0.0,0
129876,1.0,0.0,0.0,0,1,23,1,646,4,4,4,4,4,4,4,4,4,5,5,5,5,4,0,0.0,1
129877,0.0,1.0,0.0,1,1,17,0,828,2,5,1,5,2,1,2,2,4,3,4,5,4,2,0,0.0,0
129878,1.0,0.0,0.0,0,1,14,1,1127,3,3,3,3,4,4,4,4,3,2,5,4,5,4,0,0.0,1


In [95]:
full.sample(15)

Unnamed: 0_level_0,gender,customer_type,age,business_travel,ticket_class,flight_distance,wifi_service,departure_arrival_time_convenient,online_booking,gate_location,food_and_drink,online_boarding,seat_comfort,inflight_entertainment,onboard_service,leg_room,baggage_handling,checkin,inflight_service,cleanliness,departure_delay,arrival_delay,satisfaction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
12959,0,1,41,1,economy_plus,674,4,2,2,2,4,4,4,4,1,3,4,2,5,4,12,5.0,1
3386,0,0,24,1,business,315,4,0,4,3,2,4,2,2,4,5,5,5,4,2,9,12.0,1
17867,0,1,58,1,economy,425,2,1,3,1,2,2,2,2,4,5,1,4,5,2,0,0.0,1
113546,1,1,63,1,business,3762,1,1,1,1,4,4,4,5,5,5,5,3,5,3,15,12.0,1
88491,0,1,22,1,business,1991,2,1,1,1,2,2,2,2,3,1,3,3,3,2,13,18.0,0
60549,0,1,30,1,business,862,3,3,3,3,5,5,5,5,5,5,5,4,4,5,10,0.0,1
5960,1,1,26,0,economy,1250,2,3,2,3,2,2,2,2,3,1,2,4,3,2,3,7.0,0
73638,0,1,26,1,business,1811,5,5,5,5,5,5,4,5,4,2,4,3,5,5,0,0.0,1
35451,0,1,8,0,economy,1056,2,3,2,3,4,2,4,4,2,5,4,3,3,4,0,0.0,0
52319,1,0,30,1,business,493,4,4,4,1,1,4,1,1,2,2,1,2,2,1,0,0.0,0
