In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("cust_satisfaction.csv")
df.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied,13,460,5,4,5,25,18.0
1,Male,disloyal Customer,Business travel,Business,neutral or dissatisfied,25,235,1,3,1,1,6.0
2,Female,Loyal Customer,Business travel,Business,satisfied,26,1142,5,4,5,0,0.0
3,Female,Loyal Customer,Business travel,Business,neutral or dissatisfied,25,562,2,3,2,11,9.0
4,Male,Loyal Customer,Business travel,Business,satisfied,61,214,3,4,3,0,0.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103904 entries, 0 to 103903
Data columns (total 12 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Gender                      103904 non-null  object 
 1   Customer Type               103904 non-null  object 
 2   Type of Travel              103904 non-null  object 
 3   Class                       103904 non-null  object 
 4   satisfaction                103904 non-null  object 
 5   Age                         103904 non-null  int64  
 6   Flight Distance             103904 non-null  int64  
 7   Inflight entertainment      103904 non-null  int64  
 8   Baggage handling            103904 non-null  int64  
 9   Cleanliness                 103904 non-null  int64  
 10  Departure Delay in Minutes  103904 non-null  int64  
 11  Arrival Delay in Minutes    103594 non-null  float64
dtypes: float64(1), int64(6), object(5)
memory usage: 9.5+ MB


In [4]:
df.duplicated().sum()

np.int64(172)

In [5]:
df.drop_duplicates(inplace=True)

In [6]:
df.duplicated().sum()

np.int64(0)

In [7]:
df.isnull().sum()
df.dropna(inplace=True)

In [8]:
df["Customer Type"].value_counts()

Customer Type
Loyal Customer       84517
disloyal Customer    18905
Name: count, dtype: int64

In [9]:
loyal_customer = df[df["Customer Type"] == "Loyal Customer"]
disloyal_customer = df[df["Customer Type"] == "disloyal Customer"]

In [10]:
loyal_customer = loyal_customer.sample(20000)
loyal_customer.shape

(20000, 12)

In [11]:
balance_df = pd.concat([loyal_customer, disloyal_customer],axis=0)
balance_df.shape

(38905, 12)

In [12]:
cat_col = balance_df.select_dtypes(include=["object"])
cat_col.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction
75430,Male,Loyal Customer,Business travel,Business,satisfied
85979,Female,Loyal Customer,Business travel,Eco Plus,satisfied
13224,Female,Loyal Customer,Personal Travel,Eco,neutral or dissatisfied
102958,Male,Loyal Customer,Business travel,Business,neutral or dissatisfied
81327,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied


In [13]:
num_col = balance_df.select_dtypes(exclude=["object"])
num_col.head()

Unnamed: 0,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
75430,47,989,5,5,4,24,0.0
85979,33,844,4,5,4,0,0.0
13224,37,1744,2,5,2,0,0.0
102958,37,303,1,1,2,123,116.0
81327,64,862,4,5,4,20,14.0


In [14]:
pd.get_dummies(cat_col, drop_first=True).astype(int).head()

Unnamed: 0,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Eco,Class_Eco Plus,satisfaction_satisfied
75430,1,0,0,0,0,1
85979,0,0,0,0,1,1
13224,0,0,1,1,0,0
102958,1,0,0,0,0,0
81327,1,0,1,0,1,0


In [15]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
ohe = OneHotEncoder(drop="if_binary")
cat_col_encoded = ohe.fit_transform(cat_col).toarray()
cat_col_encoded

array([[1., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 1., 1.],
       [0., 0., 1., ..., 1., 0., 0.],
       ...,
       [0., 1., 0., ..., 1., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 0.]])

In [16]:
column_names = list(ohe.get_feature_names_out())
column_names

['Gender_Male',
 'Customer Type_disloyal Customer',
 'Type of Travel_Personal Travel',
 'Class_Business',
 'Class_Eco',
 'Class_Eco Plus',
 'satisfaction_satisfied']

In [17]:
one_hot = pd.DataFrame(cat_col_encoded, columns=column_names)
one_hot.head()

Unnamed: 0,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Business,Class_Eco,Class_Eco Plus,satisfaction_satisfied
0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1.0,0.0,1.0,0.0,0.0,1.0,0.0


In [18]:
one_hot = one_hot.reset_index(drop=True)
num_col = num_col.reset_index(drop=True)

In [19]:
final_df = pd.concat([num_col, one_hot], axis=1)
final_df

Unnamed: 0,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Business,Class_Eco,Class_Eco Plus,satisfaction_satisfied
0,47,989,5,5,4,24,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
1,33,844,4,5,4,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,37,1744,2,5,2,0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,37,303,1,1,2,123,116.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
4,64,862,4,5,4,20,14.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38900,37,596,3,3,3,110,121.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
38901,24,1055,1,5,1,13,10.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
38902,23,192,2,4,2,3,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
38903,30,1995,4,4,4,7,14.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0


In [20]:
final_df.columns

Index(['Age', 'Flight Distance', 'Inflight entertainment', 'Baggage handling',
       'Cleanliness', 'Departure Delay in Minutes', 'Arrival Delay in Minutes',
       'Gender_Male', 'Customer Type_disloyal Customer',
       'Type of Travel_Personal Travel', 'Class_Business', 'Class_Eco',
       'Class_Eco Plus', 'satisfaction_satisfied'],
      dtype='object')

In [21]:
# x and y ---> train test split ---> algo training
from sklearn.model_selection import train_test_split
x = final_df.drop("Customer Type_disloyal Customer", axis=1)
y = final_df["Customer Type_disloyal Customer"]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# ANN

In [22]:
# standard scaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [24]:
x_train.shape[1]  # it give the columns

13

In [25]:
# Define your ANN model
model = Sequential()
# Input layer
# Hidden layer with 68 neurons and ReLU activation function--- general convention --- 128   
model.add(Dense(68, activation='relu', input_dim=(x_train.shape[1])))
# Hidden Layer 
model.add(Dense(32, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(12, activation='relu'))
# Output layer with 1 neuron and sigmoid activation function
model.add(Dense(1, activation='sigmoid'))
# Compile the model
model.compile(optimizer='adam',
               loss='binary_crossentropy',
                 metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
(13+1)* 68 # calulate the weights and biases for the first layer
(68+1)* 32 # calulate the weights and biases for the second layer

2208

In [27]:
# Train the model
history = model.fit(x_train, y_train,
                     epochs=10,
                       validation_data=(x_test, y_test))

Epoch 1/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8550 - loss: 0.3477 - val_accuracy: 0.9019 - val_loss: 0.2549
Epoch 2/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9060 - loss: 0.2376 - val_accuracy: 0.9099 - val_loss: 0.2311
Epoch 3/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9149 - loss: 0.2201 - val_accuracy: 0.9115 - val_loss: 0.2233
Epoch 4/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9160 - loss: 0.2170 - val_accuracy: 0.9113 - val_loss: 0.2219
Epoch 5/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9208 - loss: 0.2038 - val_accuracy: 0.9157 - val_loss: 0.2138
Epoch 6/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9222 - loss: 0.1977 - val_accuracy: 0.9147 - val_loss: 0.2161
Epoch 7/10
[1m973/973[0m 

In [28]:
# Predicting the test data
y_pred = model.predict(x_test)
prediction_label = (y_pred> 0.5).astype(int).ravel()
prediction_label


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 599us/step


array([1, 0, 1, ..., 0, 1, 1])

In [29]:
model.save("model_classification.h5")



In [30]:
from tensorflow.keras.models import load_model
model_load  = load_model("model_classification.h5")

