In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow

In [2]:
# Read the customer-satisfaction survey CSV into a DataFrame and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='cust_satisfaction.csv')
df.head(n=5)

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied,13,460,5,4,5,25,18.0
1,Male,disloyal Customer,Business travel,Business,neutral or dissatisfied,25,235,1,3,1,1,6.0
2,Female,Loyal Customer,Business travel,Business,satisfied,26,1142,5,4,5,0,0.0
3,Female,Loyal Customer,Business travel,Business,neutral or dissatisfied,25,562,2,3,2,11,9.0
4,Male,Loyal Customer,Business travel,Business,satisfied,61,214,3,4,3,0,0.0


In [3]:
# Summarise column names, dtypes and non-null counts to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103904 entries, 0 to 103903
Data columns (total 12 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Gender                      103904 non-null  object 
 1   Customer Type               103904 non-null  object 
 2   Type of Travel              103904 non-null  object 
 3   Class                       103904 non-null  object 
 4   satisfaction                103904 non-null  object 
 5   Age                         103904 non-null  int64  
 6   Flight Distance             103904 non-null  int64  
 7   Inflight entertainment      103904 non-null  int64  
 8   Baggage handling            103904 non-null  int64  
 9   Cleanliness                 103904 non-null  int64  
 10  Departure Delay in Minutes  103904 non-null  int64  
 11  Arrival Delay in Minutes    103594 non-null  float64
dtypes: float64(1), int64(6), object(5)
memory usage: 9.5+ MB


In [4]:
# Report how many values are missing in each column. The count is printed
# explicitly because a notebook only echoes the *last* expression of a cell,
# so a bare `df.isnull().sum()` on the first line would be silently discarded.
print(df.isnull().sum())

# Drop every row that still contains a missing value. Rebinding `df` (instead
# of `inplace=True`) keeps the step explicit about what it produces.
df = df.dropna()


In [5]:

# Report how many fully duplicated rows exist. Printed explicitly because a
# notebook only echoes the last expression of a cell; a bare
# `df.duplicated().sum()` on the first line would never be shown.
print(df.duplicated().sum())

# Keep the first occurrence of each duplicated row and discard the rest.
# Rebinding `df` avoids the in-place mutation of the frame.
df = df.drop_duplicates()

In [6]:
# Count rows per customer type to check how imbalanced the two groups are.
df["Customer Type"].value_counts()

Customer Type
Loyal Customer       84517
disloyal Customer    18905
Name: count, dtype: int64

In [7]:
# Split the cleaned data into one frame per customer type so the larger
# group can be down-sampled separately.
loyal_customer = df.loc[df["Customer Type"].eq("Loyal Customer")]
disloyal_customer = df.loc[df["Customer Type"].eq("disloyal Customer")]

In [8]:
# Down-sample the loyal customers to 20,000 rows so the two customer types
# are of comparable size. `random_state` is fixed so that Restart & Run All
# draws the same rows (and therefore reproduces the same downstream results).
loyal_customer = loyal_customer.sample(n=20000, random_state=42)
loyal_customer.shape

(20000, 12)

In [9]:
# Stack the down-sampled loyal rows on top of all disloyal rows to form the
# (roughly) balanced working dataset, then show its shape.
balanced_df = pd.concat(objs=[loyal_customer, disloyal_customer], axis="index")
balanced_df.shape

(38905, 12)

In [10]:
# Keep only the object-dtype (categorical text) columns and preview them.
cat_col = balanced_df.select_dtypes(include="object")
cat_col.head(n=5)

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction
70987,Male,Loyal Customer,Business travel,Business,neutral or dissatisfied
98776,Female,Loyal Customer,Business travel,Eco,satisfied
82659,Male,Loyal Customer,Personal Travel,Eco,neutral or dissatisfied
87389,Female,Loyal Customer,Business travel,Eco,neutral or dissatisfied
95160,Female,Loyal Customer,Business travel,Business,satisfied


In [11]:
# Keep every column that is not object-dtype (the numeric features) and
# preview them.
num_col = balanced_df.select_dtypes(exclude="object")
num_col.head(n=5)

Unnamed: 0,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
70987,23,621,5,3,5,0,0.0
98776,46,533,5,5,3,0,0.0
82659,42,370,5,3,5,20,10.0
87389,30,196,1,3,1,0,0.0
95160,60,2564,4,5,4,0,0.0


In [12]:
# Preview of pandas' own dummy encoding (first level of each category is
# dropped). The result is only displayed, not stored.
pd.get_dummies(cat_col, drop_first=True, dtype=int).head()

Unnamed: 0,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Eco,Class_Eco Plus,satisfaction_satisfied
70987,1,0,0,0,0,0
98776,0,0,0,1,0,1
82659,1,0,1,1,0,0
87389,0,0,0,1,0,0
95160,0,0,0,0,0,1


In [13]:
# ## one hot encoding
# cat_col = pd.get_dummies(cat_col, drop_first=True).astype(int)           # drop_first=True avoids dummy variable trap
# cat_col

In [14]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# One-hot encode the categorical columns. With drop="if_binary" a feature
# that has exactly two categories is reduced to a single 0/1 column, while
# features with more categories keep one column per category.
ohe = OneHotEncoder(drop="if_binary")
ohe.fit(cat_col)
# The transform result is converted to a dense NumPy array with .toarray().
cat_col_encoded = ohe.transform(cat_col).toarray()
cat_col_encoded

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 1.],
       [1., 0., 1., ..., 1., 0., 0.],
       ...,
       [0., 1., 0., ..., 1., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 0.]])

In [15]:
# Names of the encoded output columns, in the same order as the columns of
# `cat_col_encoded`.
column_name = ohe.get_feature_names_out().tolist()
column_name

['Gender_Male',
 'Customer Type_disloyal Customer',
 'Type of Travel_Personal Travel',
 'Class_Business',
 'Class_Eco',
 'Class_Eco Plus',
 'satisfaction_satisfied']

In [16]:
# Wrap the encoded array in a DataFrame labelled with the encoder's feature
# names, then preview it.
one_hot = pd.DataFrame(data=cat_col_encoded, columns=column_name)
one_hot.head(n=5)

Unnamed: 0,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Business,Class_Eco,Class_Eco Plus,satisfaction_satisfied
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,1.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0


In [17]:
# Give both frames a fresh 0..n-1 index so that the column-wise concat that
# follows aligns them row by row instead of by the original row labels.
one_hot, num_col = (
    one_hot.reset_index(drop=True),
    num_col.reset_index(drop=True),
)
# final_df=pd.concat([one_hot,num_col],axis=1)
# final_df.head()

In [18]:
# Join the encoded categorical columns and the numeric columns side by side
# into the final modelling table.
final_df = pd.concat(objs=[one_hot, num_col], axis="columns")
final_df.head(n=5)

Unnamed: 0,Gender_Male,Customer Type_disloyal Customer,Type of Travel_Personal Travel,Class_Business,Class_Eco,Class_Eco Plus,satisfaction_satisfied,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,23,621,5,3,5,0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,1.0,46,533,5,5,3,0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,42,370,5,3,5,20,10.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,30,196,1,3,1,0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0,60,2564,4,5,4,0,0.0


In [19]:
## x and y --> train/test split --> algorithm training

In [20]:
# matrix = final_df.corr()
# matrix       

In [21]:
## training and testing data
from sklearn.model_selection import train_test_split

## divide the data into x (independent variables) and y (dependent variable)
# The target is the encoded customer type: 1.0 = disloyal, 0.0 = loyal.
x = final_df.drop(columns="Customer Type_disloyal Customer")
y = final_df["Customer Type_disloyal Customer"]

# Hold out 20% of the rows for testing.
#  - random_state pins the shuffle so the split is identical on every run.
#  - stratify=y keeps the loyal/disloyal ratio the same in both partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [22]:
# Deep Learning

from sklearn.preprocessing import StandardScaler
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits, so no
# statistics from the test split are used for fitting.
sc = StandardScaler()
sc.fit(x_train)
x_train_scaled = sc.transform(x_train)
x_test_scaled = sc.transform(x_test)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense      # dense is work of InputLayer outputLayer and hidden layer

In [24]:
# Number of feature columns in the training data; this is the width of the
# network's input.
x_train.shape[1]

13

In [25]:
## define the ANN model
# A feed-forward binary classifier built as a Keras Sequential stack.
# The input width is declared with an explicit Input object rather than the
# `input_dim=` argument on the first Dense layer: passing input_dim/input_shape
# to a layer is deprecated for Sequential models and emits a warning.
model = Sequential([
    # input: one value per scaled feature column
    tensorflow.keras.Input(shape=(x_train_scaled.shape[1],)),
    # first dense layer: 68 neurons, ReLU (68 is the size chosen here;
    # 128 is another commonly used width)
    Dense(68, activation='relu'),
    # hidden layers, narrowing step by step
    Dense(32, activation='relu'),  # hidden layer with 32 neurons
    Dense(24, activation='relu'),  # hidden layer with 24 neurons
    Dense(12, activation='relu'),  # hidden layer with 12 neurons
    # output layer: a single sigmoid neuron for binary classification
    Dense(1, activation='sigmoid'),
])

## compile the model
# compile() fixes the optimizer, the loss function and the reported metrics
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()  # print the layer-by-layer summary of the model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
# Sanity check of the first Dense layer's parameter count:
# (13 input features + 1 bias) weights for each of its 68 neurons.
(13+1)*68 

952

In [27]:
# Train for 10 epochs on the scaled training split. No batch size is passed,
# so Keras' default is used. The scaled test split is supplied as validation
# data, so val_loss / val_accuracy are reported after every epoch.
history = model.fit(
    x=x_train_scaled,
    y=y_train,
    validation_data=(x_test_scaled, y_test),
    epochs=10,
)

Epoch 1/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8640 - loss: 0.3344 - val_accuracy: 0.9088 - val_loss: 0.2376
Epoch 2/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9101 - loss: 0.2342 - val_accuracy: 0.9100 - val_loss: 0.2258
Epoch 3/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9113 - loss: 0.2260 - val_accuracy: 0.9143 - val_loss: 0.2123
Epoch 4/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9179 - loss: 0.2121 - val_accuracy: 0.9165 - val_loss: 0.2153
Epoch 5/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9203 - loss: 0.2055 - val_accuracy: 0.9192 - val_loss: 0.2057
Epoch 6/10
[1m973/973[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9218 - loss: 0.2003 - val_accuracy: 0.9176 - val_loss: 0.2037
Epoch 7/10
[1m973/973[0m 

In [28]:
## prediction
# Sigmoid outputs (one probability per test row) from the trained model.
y_pred = model.predict(x_test_scaled)
# Flatten to 1-D, then threshold at 0.5: above 0.5 -> 1, otherwise 0.
prediction_label = (y_pred.ravel() > 0.5).astype(int)
prediction_label


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 740us/step


array([1, 0, 0, ..., 1, 0, 1])

In [29]:
# Persist the trained model to 'model.h5' in the working directory.
# NOTE(review): the .h5 extension selects the legacy HDF5 format; newer Keras
# versions recommend the native '.keras' format — confirm before changing,
# since the loading cell expects this exact file name.
model.save('model.h5')  # save the trained model to disk





In [30]:
from tensorflow.keras.models import load_model
# Reload the model from the file written by model.save('model.h5') to check
# that the saved artifact can be read back.
model_load= load_model('model.h5')  # load the saved model from disk

