In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
from plotly.colors import qualitative
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import torch
from torch import nn

In [2]:
# Choose the compute backend: prefer CUDA, fall back to MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cuda device


In [3]:
# Load the survey data and tidy it up in a single pipeline:
#   1. drop the `id` column, which is not needed for modelling,
#   2. give the two awkwardly named columns readable names,
#   3. convert every column name to lower-case snake_case,
#   4. discard rows that contain missing values.
df = (
    pd.read_csv("dataset/satisfaction.csv")
    .drop(columns=['id'])
    .rename(columns={
        'satisfaction_v2': 'satisfaction',
        'Departure/Arrival time convenient': 'departure_arrival_time_convenient',
    })
    .rename(columns=lambda name: name.lower().replace(' ', '_'))
    .dropna(axis=0)
)

# Preview the first rows of the cleaned frame
df.head()

Unnamed: 0,satisfaction,gender,customer_type,age,type_of_travel,class,flight_distance,seat_comfort,departure_arrival_time_convenient,food_and_drink,...,online_support,ease_of_online_booking,on-board_service,leg_room_service,baggage_handling,checkin_service,cleanliness,online_boarding,departure_delay_in_minutes,arrival_delay_in_minutes
0,satisfied,Female,Loyal Customer,65,Personal Travel,Eco,265,0,0,0,...,2,3,3,0,3,5,3,2,0,0.0
1,satisfied,Male,Loyal Customer,47,Personal Travel,Business,2464,0,0,0,...,2,3,4,4,4,2,3,2,310,305.0
2,satisfied,Female,Loyal Customer,15,Personal Travel,Eco,2138,0,0,0,...,2,2,3,3,4,4,4,2,0,0.0
3,satisfied,Female,Loyal Customer,60,Personal Travel,Eco,623,0,0,0,...,3,1,1,0,1,4,1,3,0,0.0
4,satisfied,Female,Loyal Customer,70,Personal Travel,Eco,354,0,0,0,...,4,2,2,0,2,4,2,5,0,0.0


In [4]:
# Integer codes for every categorical column that is kept as a feature/target.
category_codes = {
    'satisfaction': {'neutral or dissatisfied': 0, 'satisfied': 1},
    'customer_type': {'Loyal Customer': 1, 'disloyal Customer': 0},
    'type_of_travel': {'Personal Travel': 0, 'Business travel': 1},
    'class': {'Eco': 0, 'Eco Plus': 1, 'Business': 2},
}

# Replace each label with its code; labels missing from a table become NaN.
for column_name, codes in category_codes.items():
    df[column_name] = df[column_name].map(codes)

# Gender is not used as a feature
df = df.drop(columns=['gender'])

In [5]:
# Separate the target (`satisfaction`) from the feature columns
y = df['satisfaction']
X = df.drop(columns=['satisfaction'])

# Hold out 10% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [6]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Define the neural network architecture
class NeuralNetwork(nn.Module):
    """Fully connected network: two ReLU hidden layers and a sigmoid output.

    Layout: Linear(input_size -> hidden_size) -> ReLU
            -> Linear(hidden_size -> hidden_size) -> ReLU
            -> Linear(hidden_size -> output_size) -> Sigmoid
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers and their activations.

        Args:
            input_size: number of input features per sample.
            hidden_size: width of both hidden layers.
            output_size: number of output units.
        """
        super().__init__()
        # First hidden layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        # Second hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        # Output layer; the sigmoid squashes each output into (0, 1)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run `x` through the network and return the sigmoid activations."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Set the parameters
input_size = X_train.shape[1]
hidden_size = 512
output_size = 1
num_epochs = 75
learning_rate = 0.001

# Seed PyTorch's random number generator so the random weight initialisation,
# and therefore the loss curve and accuracies printed below, are the same on
# every Restart & Run All.
torch.manual_seed(0)

# Initialize the model, loss function and optimizer.
# The model is placed on `device` (chosen in the device-selection cell);
# without the explicit move everything would run on the CPU regardless of
# which device was reported there.
model = NeuralNetwork(input_size, hidden_size, output_size).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Convert numpy arrays to torch tensors, created on the same device as the
# model. Targets are reshaped to (n_samples, 1) to match the model output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32, device=device).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32, device=device).view(-1, 1)

# Train the model. Each epoch is one full-batch gradient step over the whole
# training set. Training mode is set once, since nothing switches it back
# inside the loop.
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the loss every second epoch
    if (epoch+1) % 2 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    # Rounding the sigmoid output yields hard 0/1 predictions; accuracy is
    # the fraction of predictions equal to the target.
    y_pred_train = model(X_train_tensor).round()
    train_accuracy = y_pred_train.eq(y_train_tensor).float().mean().item()

    y_pred_test = model(X_test_tensor).round()
    test_accuracy = y_pred_test.eq(y_test_tensor).float().mean().item()

# Bring the predictions back to the CPU so they can be handed to
# NumPy / scikit-learn regardless of the training device.
y_pred_train = y_pred_train.cpu()
y_pred_test = y_pred_test.cpu()

print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

  from .autonotebook import tqdm as notebook_tqdm


Epoch [2/75], Loss: 0.5995
Epoch [4/75], Loss: 0.4717
Epoch [6/75], Loss: 0.4144
Epoch [8/75], Loss: 0.3965
Epoch [10/75], Loss: 0.3841
Epoch [12/75], Loss: 0.3691
Epoch [14/75], Loss: 0.3547
Epoch [16/75], Loss: 0.3418
Epoch [18/75], Loss: 0.3299
Epoch [20/75], Loss: 0.3184
Epoch [22/75], Loss: 0.3082
Epoch [24/75], Loss: 0.2995
Epoch [26/75], Loss: 0.2915
Epoch [28/75], Loss: 0.2831
Epoch [30/75], Loss: 0.2744
Epoch [32/75], Loss: 0.2665
Epoch [34/75], Loss: 0.2598
Epoch [36/75], Loss: 0.2536
Epoch [38/75], Loss: 0.2475
Epoch [40/75], Loss: 0.2416
Epoch [42/75], Loss: 0.2362
Epoch [44/75], Loss: 0.2314
Epoch [46/75], Loss: 0.2267
Epoch [48/75], Loss: 0.2221
Epoch [50/75], Loss: 0.2179
Epoch [52/75], Loss: 0.2141
Epoch [54/75], Loss: 0.2104
Epoch [56/75], Loss: 0.2070
Epoch [58/75], Loss: 0.2037
Epoch [60/75], Loss: 0.2007
Epoch [62/75], Loss: 0.1977
Epoch [64/75], Loss: 0.1949
Epoch [66/75], Loss: 0.1923
Epoch [68/75], Loss: 0.1897
Epoch [70/75], Loss: 0.1873
Epoch [72/75], Loss: 0.1