In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
from plotly.colors import qualitative
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import roc_curve, auc

In [2]:
# Load the survey data and discard the 'id' column, which is not needed
df = pd.read_csv("dataset/satisfaction.csv").drop(columns=['id'])

# Give the two awkwardly named columns friendlier names in a single pass
df = df.rename(columns={
    'satisfaction_v2': 'satisfaction',
    'Departure/Arrival time convenient': 'departure_arrival_time_convenient',
})

# Normalise every header: lowercase first, then spaces -> underscores
lowered_names = df.columns.str.lower()
df.columns = lowered_names.str.replace(' ', '_')

# Discard every row that has at least one missing value
df = df.dropna()

df.head()

Unnamed: 0,satisfaction,gender,customer_type,age,type_of_travel,class,flight_distance,seat_comfort,departure_arrival_time_convenient,food_and_drink,...,online_support,ease_of_online_booking,on-board_service,leg_room_service,baggage_handling,checkin_service,cleanliness,online_boarding,departure_delay_in_minutes,arrival_delay_in_minutes
0,satisfied,Female,Loyal Customer,65,Personal Travel,Eco,265,0,0,0,...,2,3,3,0,3,5,3,2,0,0.0
1,satisfied,Male,Loyal Customer,47,Personal Travel,Business,2464,0,0,0,...,2,3,4,4,4,2,3,2,310,305.0
2,satisfied,Female,Loyal Customer,15,Personal Travel,Eco,2138,0,0,0,...,2,2,3,3,4,4,4,2,0,0.0
3,satisfied,Female,Loyal Customer,60,Personal Travel,Eco,623,0,0,0,...,3,1,1,0,1,4,1,3,0,0.0
4,satisfied,Female,Loyal Customer,70,Personal Travel,Eco,354,0,0,0,...,4,2,2,0,2,4,2,5,0,0.0


In [3]:
# Integer codes for each categorical column that is kept for modelling.
category_codes = {
    'satisfaction': {'neutral or dissatisfied': 0, 'satisfied': 1},
    'customer_type': {'Loyal Customer': 1, 'disloyal Customer': 0},
    'type_of_travel': {'Personal Travel': 0, 'Business travel': 1},
    'class': {'Eco': 0, 'Eco Plus': 1, 'Business': 2},
}

for column, codes in category_codes.items():
    # Series.map turns any label that is missing from the dict into NaN without
    # warning, so check for unexpected labels first and fail loudly. This also
    # catches an accidental re-run of this cell on already-encoded data.
    unexpected = set(df[column].unique()) - set(codes)
    if unexpected:
        raise ValueError(
            f"Column '{column}' contains values with no integer code: "
            f"{sorted(unexpected, key=str)}. "
            "If this cell was already run, reload the data first."
        )
    df[column] = df[column].map(codes)

# Gender is dropped and therefore not used as a model feature
df = df.drop(columns=['gender'])

In [4]:
# Separate the target column from the predictor columns
y = df['satisfaction']
X = df.drop(columns=['satisfaction'])

# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [5]:
# Standardise the features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Create the gradient-boosting classifier with 500 boosting stages.
# random_state is fixed so that repeated runs fit the same model: scikit-learn
# randomly permutes the features at each split, so ties between equally good
# splits can otherwise be resolved differently from run to run.
model = GradientBoostingClassifier(n_estimators=500, random_state=0)

# Train on the scaled training split
model.fit(X_train, y_train)

# Predict class labels for the held-out test split
y_pred = model.predict(X_test)

In [7]:
# Score the held-out predictions first ...
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# ... then print the results: accuracy, confusion matrix, per-class report
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix: \n{cm}")
print(report)

Accuracy: 0.9438566684686076
Confusion Matrix: 
[[5557  325]
 [ 402 6665]]
              precision    recall  f1-score   support

           0       0.93      0.94      0.94      5882
           1       0.95      0.94      0.95      7067

    accuracy                           0.94     12949
   macro avg       0.94      0.94      0.94     12949
weighted avg       0.94      0.94      0.94     12949



In [8]:
# Some post training visualizations

# Feature Importance
# Order the features from most to least important according to the fitted model
feature_importance = model.feature_importances_
sorted_idx = np.argsort(feature_importance)[::-1]

# X is still the unscaled DataFrame, so its columns supply the feature names
fig = px.bar(
    x=X.columns[sorted_idx],
    y=feature_importance[sorted_idx],
    labels={'x': 'Feature', 'y': 'Importance'},
    title='Feature Importance',
)
fig.show()

# Confusion Matrix
# Heatmap of the confusion matrix with the same class names on both axes
class_names = ['Neutral or Dissatisfied', 'Satisfied']
fig = px.imshow(
    cm,
    labels=dict(x="Predicted", y="Actual", color="Count"),
    x=class_names,
    y=class_names,
)
fig.show()

# ROC Curve
# Use the predicted probability of the positive class (column 1) as the score
y_pred_proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

fig = px.area(
    x=fpr, y=tpr,
    title=f'ROC Curve (AUC={roc_auc:.4f})',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700, height=500
)

# Chance-level reference line. It is drawn once, as a named trace; the previous
# version also added an identical dashed shape on the same coordinates.
# mode='lines' keeps markers from being drawn at the two end points.
fig.add_trace(
    go.Scatter(
        x=[0, 1], y=[0, 1],
        mode='lines',
        line=dict(color='firebrick', dash='dash'),
        name='diagonal',
    )
)
fig.show()