In [1]:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Load the prepared satisfaction dataset from the current working directory.
# NOTE(review): the file name suggests outliers were removed upstream — confirm.
df = pd.read_csv("satisfaction_log_no_outliers.csv")

In [4]:
# Inspect column dtypes to spot the non-numeric (object) columns that need encoding.
print(df.dtypes)

id                                     int64
satisfaction_v2                       object
Gender                                object
Customer Type                         object
Age                                    int64
Type of Travel                        object
Class                                 object
Flight Distance                      float64
Inflight wifi service                  int64
Departure/Arrival time convenient      int64
Ease of Online booking                 int64
Gate location                          int64
Food and drink                         int64
Online boarding                        int64
Seat comfort                           int64
Inflight entertainment                 int64
On-board service                       int64
Leg room service                       int64
Baggage handling                       int64
Checkin service                        int64
Inflight service                       int64
Cleanliness                            int64
Departure 

In [5]:
# Count missing values per column before any preprocessing.
df.isna().sum()


id                                   0
satisfaction_v2                      0
Gender                               0
Customer Type                        0
Age                                  0
Type of Travel                       0
Class                                0
Flight Distance                      0
Inflight wifi service                0
Departure/Arrival time convenient    0
Ease of Online booking               0
Gate location                        0
Food and drink                       0
Online boarding                      0
Seat comfort                         0
Inflight entertainment               0
On-board service                     0
Leg room service                     0
Baggage handling                     0
Checkin service                      0
Inflight service                     0
Cleanliness                          0
Departure Delay in Minutes           0
Arrival Delay in Minutes             0
dtype: int64

In [6]:
# Remove the row identifier; it carries no predictive information.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second execution no longer raises KeyError for 'id'.
df = df.drop(columns=['id'], errors='ignore')

In [7]:
# Show how many rows fall into each travel class before the column is encoded.
print(df['Class'].value_counts())

Class
Business    61975
Eco         58099
Eco Plus     9378
Name: count, dtype: int64


In [8]:
# Integer-encode the 'Class' column; the fitted encoder stays available as `le`.
le = LabelEncoder()
le.fit(df['Class'])
df['Class'] = le.transform(df['Class'])

In [9]:
# One-hot encode the remaining categorical columns, dropping the first level of each.
df = pd.get_dummies(
    df,
    columns=['Gender', 'Customer Type', 'Type of Travel'],
    drop_first=True,
)


In [10]:
# Encode the target as binary: 1 = 'satisfied', 0 = 'neutral or dissatisfied'.
# Not re-runnable as written: after the first run the column holds 0/1,
# which are not keys of this mapping.
df['satisfaction_v2'] = df['satisfaction_v2'].map({'satisfied': 1, 'neutral or dissatisfied': 0})


In [11]:
# Summary statistics, transposed so that each feature is shown as a row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
satisfaction_v2,129452.0,0.434532,0.495697,0.0,0.0,0.0,1.0,1.0
Age,129452.0,39.430036,15.117662,7.0,27.0,40.0,51.0,85.0
Class,129452.0,0.593695,0.621379,0.0,0.0,1.0,1.0,2.0
Flight Distance,129452.0,6.706453,0.915257,4.043051,6.028279,6.739337,7.46451,8.513988
Inflight wifi service,129452.0,2.728602,1.329245,0.0,2.0,3.0,4.0,5.0
Departure/Arrival time convenient,129452.0,3.057303,1.526758,0.0,2.0,3.0,4.0,5.0
Ease of Online booking,129452.0,2.756875,1.401652,0.0,2.0,3.0,4.0,5.0
Gate location,129452.0,2.976964,1.278481,0.0,2.0,3.0,4.0,5.0
Food and drink,129452.0,3.204833,1.329845,0.0,2.0,3.0,4.0,5.0
Online boarding,129452.0,3.252773,1.350596,0.0,2.0,3.0,4.0,5.0


In [12]:
# Check the class balance of the binary target.
df['satisfaction_v2'].value_counts()

satisfaction_v2
0    73201
1    56251
Name: count, dtype: int64

In [13]:
# Separate the target column from the feature columns.
Labels = df['satisfaction_v2']
Input_data = df.drop(columns=['satisfaction_v2'])

In [14]:
# Quick look at the target series.
Labels

0         1
1         1
2         1
3         1
4         1
         ..
129447    1
129448    1
129449    1
129450    1
129451    1
Name: satisfaction_v2, Length: 129452, dtype: int64

In [15]:
# Create the scaler used to standardize the feature columns.
scaler = StandardScaler()

In [16]:
# Learn the scaling statistics from the full feature table.
# NOTE(review): this runs before the train/test split, so rows that later end
# up in the test set contribute to the fitted statistics.
scaler.fit(Input_data)

In [17]:
# Apply the fitted scaler to every row of the feature table.
standardized_data = scaler.transform(Input_data)

In [18]:
# Print the scaled feature matrix as a sanity check.
print(standardized_data)

[[ 1.09607085  0.65387884 -0.86637244 ...  1.01489634  2.11182527
   1.49466925]
 [ 0.63303449  0.65387884  1.21537835 ...  1.01489634  2.11182527
   1.49466925]
 [ 1.0299228   0.65387884  0.62515545 ...  1.01489634  2.11182527
   1.49466925]
 ...
 [ 0.43459033 -0.95545104 -2.44800539 ...  1.01489634 -0.47352402
  -0.66904434]
 [-0.29303824 -0.95545104 -2.48634594 ... -0.9853223  -0.47352402
  -0.66904434]
 [ 0.50073838 -0.95545104  1.71808457 ... -0.9853223  -0.47352402
  -0.66904434]]


In [19]:
# From here on `Input_data` refers to the scaled array rather than the
# DataFrame produced by df.drop(...) earlier.
Input_data = standardized_data

In [20]:
# Split the raw (unscaled) features first, then fit the scaler on the training
# rows only. Fitting the scaler on the whole table before splitting lets
# test-set statistics leak into preprocessing. The labels, seed and
# stratification are unchanged, so the same rows land in each split.
X_train, X_test, Y_train, Y_test = train_test_split(
    df.drop(columns='satisfaction_v2'),
    Labels,
    test_size=0.2,
    stratify=Labels,
    random_state=2,
)

# Training-only scaling statistics, applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [21]:
# Confirm the row counts produced by the 80/20 split.
Input_data.shape, X_train.shape, X_test.shape

((129452, 22), (103561, 22), (25891, 22))

In [22]:

# Instantiate a linear-kernel SVC with C=1.0.
# Note: the estimator is only created here; this cell does not fit it.
classifier_linear = svm.SVC( C=1.0 ,kernel='linear')

In [23]:
# Re-check for missing values now that the encoding steps have been applied.
df.isna().sum()


satisfaction_v2                      0
Age                                  0
Class                                0
Flight Distance                      0
Inflight wifi service                0
Departure/Arrival time convenient    0
Ease of Online booking               0
Gate location                        0
Food and drink                       0
Online boarding                      0
Seat comfort                         0
Inflight entertainment               0
On-board service                     0
Leg room service                     0
Baggage handling                     0
Checkin service                      0
Inflight service                     0
Cleanliness                          0
Departure Delay in Minutes           0
Arrival Delay in Minutes             0
Gender_Male                          0
Customer Type_disloyal Customer      0
Type of Travel_Personal Travel       0
dtype: int64

In [24]:


def train_and_evaluate_svc(X_train, Y_train, X_test=None, Y_test=None, kernel='sigmoid', C=1.0):
    """Fit an SVC and report macro-averaged metrics.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels passed to ``SVC.fit``.
    X_test, Y_test
        Optional held-out features and labels. Test metrics are computed
        only when both are not ``None``.
    kernel
        Kernel name forwarded to ``svm.SVC``.
    C
        Regularisation parameter forwarded to ``svm.SVC``.

    Returns
    -------
    tuple
        ``(classifier, train_metrics, test_metrics)``. Each metrics entry is
        an ``(accuracy, precision, recall, f1)`` tuple; ``test_metrics``
        holds four ``None`` values when no test set was evaluated.
    """
    model = svm.SVC(kernel=kernel, C=C)
    model.fit(X_train, Y_train)

    def _score(features, targets):
        # Predict once, then derive all four metrics from the same predictions.
        predicted = model.predict(features)
        return (
            accuracy_score(targets, predicted),
            precision_score(targets, predicted, average='macro'),
            recall_score(targets, predicted, average='macro'),
            f1_score(targets, predicted, average='macro'),
        )

    train_metrics = _score(X_train, Y_train)

    # The held-out evaluation is skipped unless both pieces were supplied.
    evaluate_test = X_test is not None and Y_test is not None
    if evaluate_test:
        test_metrics = _score(X_test, Y_test)
    else:
        test_metrics = (None, None, None, None)

    # Report
    print(f"Kernel: {kernel}, C: {C}")
    acc, prec, rec, f1 = train_metrics
    print(f"Training Accuracy: {acc:.4f}")
    print(f"Training Precision (macro): {prec:.4f}")
    print(f"Training Recall (macro): {rec:.4f}")
    print(f"Training F1-score (macro): {f1:.4f}")

    if evaluate_test:
        acc, prec, rec, f1 = test_metrics
        print(f"Test Accuracy: {acc:.4f}")
        print(f"Test Precision (macro): {prec:.4f}")
        print(f"Test Recall (macro): {rec:.4f}")
        print(f"Test F1-score (macro): {f1:.4f}")

    return model, train_metrics, test_metrics


In [25]:
# Example usage: sigmoid kernel, C=1.0
(
    clf,
    (train_acc, train_prec, train_rec, train_f1),
    (test_acc, test_prec, test_rec, test_f1),
) = train_and_evaluate_svc(
    X_train, Y_train, X_test, Y_test, kernel='sigmoid', C=1.0
)


Kernel: sigmoid, C: 1.0
Training Accuracy: 0.7895
Training Precision (macro): 0.7859
Training Recall (macro): 0.7858
Training F1-score (macro): 0.7858
Test Accuracy: 0.7882
Test Precision (macro): 0.7845
Test Recall (macro): 0.7839
Test F1-score (macro): 0.7842


In [24]:

# Same evaluation with the RBF kernel, C=1.0
(
    clf,
    (train_acc, train_prec, train_rec, train_f1),
    (test_acc, test_prec, test_rec, test_f1),
) = train_and_evaluate_svc(
    X_train, Y_train, X_test, Y_test, kernel='rbf', C=1.0
)

Kernel: rbf, C: 1.0
Training Accuracy: 0.9589
Training Precision (macro): 0.9597
Training Recall (macro): 0.9567
Training F1-score (macro): 0.9581
Test Accuracy: 0.9531
Test Precision (macro): 0.9536
Test Recall (macro): 0.9508
Test F1-score (macro): 0.9521


In [25]:

# Same evaluation with the polynomial kernel, C=1.0
(
    clf,
    (train_acc, train_prec, train_rec, train_f1),
    (test_acc, test_prec, test_rec, test_f1),
) = train_and_evaluate_svc(
    X_train, Y_train, X_test, Y_test, kernel='poly', C=1.0
)

Kernel: poly, C: 1.0
Training Accuracy: 0.9463
Training Precision (macro): 0.9470
Training Recall (macro): 0.9436
Training F1-score (macro): 0.9451
Test Accuracy: 0.9404
Test Precision (macro): 0.9411
Test Recall (macro): 0.9375
Test F1-score (macro): 0.9391
