In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.inspection import permutation_importance

In [None]:
# Read the "Airline Passenger Satisfaction" data from the CSV file that sits
# next to this notebook.
csv_path = 'test.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

In [None]:
# (number of rows, number of columns) of the frame as loaded, before any cleaning.
data.shape

In [None]:
# Count missing values per column before cleaning.
data.isna().sum()

In [None]:
# Remove rows that have no 'Arrival Delay in Minutes' value and drop the 'id'
# and 'Unnamed: 0' columns so they are not passed to the model as features.
#
# Written as a single non-mutating chain with errors='ignore' so the cell is
# idempotent: re-running it after the columns are already gone no longer
# raises KeyError.
data = (
    data
    .dropna(subset=['Arrival Delay in Minutes'])
    .drop(columns=['id', 'Unnamed: 0'], errors='ignore')
)

In [None]:
# Re-check the per-column missing-value counts after the cleaning step.
data.isna().sum()

In [None]:
# Replace the values of these four columns with integer codes.
# The same LabelEncoder instance is re-fitted for every column, so once the
# loop finishes it only remembers the classes of the last column ('Class').
label_encoder = LabelEncoder()
for column_name in ('Gender', 'Customer Type', 'Type of Travel', 'Class'):
    data[column_name] = label_encoder.fit_transform(data[column_name])

In [None]:
# The target is taken from the 'satisfaction' column; every other remaining
# column is used as a feature.
target_column = 'satisfaction'
y = data[target_column]
X = data.drop(columns=[target_column])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features.
# The scaler's statistics are learned from the training split only and then
# applied unchanged to the test split. Re-fitting on the test split (as
# fit_transform would do) scales train and test with different means and
# variances and lets test-set statistics influence the evaluation.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Create an SVM classifier with a linear kernel
svm_classifier = SVC(kernel='linear', random_state=42)

# Train the SVM classifier on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test)

# Calculate and print accuracy (kept in lin_accuracy for the final comparison chart)
lin_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {lin_accuracy * 100:.2f}%")

# The report is a multi-line, column-aligned table. Start it on its own line;
# otherwise the label prefix pushes the header row out of alignment with the
# rows beneath it.
print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

In [None]:
# Confusion matrix of the most recent predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_title("Confusion Matrix - Linear Kernel SVM")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

In [None]:
# Train a linear-kernel model for the importance analysis
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Use the first 1000 test rows to keep permutation importance affordable.
# X_test is a NumPy array after scaling, so plain slicing is positional;
# y_test is a pandas Series, so use .iloc to make the positional intent explicit.
# These two names are reused by the later polynomial and RBF importance cells.
X_test_sample = X_test[:1000]
y_test_sample = y_test.iloc[:1000]

# Get permutation importance
result = permutation_importance(model, X_test_sample, y_test_sample, n_repeats=5, random_state=42)

# Create and sort a DataFrame of feature importances.
# barh draws the first row at the bottom, so an ascending sort puts the most
# important feature at the top of the chart.
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': result.importances_mean
}).sort_values(by='Importance', ascending=True)

# Plot
plt.figure(figsize=(10, 6))
plt.barh(importance_df['Feature'], importance_df['Importance'])
plt.xlabel("Mean Decrease in Accuracy")
plt.ylabel("Features")  # matches the polynomial and RBF importance plots
plt.title("Permutation Feature Importance for Linear Kernel")
plt.tight_layout()
plt.show()

In [None]:
# Print the weight vector (w) and intercept (b) of the classifier currently
# bound to svm_classifier. scikit-learn only exposes coef_ for a linear
# kernel, so this cell needs to run while svm_classifier is still the linear
# model fitted above.
for label, value in (("w", svm_classifier.coef_), ("b", svm_classifier.intercept_)):
    print(f"{label}: {value}")

In [None]:
# Create an SVM classifier with a polynomial kernel
svm_classifier = SVC(kernel='poly', random_state=42)

# Train the SVM classifier on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test)

# Calculate and print accuracy (kept in poly_accuracy for the final comparison chart)
poly_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {poly_accuracy * 100:.2f}%")

# The report is a multi-line, column-aligned table. Start it on its own line;
# otherwise the label prefix pushes the header row out of alignment with the
# rows beneath it.
print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

In [None]:
# Confusion matrix of the most recent predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_title("Confusion Matrix - Polynomial Kernel SVM")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

In [None]:
# Fit a polynomial-kernel model for the importance analysis
model = SVC(kernel='poly')
model.fit(X_train, y_train)

# Permutation importance on the test subset.
# X_test_sample / y_test_sample are created in the linear-kernel importance cell.
result = permutation_importance(
    model,
    X_test_sample,
    y_test_sample,
    n_repeats=5,
    random_state=42,
)

# One row per feature, sorted from least to most important.
# barh draws the first row at the bottom, so the most important feature ends
# up at the top of the chart.
feature_scores = pd.DataFrame({
    'Feature': X.columns,
    'Importance': result.importances_mean,
})
importance_df = feature_scores.sort_values(by='Importance')

# Horizontal bar chart of the mean importances
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance_df['Feature'], importance_df['Importance'])
ax.set_xlabel("Mean Decrease in Accuracy")
ax.set_ylabel("Features")
ax.set_title("Permutation Feature Importance for Polynomial Kernel")
fig.tight_layout()
plt.show()

In [None]:
# Create an SVM classifier with an RBF kernel
svm_classifier = SVC(kernel='rbf', random_state=42)

# Train the SVM classifier on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test)

# Calculate and print accuracy (kept in rbf_accuracy for the final comparison chart)
rbf_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {rbf_accuracy * 100:.2f}%")

# The report is a multi-line, column-aligned table. Start it on its own line;
# otherwise the label prefix pushes the header row out of alignment with the
# rows beneath it.
print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

In [None]:
# Confusion matrix of the most recent predictions (rows: actual, columns: predicted)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_title("Confusion Matrix - RBF Kernel SVM")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()

In [None]:
# Fit an RBF-kernel model for the importance analysis
model = SVC(kernel='rbf')
model.fit(X_train, y_train)

# Permutation importance on the test subset.
# X_test_sample / y_test_sample are created in the linear-kernel importance cell.
result = permutation_importance(
    model,
    X_test_sample,
    y_test_sample,
    n_repeats=5,
    random_state=42,
)

# One row per feature, sorted from least to most important.
# barh draws the first row at the bottom, so the most important feature ends
# up at the top of the chart.
feature_scores = pd.DataFrame({
    'Feature': X.columns,
    'Importance': result.importances_mean,
})
importance_df = feature_scores.sort_values(by='Importance')

# Horizontal bar chart of the mean importances
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance_df['Feature'], importance_df['Importance'])
ax.set_xlabel("Mean Decrease in Accuracy")
ax.set_ylabel("Features")
ax.set_title("Permutation Feature Importance for RBF Kernel")
fig.tight_layout()
plt.show()

In [None]:
# Test-set accuracy of each kernel, keyed by display name (values are fractions in [0, 1])
accuracies = dict(
    Linear=lin_accuracy,
    Polynomial=poly_accuracy,
    RBF=rbf_accuracy,
)
accuracy_percentages = [fraction * 100 for fraction in accuracies.values()]

# Bar chart comparing the three kernels on a fixed 0-100% axis
fig, ax = plt.subplots(figsize=(8, 5))
bars = ax.bar(
    list(accuracies),
    accuracy_percentages,
    color=['#1f77b4', '#ff7f0e', '#2ca02c'],
)
ax.set_ylabel("Accuracy (%)")
ax.set_title("SVM Kernel Accuracy Comparison")
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Write each bar's value just above its top edge, centred horizontally
for rect in bars:
    height = rect.get_height()
    x_centre = rect.get_x() + rect.get_width() / 2.0
    ax.text(x_centre, height + 1, f'{height:.2f}%', ha='center', va='bottom', fontweight='bold')

fig.tight_layout()
plt.show()
