In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.svm import SVC

In [2]:
data = pd.read_csv("train_cluster_1.csv")
data2 = pd.read_csv("test_cluster_1.csv")

In [3]:
x_train = data.drop('booking_status', axis=1)
y_train = data['booking_status']

x_test = data2.drop('booking_status', axis=1)
y_test = data2['booking_status']

In [4]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train = le.fit_transform(y_train)
le.classes_

y_test = le.fit_transform(y_test)
le.classes_

array(['Canceled', 'Not_Canceled'], dtype=object)

In [5]:


from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

num_cols = ['lead_time', 'avg_price_per_room']
cat_cols = ['type_of_meal_plan', 'room_type_reserved', 'market_segment_type']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_cols),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), cat_cols)
    ],
    remainder='passthrough'
)

# Fit on train, transform test
x_train = preprocessor.fit_transform(x_train)
x_test = preprocessor.transform(x_test)




In [6]:

from imblearn.over_sampling import SMOTE

# Initialize SMOTE
smote = SMOTE(random_state=42)

# Apply only on training data
x_train, y_train = smote.fit_resample(x_train, y_train)

In [10]:
svm_classifier = SVC(kernel='linear',C=0.5, random_state=42)
svm_classifier.fit(x_train,y_train)


0,1,2
,C,0.5
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [11]:
#Make Predictions
y_pred_test = svm_classifier.predict(x_test)
y_pred_train = svm_classifier.predict(x_train)

In [12]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#Check accuracy 
print("Test Accuracy:", accuracy_score(y_test,y_pred_test))

#Detailed report
print("Test Classification Report:\n", classification_report(y_test, y_pred_test))

#Confution Matrix
print("Test Confution Matrix:\n", confusion_matrix(y_test,y_pred_test))

print()
#Check accuracy 
print("Train Accuracy:", accuracy_score(y_train,y_pred_train))

#Detailed report
print("Train Classification Report:\n", classification_report(y_train, y_pred_train))

#Confution Matrix
print("Train Confution Matrix:\n", confusion_matrix(y_train,y_pred_train))

Test Accuracy: 0.7238225880201189
Test Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.76      0.79      1504
           1       0.55      0.64      0.59       683

    accuracy                           0.72      2187
   macro avg       0.69      0.70      0.69      2187
weighted avg       0.74      0.72      0.73      2187

Test Confution Matrix:
 [[1145  359]
 [ 245  438]]

Train Accuracy: 0.7758545135845749
Train Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.74      0.77      4564
           1       0.76      0.81      0.78      4564

    accuracy                           0.78      9128
   macro avg       0.78      0.78      0.78      9128
weighted avg       0.78      0.78      0.78      9128

Train Confution Matrix:
 [[3369 1195]
 [ 851 3713]]


In [9]:
from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'C': [0.1, 0.5, 1, 5, 10],
    'gamma': ['scale', 'auto']  # only relevant for rbf/poly/sigmoid
}

# Create the base model
svm = SVC(class_weight='balanced', random_state=42)

# Grid search with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=2
)

# Fit on training data
grid_search.fit(x_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Val Accuracy:", grid_search.best_score_)


Fitting 5 folds for each of 40 candidates, totalling 200 fits


KeyboardInterrupt: 

In [None]:
# Evaluate best model
best_svm = grid_search.best_estimator_

# Predict
y_pred_test = best_svm.predict(x_test)
y_pred_train = best_svm.predict(x_train)

# Test results
print("\nTest Accuracy:", accuracy_score(y_test, y_pred_test))
print("Test Classification Report:\n", classification_report(y_test, y_pred_test))
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_pred_test))

# Train results
print("\nTrain Accuracy:", accuracy_score(y_train, y_pred_train))
print("Train Classification Report:\n", classification_report(y_train, y_pred_train))
print("Train Confusion Matrix:\n", confusion_matrix(y_train, y_pred_train))