In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.inspection import DecisionBoundaryDisplay


In [3]:
# Read the prepared dataset into a DataFrame.
# NOTE(review): the file name suggests the features are already min-max scaled — confirm.
DATA_PATH = "./data/min_max_scaled"
df = pd.read_csv(DATA_PATH)

In [None]:
# Show the first rows as a quick sanity check of what was loaded.
df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts, memory usage).
df.info()

In [5]:
# Remove the columns that are not used as model inputs.
# errors='ignore' keeps this cell re-runnable: `df` is reassigned from itself, so a
# second execution would otherwise raise KeyError because the columns are already gone.
# NOTE(review): CLM_AMT presumably is the claim amount (it would leak CLAIM_FLAG) and
# 'Unnamed: 0' presumably a saved index column — confirm against the dataset.
columns_to_drop = ['CLM_AMT', 'Unnamed: 0', 'INCOME', 'HOME_VAL']
df = df.drop(columns=columns_to_drop, errors='ignore')

In [7]:
# Separate the label column from the feature matrix.
target_col = 'CLAIM_FLAG'
x = df.drop(columns=[target_col])
y = df[target_col]

In [9]:
# Hold out 20% of the rows, stratified on the label so both parts keep the class ratio.
# The held-out part is named *_temp; the cells in view use it as the evaluation set.
x_train, x_temp, y_train, y_temp = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [24]:
# Baseline SVM: linear kernel, class weights balanced, fixed seed.
# NOTE(review): this cell's execution count (24) is higher than those of the cells
# that fit and score the model, so the recorded metrics may predate the current
# arguments — restart and run all to confirm.
svc_model = SVC(
    kernel='linear',
    class_weight='balanced',
    random_state=42,
)

In [13]:
# Echo the estimator so its configuration is shown as the cell output.
svc_model

In [15]:
# Train the baseline on the training split; keep the value returned by fit()
# under its own name because the prediction cell reads `svc_result`.
svc_result = svc_model.fit(X=x_train, y=y_train)

In [17]:
# Predict labels for the held-out split with the fitted baseline.
y_pred = svc_result.predict(X=x_temp)

In [19]:
# Fraction of held-out rows the baseline labels correctly (shown as the cell output).
accuracy_score(y_true=y_temp, y_pred=y_pred)

0.7777777777777778

In [21]:
# Per-class precision / recall / F1 of the baseline on the held-out split.
baseline_report = classification_report(y_true=y_temp, y_pred=y_pred)
print(baseline_report)

              precision    recall  f1-score   support

         0.0       0.80      0.94      0.86      1512
         1.0       0.66      0.34      0.45       549

    accuracy                           0.78      2061
   macro avg       0.73      0.64      0.65      2061
weighted avg       0.76      0.78      0.75      2061



In [23]:
# Define the parameter grid.
# SVC ignores `gamma` for the linear kernel, so crossing it with kernel='linear'
# only refits identical models once per gamma value. Splitting the grid per kernel
# searches the same distinct models with fewer fits; as a side effect, best_params_
# no longer carries a meaningless `gamma` entry when the linear kernel wins.
c_values = [0.1, 1, 10, 100]
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf', 'poly'], 'C': c_values, 'gamma': ['scale', 0.1, 1, 10]},
]

# Perform grid search: 5-fold CV with the default scorer, refitting the best
# candidate on all of x_train. n_jobs=-1 runs the independent fits in parallel.
# NOTE(review): the estimator here uses SVC defaults, whereas the baseline model in
# this notebook sets class_weight='balanced' — confirm whether that is intended.
grid = GridSearchCV(SVC(), param_grid, refit=True, cv=5, n_jobs=-1)
grid.fit(x_train, y_train)

# Best parameters and score
print("Best Parameters:", grid.best_params_)
print("Best Cross-validation Score:", grid.best_score_)


Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Best Cross-validation Score: 0.7809726548010856


In [None]:
# Score the refit winner of the grid search on the held-out split.
best_svc = grid.best_estimator_
y_test_pred = best_svc.predict(x_temp)

# Headline accuracy followed by the per-class breakdown.
test_accuracy = accuracy_score(y_temp, y_test_pred)
print("Test Accuracy:", test_accuracy)
print(classification_report(y_temp, y_test_pred))

# Confusion matrix drawn as an annotated heatmap; the axes object returned by
# seaborn is labelled directly instead of going through the pyplot state machine.
confusion_mat_test = confusion_matrix(y_temp, y_test_pred)
heatmap_ax = sns.heatmap(confusion_mat_test, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_title('Test Set Confusion Matrix')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('True')
plt.show()
