## USING SUPPORT VECTOR MACHINE

In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

In [58]:
df = pd.read_csv('new_data3.csv')

In [59]:
X_demograph = df.loc[:, ['Motive','Income Range','Gender','Education level','Number of Children','Age range','Occupation']]
y_demograph = df['Insured']

In [60]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()

X_dem_resampled, y_dem_resampled = smote.fit_resample(X_demograph, y_demograph)

In [61]:
X_train_dem, X_test_dem, y_train_dem, y_test_dem = train_test_split(X_dem_resampled,y_dem_resampled, test_size = 0.3)

**FITTING THE DEMOGRAPHIC MODEL**

In [62]:
clfd = svm.SVC()
clfd.fit(X_train_dem, y_train_dem)


# predict on the train set
y_train_pred_dem = clfd.predict(X_train_dem)
# predict on the test set
y_test_pred_dem = clfd.predict(X_test_dem)


In [63]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Calculate accuracy
accuracy = accuracy_score(y_test_dem, y_test_pred_dem)
print("Accuracy:", accuracy)

# Calculate precision
precision = precision_score(y_test_dem, y_test_pred_dem)
print("Precision:", precision)

# Calculate recall
recall = recall_score(y_test_dem, y_test_pred_dem)
print("Recall:", recall)

# Calculate F1-score
f1 = f1_score(y_test_dem, y_test_pred_dem)
print("F1-score:", f1)


Accuracy: 0.75
Precision: 0.6521739130434783
Recall: 0.8823529411764706
F1-score: 0.75


In [64]:
from sklearn.model_selection import cross_val_score

scored1 = cross_val_score(clfd, X_test_dem,y_test_dem,cv =5)
print("%0.2f accuracy with a standard deviation of %0.2f" % (scored1.mean(), scored1.std()))


0.68 accuracy with a standard deviation of 0.17


In [65]:
# Instantiate an SVM classifier
svm = SVC(kernel='linear')

# Fit the classifier to the training data
svm.fit(X_train_dem, y_train_dem)


# Get the absolute values of the coefficients
coefficients = np.abs(svm.coef_)

# Calculate the feature importances
importances = np.sum(coefficients, axis=0)

# Sort feature importances in descending order
indices = np.argsort(importances)[::-1]

# Print feature importance rankings
print("Feature importance rankings:")
for i, index in enumerate(indices):
    print(f"{i+1}. Feature: {X_train_dem.columns[index]} - Importance: {importances[index]}")

Feature importance rankings:
1. Feature: Education level - Importance: 1.4939770129359085
2. Feature: Occupation - Importance: 0.9602044227100883
3. Feature: Number of Children - Importance: 0.9601393391494216
4. Feature: Motive - Importance: 0.45438870068518655
5. Feature: Income Range - Importance: 0.2238017432621522
6. Feature: Gender - Importance: 0.08657862450600642
7. Feature: Age range - Importance: 0.08006575894497645
