In [258]:
import pandas as pd

In [259]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [260]:
df = pd.read_csv("synthetic_insurance_data.csv")

In [261]:
df.head()

Unnamed: 0,Age,Is_Senior,Marital_Status,Married_Premium_Discount,Prior_Insurance,Prior_Insurance_Premium_Adjustment,Claims_Frequency,Claims_Severity,Claims_Adjustment,Policy_Type,...,Time_Since_First_Contact,Conversion_Status,Website_Visits,Inquiries,Quotes_Requested,Time_to_Conversion,Credit_Score,Premium_Adjustment_Credit,Region,Premium_Adjustment_Region
0,47,0,Married,86,1-5 years,50,0,Low,0,Full Coverage,...,10,0,5,1,2,99,704,-50,Suburban,50
1,37,0,Married,86,1-5 years,50,0,Low,0,Full Coverage,...,22,0,5,1,2,99,726,-50,Urban,100
2,49,0,Married,86,1-5 years,50,1,Low,50,Full Coverage,...,28,0,4,4,1,99,772,-50,Urban,100
3,62,1,Married,86,>5 years,0,1,Low,50,Full Coverage,...,4,1,6,2,2,2,809,-50,Urban,100
4,36,0,Single,0,>5 years,0,2,Low,100,Full Coverage,...,14,1,8,4,2,10,662,50,Suburban,50


In [262]:
df.isnull().sum().sum()

0

In [263]:
df.info(  )

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Age                                 10000 non-null  int64 
 1   Is_Senior                           10000 non-null  int64 
 2   Marital_Status                      10000 non-null  object
 3   Married_Premium_Discount            10000 non-null  int64 
 4   Prior_Insurance                     10000 non-null  object
 5   Prior_Insurance_Premium_Adjustment  10000 non-null  int64 
 6   Claims_Frequency                    10000 non-null  int64 
 7   Claims_Severity                     10000 non-null  object
 8   Claims_Adjustment                   10000 non-null  int64 
 9   Policy_Type                         10000 non-null  object
 10  Policy_Adjustment                   10000 non-null  int64 
 11  Premium_Amount                      10000 non-null  int

In [264]:
features = ["Age", "Claims_Frequency", "Premium_Amount", "Website_Visits", "Inquiries",
            "Quotes_Requested", "Credit_Score", "Marital_Status", "Prior_Insurance",
            "Claims_Severity", "Policy_Type", "Region"]
target = "Conversion_Status"

In [265]:
A = df.dropna(subset=features + [target]).copy()

In [266]:
encoder = LabelEncoder()
df['Marital_Status'] = encoder.fit_transform(df['Marital_Status'])
df['Prior_Insurance'] = encoder.fit_transform(df['Prior_Insurance'])
df['Claims_Severity'] = encoder.fit_transform(df['Claims_Severity'])
df['Policy_Type'] = encoder.fit_transform(df['Policy_Type'])
df['Source_of_Lead'] = encoder.fit_transform(df['Source_of_Lead'])
df['Region'] = encoder.fit_transform(df['Region'])

In [267]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column                              Non-Null Count  Dtype
---  ------                              --------------  -----
 0   Age                                 10000 non-null  int64
 1   Is_Senior                           10000 non-null  int64
 2   Marital_Status                      10000 non-null  int64
 3   Married_Premium_Discount            10000 non-null  int64
 4   Prior_Insurance                     10000 non-null  int64
 5   Prior_Insurance_Premium_Adjustment  10000 non-null  int64
 6   Claims_Frequency                    10000 non-null  int64
 7   Claims_Severity                     10000 non-null  int64
 8   Claims_Adjustment                   10000 non-null  int64
 9   Policy_Type                         10000 non-null  int64
 10  Policy_Adjustment                   10000 non-null  int64
 11  Premium_Amount                      10000 non-null  int64
 12  Safe_

In [268]:
df.isnull().sum().sum()

0

In [269]:
X = df[features]
Y = df[target]

In [270]:
X_train , X_test , Y_train , Y_test = train_test_split(X , Y , test_size=0.2 , random_state=42)

In [271]:
model = KNeighborsClassifier(n_neighbors=3)

In [272]:
model.fit(X_train, Y_train)

In [273]:
Y_pred = model.predict(X_test)

In [274]:
accuracy = accuracy_score(Y_test, Y_pred)

In [275]:
conf_matrix = confusion_matrix(Y_test, Y_pred)

In [276]:
class_report = classification_report(Y_test, Y_pred)

In [277]:
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.513
Confusion Matrix:
 [[337 523]
 [451 689]]
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.39      0.41       860
           1       0.57      0.60      0.59      1140

    accuracy                           0.51      2000
   macro avg       0.50      0.50      0.50      2000
weighted avg       0.51      0.51      0.51      2000

