In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cf
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import silhouette_score

In [2]:
# Load the insurance customer table from a CSV file in the working directory.
df = pd.read_csv('insurance_customers.csv')
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,CustomerID,Age,AnnualIncome,ClaimAmount,City
0,1001,44,47565,6513,Kumasi
1,1002,38,51597,3155,Takoradi
2,1003,46,71209,6739,Kumasi
3,1004,55,69155,7711,Sunyani
4,1005,37,59686,5826,Sunyani
...,...,...,...,...,...
295,1296,33,52349,6297,Takoradi
296,1297,48,55951,4665,Takoradi
297,1298,43,45318,5293,Accra
298,1299,48,53335,7413,Tamale


In [3]:
# Column names, dtypes and non-null counts: a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   CustomerID    300 non-null    int64 
 1   Age           300 non-null    int64 
 2   AnnualIncome  300 non-null    int64 
 3   ClaimAmount   300 non-null    int64 
 4   City          300 non-null    object
dtypes: int64(4), object(1)
memory usage: 11.8+ KB


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
# NOTE(review): the recorded output shows a minimum Age of 7 and a negative
# minimum ClaimAmount (-393); confirm whether these rows are valid before clustering.
df.describe()

Unnamed: 0,CustomerID,Age,AnnualIncome,ClaimAmount
count,300.0,300.0,300.0,300.0
mean,1150.5,39.413333,59677.14,5163.916667
std,86.746758,9.872218,14424.279149,1993.324103
min,1001.0,7.0,22925.0,-393.0
25%,1075.75,33.0,49433.75,3856.5
50%,1150.5,40.0,59718.0,5085.0
75%,1225.25,46.0,69244.5,6422.0
max,1300.0,78.0,106183.0,10264.0


In [5]:
# One-hot encode the categorical City column so every remaining column can be
# scaled numerically. drop_first=True omits the indicator for the first
# category, so a row with all City_* flags False belongs to that baseline city.
# NOTE(review): this rebinds `df`, so re-running this cell on its own raises
# because the City column has already been consumed; restart and run all instead.
df = pd.get_dummies(
    df,
    columns=['City'],
    drop_first=True,
)
df

Unnamed: 0,CustomerID,Age,AnnualIncome,ClaimAmount,City_Kumasi,City_Sunyani,City_Takoradi,City_Tamale
0,1001,44,47565,6513,True,False,False,False
1,1002,38,51597,3155,False,False,True,False
2,1003,46,71209,6739,True,False,False,False
3,1004,55,69155,7711,False,True,False,False
4,1005,37,59686,5826,False,True,False,False
...,...,...,...,...,...,...,...,...
295,1296,33,52349,6297,False,False,True,False
296,1297,48,55951,4665,False,False,True,False
297,1298,43,45318,5293,False,False,False,False
298,1299,48,53335,7413,False,False,False,True


In [6]:
# StandardScaler rescales each column to zero mean and unit variance once fitted.
scaler = StandardScaler()

In [7]:
# Fit the scaler and standardise the features in one step.
# CustomerID is left out before scaling (it looks like a row identifier rather
# than a feature; confirm). The result is a NumPy array, not a DataFrame.
scaled_data = scaler.fit_transform(
    df.drop(columns=['CustomerID'])
)
scaled_data

array([[ 0.46537974, -0.84110806,  0.67793162, ..., -0.47380354,
        -0.56708545, -0.46852129],
       [-0.1434019 , -0.56111231, -1.00950631, ..., -0.47380354,
         1.76340269, -0.46852129],
       [ 0.66830695,  0.80081145,  0.79149951, ..., -0.47380354,
        -0.56708545, -0.46852129],
       ...,
       [ 0.36391613, -0.99714736,  0.06486603, ..., -0.47380354,
        -0.56708545, -0.46852129],
       [ 0.87123416, -0.4404197 ,  1.13019313, ..., -0.47380354,
        -0.56708545,  2.13437475],
       [ 0.66830695,  0.41540064, -0.90347611, ...,  2.11057941,
        -0.56708545, -0.46852129]])

In [8]:
# K-means estimator (not fitted yet).
# n_init=10 runs ten centroid initialisations and keeps the best one;
# random_state pins the seed so repeated runs give the same clusters.
# NOTE(review): n_clusters=3 is hard-coded with no justification in the
# visible cells; confirm how it was chosen.
km_model = KMeans(
    n_clusters=3,
    n_init=10,
    random_state=42,
)

In [None]:
# Fit k-means on the scaled feature matrix; the assigned cluster of each row
# is then available as km_model.labels_.
km_model.fit(scaled_data)

In [10]:
# Density-based clustering on the scaled feature matrix.
# fit_predict() fits the model and returns the per-row labels in a single call,
# so a separate fit() beforehand is not needed (it would only cluster the same
# data twice).
# NOTE(review): eps=1.5 and min_samples=2 are hard-coded; min_samples=2 lets
# any two nearby points form a cluster. Confirm these were tuned deliberately.
dbs_model = DBSCAN(eps=1.5, min_samples=2)
dbs_trans = dbs_model.fit_predict(scaled_data)

In [11]:
# Per-row DBSCAN cluster labels; scikit-learn assigns -1 to points it treats as noise.
dbs_trans

array([ 0,  1,  0,  2,  2,  1,  0,  2,  3,  4,  4,  4,  0,  1,  2,  3,  3,
        0,  4,  0,  1,  1,  2,  2,  1,  1,  1,  0,  1,  3,  4,  1,  4,  0,
        4,  0,  3,  0,  0,  2,  1,  1,  3,  3,  1,  2, -1,  2,  3,  4,  3,
        3,  0,  0,  0,  0,  2,  1,  4,  3,  2,  4,  2,  2,  3,  1,  0,  1,
        3,  3,  3,  4,  1,  1, -1,  2,  2,  3,  1,  3,  4,  2,  3,  3,  4,
        0,  1,  0,  2,  3,  4,  4,  1,  0,  1,  3,  1,  4,  4,  2,  2,  3,
        0,  1,  3,  2,  1,  2,  1,  0,  3,  1,  4,  4,  1,  1,  1,  4,  4,
        1,  2,  2,  0,  3,  2,  4,  2,  0,  0,  3,  2,  4,  4,  1,  4,  3,
        3,  4,  4,  1,  4,  1,  1,  4,  0,  0,  1,  4,  0,  4,  4,  4,  2,
        4,  2,  1,  0,  0,  0,  0,  0,  0,  0,  1,  4,  3,  2,  3,  3,  4,
        4,  4,  0,  1,  0,  1,  1,  3, -1, -1,  1,  0,  0,  3,  4,  1,  0,
        3,  0,  1,  1,  2,  4,  2,  3,  0,  3,  1,  2,  2,  2,  3,  1,  0,
        0,  3,  2,  4,  3, -1,  1,  0,  3,  3,  2,  2,  4,  1,  4,  2,  0,
        2,  1,  2,  1,  1