In [54]:
''' Importing the libraries'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN
from collections import Counter
import pickle

In [55]:
# Load the jewellery customer dataset; the relative path is resolved against
# the notebook's current working directory.
data = pd.read_csv(filepath_or_buffer="jewellery.csv")

In [56]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,Age,Income,SpendingScore,Savings
0,58,77769,0.791329,6559.829923
1,59,81799,0.791082,5417.661426
2,62,74751,0.702657,9258.992965
3,59,74373,0.76568,7346.334504
4,87,17760,0.348778,16869.50713


In [57]:
# Show the dtype pandas inferred for each column.
data.dtypes

Age                int64
Income             int64
SpendingScore    float64
Savings          float64
dtype: object

In [58]:
# Dimensions of the dataset as (n_rows, n_columns).
data.shape

(505, 4)

In [59]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
data.isna().sum()


Age              0
Income           0
SpendingScore    0
Savings          0
dtype: int64

In [60]:
# Build the clustering feature frame: only Age and SpendingScore are used.
# .copy() keeps df independent of `data`, which is mutated later on.
df = data[['Age', 'SpendingScore']].copy()
df.head()

Unnamed: 0,Age,SpendingScore
0,58,0.791329
1,59,0.791082
2,62,0.702657
3,59,0.76568
4,87,0.348778


In [61]:
# Fit DBSCAN on the (Age, SpendingScore) feature frame.
#   eps         - neighbourhood radius, in the raw units of the features
#   min_samples - neighbours required for a point to count as a core point
# NOTE(review): no scaling is applied and the two features sit on very
# different scales in the preview (ages in the tens, scores below 1), so the
# Euclidean distance is presumably dominated by Age - confirm this is intended
# (StandardScaler is imported but never used).
dbscan = DBSCAN(eps=3, min_samples=4)

# fit() stores one label per row in labels_; -1 would mark a noise point.
db = dbscan.fit(df).labels_
labels = db

# Attach the assignment to the full dataset so it can be inspected per customer.
data["cluster"] = labels
print(labels)

[0 0 0 0 1 2 0 1 1 1 1 2 2 2 0 1 0 1 1 1 2 1 0 1 1 2 2 0 1 0 0 0 1 0 2 0 1
 0 2 2 0 1 2 0 0 2 1 2 2 0 1 2 2 1 0 1 2 1 1 1 1 1 1 0 0 0 1 2 2 0 0 2 1 1
 2 0 2 0 0 2 0 1 0 1 1 2 2 0 0 0 0 0 2 1 0 2 2 0 0 2 2 2 2 2 2 1 1 2 2 0 1
 2 0 2 2 2 0 2 2 1 2 1 1 1 2 1 0 1 0 0 2 0 2 0 2 1 1 2 0 2 1 0 0 2 0 2 0 1
 0 1 0 2 1 0 1 1 2 0 1 0 2 1 0 2 1 2 1 0 2 0 1 0 2 0 1 0 0 1 1 1 0 0 2 2 2
 1 2 0 0 0 2 1 0 0 0 1 2 2 1 1 1 2 0 2 0 2 0 1 2 0 1 1 1 1 0 1 2 2 2 1 0 0
 2 1 2 2 2 0 0 1 0 0 1 0 0 0 2 0 2 2 2 0 0 0 1 0 2 1 2 2 2 1 0 2 1 1 2 2 0
 0 1 0 0 1 0 1 2 2 0 2 1 0 1 1 2 0 1 1 0 0 1 2 2 1 2 1 1 2 1 1 1 2 0 2 2 0
 2 1 1 2 0 2 1 0 2 2 0 1 2 2 1 1 1 0 1 2 0 0 1 2 1 2 1 2 1 1 2 1 2 0 2 0 0
 2 2 1 1 2 2 2 1 2 1 1 0 0 0 1 1 2 2 1 2 1 0 2 2 1 0 1 2 0 1 1 0 1 2 0 1 2
 2 0 1 1 1 1 1 2 2 1 0 2 0 1 1 1 0 1 1 0 1 1 2 1 0 2 2 2 0 1 2 0 0 0 1 2 2
 2 0 2 0 0 2 2 1 2 2 2 1 0 1 2 1 2 0 0 1 1 2 1 2 0 0 2 0 1 0 2 0 1 0 1 2 2
 2 2 2 2 1 1 0 2 0 2 2 1 2 2 2 1 2 2 0 2 1 1 2 0 2 0 0 2 0 0 0 1 2 2 0 0 0
 1 2 1 1 1 1 2 1 1 1 0 0 

In [62]:
# Count the clusters DBSCAN found.
# DBSCAN labels noise points with -1; that label is not a cluster, so it must
# be excluded or the count is inflated by one whenever any noise is present.
cluster_ids = set(labels)
cluster_ids.discard(-1)

n_clusters = len(cluster_ids)
print(n_clusters)

3


In [63]:
# Tally how many samples were assigned to each DBSCAN label.
label_counts = Counter(dbscan.labels_)
print(label_counts)

Counter({2: 176, 1: 172, 0: 157})


In [64]:
# Silhouette score of the DBSCAN labelling on the clustering features
# (best value is 1, worst is -1).
db_score = silhouette_score(X=df, labels=dbscan.labels_)

print('Silhouette Score: %.3f' % db_score)

Silhouette Score: 0.826


In [66]:
# Persist the fitted DBSCAN estimator to disk.
# The context manager guarantees the file handle is flushed and closed even if
# pickling raises; the bare open() used previously was never closed.
# NOTE(review): scikit-learn's DBSCAN exposes fit/fit_predict but no predict(),
# so the pickled object cannot label unseen points by itself - confirm how the
# file is consumed downstream.
with open("dbscan.pkl", "wb") as model_file:
    pickle.dump(dbscan, model_file)