In [1]:
import numpy as np
import pandas as pd
import sklearn
import warnings
import pickle

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
from sklearn.mixture import GaussianMixture

# Show every column when a DataFrame is displayed (None removes the column limit).
# Uses the public pd.set_option entry point rather than the incidental `pd.pandas` attribute.
pd.set_option('display.max_columns', None)
# Silences all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

In [2]:
# Read the customer table and keep only the three columns that are fed to the
# scaler and the mixture model in the following cells.
dataset = pd.read_csv('new_dataset.csv')
feature_columns = ['Income', 'Relation', 'TotalAmtSpent']
data = dataset[feature_columns]

In [3]:
# Preview the features cast to 64-bit integers. The cast result is only displayed;
# it is not assigned, so `data` itself keeps its original dtypes.
data.astype(np.int64)

Unnamed: 0,Income,Relation,TotalAmtSpent
0,58138,110,1617
1,46344,91,27
2,71613,98,776
3,26646,92,53
4,58293,93,422
...,...,...,...
2192,61223,100,1341
2193,64014,88,444
2194,56981,93,1241
2195,69245,93,843


In [4]:
# Feature standardiser; it is fitted on `data` in the next cell and reused by the prediction cells.
sc = StandardScaler()

In [5]:
# Learn per-column mean/variance from the training features, then standardise them.
scaled_data = sc.fit(data).transform(data)

In [6]:
# Rescale every row (axis=1, one sample per row) to unit L2 norm.
norm_data = normalize(scaled_data, norm='l2', axis=1)

In [7]:
# Four-component Gaussian mixture with one variance per component ('spherical'),
# seeded so the fit is repeatable, trained on the row-normalised features.
gmm = GaussianMixture(
    n_components=4,
    covariance_type='spherical',
    max_iter=2000,
    random_state=5,
)
gmm.fit(norm_data)

In [8]:
# Score two hand-written customers with the fitted mixture model.
test_data = {'Income':[10000,110000],'Relation':[89,107],'TotalAmtSpent':[240,2420]}
final_features = pd.DataFrame(data=test_data, dtype=np.int64)
# Standardise with the statistics already learned from the training data.
# Calling fit_transform here would re-fit the scaler on these two rows alone,
# turning every column into exactly -1/+1 regardless of the actual values and
# discarding the training mean/variance the model was fitted against.
scaled_features = sc.transform(final_features)
norm_features = normalize(scaled_features, norm='l2')
prediction = gmm.predict(norm_features)

In [9]:
# Display the raw (unscaled) rows that were passed through the scaler and the model.
final_features

Unnamed: 0,Income,Relation,TotalAmtSpent
0,10000,89,240
1,110000,107,2420


In [10]:
# Display the mixture-component label predicted for each row of final_features.
prediction

array([0, 3], dtype=int64)

In [11]:
# Persist the fitted mixture model to disk.
# Only `gmm` is written; the fitted scaler `sc` is not part of this file.
with open('cpr.pkl', 'wb') as model_file:
    pickle.Pickler(model_file).dump(gmm)

In [12]:
# Reload the mixture model that was pickled to 'cpr.pkl'.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# were produced by a trusted source (here: this notebook).
with open('cpr.pkl', 'rb') as f:
    model = pickle.load(f)


In [2]:
import random

# Draw 15 synthetic customers. For each customer the three values are drawn in
# the order income -> relation -> total spend, each uniformly from an inclusive
# integer range. No seed is set, so every run produces a different sample.
draws = [
    (
        random.randint(8000, 110000),
        random.randint(88, 111),
        random.randint(30, 2500),
    )
    for _ in range(15)
]

# Split the per-customer tuples back into one list per feature.
Income_list = [income for income, _relation, _total in draws]
Relation_list = [relation for _income, relation, _total in draws]
TotalAmtSpent_list = [total for _income, _relation, total in draws]

test_data = {
    'Income': Income_list,
    'Relation': Relation_list,
    'TotalAmtSpent': TotalAmtSpent_list,
}
df = pd.DataFrame(test_data)

In [3]:
# Write the synthetic customers to CSV without the row index column.
df.to_csv('test_dataset2.csv',index=False)

In [14]:
# Score the synthetic customers with the reloaded mixture model.
# Select the feature columns explicitly so that re-running this cell after a
# 'Cluster' column has been added to `df` does not feed the label back in as a feature.
final_features = df[['Income', 'Relation', 'TotalAmtSpent']].astype(np.int64)
# Standardise with statistics learned from the training features (`data`).
# Fitting on this sample instead would rescale the sample against itself and put
# it in a different feature space from the one the model was trained in.
# Re-fitting on `data` here keeps the cell independent of whatever `sc` was last fitted on.
scaled_features = sc.fit(data).transform(final_features)
norm_features = normalize(scaled_features, norm='l2')
prediction = model.predict(norm_features)

In [15]:
# Attach the predicted label to each row; this adds a 'Cluster' column to `df` in place.
df['Cluster'] = prediction

In [16]:
# Render floats without decimals in displayed frames (global pandas display option).
pd.options.display.float_format = "{:.0f}".format

# Display name for each numeric cluster label.
cluster_names = {0: 'Platinum', 1: 'Diamond', 2: 'Gold', 3: 'Ace'}

summary = (
    df[['Income', 'Relation', 'TotalAmtSpent', 'Cluster']]
    # Rename labels in the Cluster column only: a frame-wide replace would also
    # rewrite any feature value that happens to equal 0, 1, 2 or 3.
    .assign(Cluster=lambda frame: frame['Cluster'].replace(cluster_names))
    .set_index('Cluster')
)
summary = summary.groupby(['Cluster'])
# GroupBy.head() returns up to the first five rows of every cluster, in original row order.
summary.head()

Unnamed: 0_level_0,Income,Relation,TotalAmtSpent
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Gold,13763,111,1229
Ace,33292,98,2449
Gold,34994,111,233
Gold,20449,106,282
Platinum,20286,92,149
Diamond,57148,88,1286
Diamond,104886,90,1091
Platinum,57612,92,638
Gold,8976,108,852
Diamond,40320,94,1369
