In [1]:
from sklearn.cluster import MeanShift
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the semicolon-separated red-wine table from disk.
wine_data = pd.read_csv('winequality/winequality-red.csv', sep=';')

# Separate the feature columns from the 'quality' column.
X = wine_data.drop(columns='quality')  # independent variables
y = wine_data['quality']  # target variable (not consumed by the clustering in this cell)

# Standardize the features; the clustering below runs on the scaled matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Mean shift with no explicit bandwidth, so the estimator's default is used.
ms_model = MeanShift().fit(X_scaled)
labels = ms_model.labels_
cluster_centers = ms_model.cluster_centers_

# One row of cluster_centers per cluster found.
n_clusters = cluster_centers.shape[0]

# Report the cluster count and each center (coordinates are in the scaled feature space).
print("Number of clusters:", n_clusters)
print("Cluster centers:")
for i, center in enumerate(cluster_centers):
    print(f"Cluster {i + 1} : {center}")

# Attach every row's cluster label to the table.
wine_data = wine_data.assign(cluster=labels)

# Last expression of the cell: display the labelled frame.
wine_data

Number of clusters: 10
Cluster centers:
Cluster 1 : [-0.1426303   0.00749361 -0.16863429 -0.1764756  -0.15012427 -0.08437319
 -0.10030509 -0.08159465  0.10457206 -0.15723648 -0.10482356]
Cluster 2 : [-3.38774318e-04  9.33125757e-02  1.23962759e+00 -4.32947609e-01
  6.26600472e+00 -8.36694460e-02  2.24702516e-01  9.94340353e-02
 -1.56841019e+00  3.02902161e+00 -9.87065321e-01]
Cluster 3 : [ 0.99414607 -0.04368911  0.84227282  8.55715131  2.59366519  3.40685358
  1.27815602  3.34349643 -0.91431164  0.60105502 -1.42958235]
Cluster 4 : [-0.05437199 -0.46267485  0.08485351 -0.38227061  0.17605432  0.75309977
  2.63135506  0.14751385 -2.45314019  7.18101309 -0.60824392]
Cluster 5 : [-0.24109439 -1.27271396  2.10035914  4.08744035 -0.79630346  2.06802283
  7.20790462 -1.90098908 -1.95099614 -0.87427191  1.76190411]
Cluster 6 : [-1.44760832 -1.16098443  1.43280314  7.7057778  -0.49875135 -0.13148483
  1.70388156 -0.26589683  0.21956203 -0.81525883  0.54162988]
Cluster 7 : [ 4.35514917 -0.93752

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,cluster
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,0
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,0
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,0
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,0
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5,0
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6,0
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6,0
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5,0


In [3]:
# Reload the semicolon-separated red-wine table so this cell starts from the raw columns.
wine_data = pd.read_csv('winequality/winequality-red.csv', sep=';')

# Separate the feature columns from the 'quality' column.
X = wine_data.drop(columns='quality')  # independent variables
y = wine_data['quality']  # target variable (not consumed by the clustering in this cell)

# Standardize the features; the clustering below runs on the scaled matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Mean shift with an explicit bandwidth h, applied in the scaled feature space.
# (The saved output of this cell reports 1280 clusters for h = 0.5.)
bandwidth = 0.5
ms_model = MeanShift(bandwidth=bandwidth).fit(X_scaled)

labels = ms_model.labels_
cluster_centers = ms_model.cluster_centers_

# One row of cluster_centers per cluster found.
n_clusters = cluster_centers.shape[0]

# Report the cluster count and each center (coordinates are in the scaled feature space).
print("Number of clusters:", n_clusters)
print("Cluster centers:")
for i, center in enumerate(cluster_centers):
    print(f"Cluster {i + 1} : {center}")

# Attach every row's cluster label to the table.
wine_data = wine_data.assign(cluster=labels)

# Last expression of the cell: display the labelled frame.
wine_data

Number of clusters: 1280
Cluster centers:
Cluster 1 : [-0.65371171  0.52765508 -1.18140221 -0.04043125 -0.20699571  0.09020468
  1.20766251  0.37421516  1.3534357  -0.57920652 -0.72984468]
Cluster 2 : [-0.50537839 -0.84814174  0.15931168 -0.53835576 -0.2352052   1.216909
  0.63044502  0.14698384  0.84805201 -0.3313516  -0.86637886]
Cluster 3 : [-0.81562483  0.63227455 -1.30931154 -0.33970194 -0.45624391 -0.71483251
 -0.70754953  0.2190657   1.01651324  0.04633209 -0.94147266]
Cluster 4 : [ 1.22395825 -1.57997017  0.97064898 -0.50642925 -0.23839326 -0.94434636
 -0.75164254  0.46552207 -0.68753691  2.26817444  0.42429582]
Cluster 5 : [ 0.44834214 -1.30064634  0.41863151 -0.36453367 -0.48812449 -0.37056175
 -0.63000667 -0.83168645 -0.31497841  1.0879129   1.19870062]
Cluster 6 : [-0.47090657 -0.09955388 -1.28877135 -0.59511399 -0.07367691 -0.27493098
 -0.47035959 -0.72435868  0.3167512  -0.69723268  0.07229363]
Cluster 7 : [-0.49963309  0.48702616 -0.03068503 -0.34679672 -0.09493063  0.10

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,cluster
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,26
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,798
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,802
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,346
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5,1207
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6,1239
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6,247
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5,1238


In [4]:
# Reload the semicolon-separated red-wine table so this cell starts from the raw columns.
wine_data = pd.read_csv('winequality/winequality-red.csv', sep=';')

# Separate the feature columns from the 'quality' column.
X = wine_data.drop(columns='quality')  # independent variables
y = wine_data['quality']  # target variable (not consumed by the clustering in this cell)

# Standardize the features; the clustering below runs on the scaled matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Mean shift with an explicit bandwidth h, applied in the scaled feature space.
# (The saved output of this cell reports 1353 clusters for h = 0.1.)
bandwidth = 0.1
ms_model = MeanShift(bandwidth=bandwidth).fit(X_scaled)

labels = ms_model.labels_
cluster_centers = ms_model.cluster_centers_

# One row of cluster_centers per cluster found.
n_clusters = cluster_centers.shape[0]

# Report the cluster count and each center (coordinates are in the scaled feature space).
print("Number of clusters:", n_clusters)
print("Cluster centers:")
for i, center in enumerate(cluster_centers):
    print(f"Cluster {i + 1} : {center}")

# Attach every row's cluster label to the table.
wine_data = wine_data.assign(cluster=labels)

# Last expression of the cell: display the labelled frame.
wine_data

Number of clusters: 1353
Cluster centers:
Cluster 1 : [-0.47090657 -0.09955388 -1.28877135 -0.59511399 -0.07367691 -0.27493098
 -0.47035959 -0.72435868  0.3167512  -0.69723268  0.07229363]
Cluster 2 : [-0.6432657   0.93394429 -0.72391627 -0.38227061 -0.24370669 -0.37056175
 -0.80485823 -0.68195758 -0.13679827 -0.69723268 -0.30317536]
Cluster 3 : [-0.6432657  -0.93752536  0.97064898 -0.31132282 -0.28621413  0.77700746
 -0.07504302 -0.74555923  0.57592232  1.13217271  0.54162988]
Cluster 4 : [-0.93053092 -0.37887771 -0.15906119 -0.59511399 -0.22245296  0.20322286
 -0.37913269 -1.03176662  0.51112954 -0.34315421  0.16616088]
Cluster 5 : [ 4.14448801  0.65462046  1.12470036  1.17858083  0.16011403 -0.56182328
 -0.71363133  3.39384773 -2.53413117  0.48302886  0.63549713]
Cluster 6 : [ 1.82721521 -1.94309115  1.22740129  1.03668525  0.35139753 -1.13560789
 -0.71363133  1.51229911 -0.20159105  1.84032963 -0.30317536]
Cluster 7 : [ 0.9079665   0.06804042  0.91929852 -0.16942723 -0.3499753   0.

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,cluster
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,150
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,814
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,818
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,310
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,150
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5,1274
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6,1308
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6,214
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5,1307


In [5]:
# Reload the semicolon-separated red-wine table so this cell starts from the raw columns.
wine_data = pd.read_csv('winequality/winequality-red.csv', sep=';')

# Separate the feature columns from the 'quality' column.
X = wine_data.drop(columns='quality')  # independent variables
y = wine_data['quality']  # target variable (not consumed by the clustering in this cell)

# Standardize the features; the clustering below runs on the scaled matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Mean shift with an explicit bandwidth h, applied in the scaled feature space.
# (The saved output of this cell reports 836 clusters for h = 1.)
bandwidth = 1
ms_model = MeanShift(bandwidth=bandwidth).fit(X_scaled)

labels = ms_model.labels_
cluster_centers = ms_model.cluster_centers_

# One row of cluster_centers per cluster found.
n_clusters = cluster_centers.shape[0]

# Report the cluster count and each center (coordinates are in the scaled feature space).
print("Number of clusters:", n_clusters)
print("Cluster centers:")
for i, center in enumerate(cluster_centers):
    print(f"Cluster {i + 1} : {center}")

# Attach every row's cluster label to the table.
wine_data = wine_data.assign(cluster=labels)

# Last expression of the cell: display the labelled frame.
wine_data

Number of clusters: 836
Cluster centers:
Cluster 1 : [-0.55630974  0.3617079  -1.14027137 -0.35734301 -0.15007543 -0.37056175
 -0.58788614 -0.49630954  0.2046772  -0.53135808 -0.26512107]
Cluster 2 : [-0.64722798  0.75575495 -1.29408347 -0.36759176 -0.21292543 -0.28482382
 -0.40325015  0.25616666  0.86637272 -0.37774809 -0.93111489]
Cluster 3 : [-0.76421948  0.28855923 -0.71851096 -0.4569525  -0.21574126 -0.20446621
 -0.41914449  0.19144919  1.39435745  0.06683137 -0.4810291 ]
Cluster 4 : [-0.3864168   0.40322901 -0.1620818  -0.34471002 -0.15994202  0.30447896
  1.53842101 -0.29411227 -0.18253435 -0.72500354 -0.68416596]
Cluster 5 : [-0.46731575 -0.27413127 -0.36767244 -0.40000756 -0.27691563 -0.8248079
 -0.56538761 -0.17678828  0.06567917 -0.5533883  -0.87224556]
Cluster 6 : [-0.54368042 -0.76620675  0.14561822 -0.48159752 -0.19128084  1.42729669
  0.80478977  0.13143677  0.77030067 -0.53593027 -0.99153519]
Cluster 7 : [-0.66378465  0.46108895 -1.20074199 -0.05287015 -0.19664487  0.21

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,cluster
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,1
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,601
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,30
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,85
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5,263
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6,801
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6,271
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5,800
