In [None]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd  
from sklearn.preprocessing import Binarizer, LabelEncoder, OneHotEncoder, MinMaxScaler
from sklearn.cluster import KMeans  
import seaborn as sns
from sklearn.mixture import GaussianMixture as GMM

## Gaussian Mixture Modeling
Gaussian mixture modeling (GMM) addresses limitations of k-means clustering by making soft (probabilistic) assignments of data points to clusters rather than hard ones. A Gaussian mixture will almost always fit better than k-means: the clustering mimics the data cloud more closely, and does so with a smaller k. k-means remains useful primarily because it is very fast, so it can be fit more easily to very large data sets with many dimensions.

In [None]:
# Load the wine data set; later cells expect a 'class' column plus the feature columns.
data = pd.read_csv(filepath_or_buffer='Wine2.csv')

In [None]:
## Scale every feature column with MinMaxScaler; the 'class' label column is left untouched.

feature_cols = [col for col in data.columns if col != 'class']
mms = MinMaxScaler()
# Assign by column label instead of `data.loc[:, mask] = ...`: .loc writes into the
# existing columns in place, so float results landing in an integer-typed column
# trigger pandas' incompatible-dtype warning (an error in newer pandas releases).
# Label assignment replaces the columns outright, so the float output is always accepted.
data[feature_cols] = mms.fit_transform(data[feature_cols])

In [None]:
# Baseline: k-means with 3 clusters, fitted on the feature columns only ('class' is excluded).
# random_state pins the centroid initialisation so a fresh Restart & Run All reproduces the
# same clusters; n_init is spelled out because scikit-learn's default changed between releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(data.drop(columns='class'))

In [None]:
# Scatter of two features, coloured by the true 'class' label from the data set.
fig, ax = plt.subplots()
ax.scatter(data['Alcohol'], data['Malic_acid'], c=data['class'], s=40)
ax.set_title('Actual Classification')
# Axis labels so the figure can be read on its own when the notebook is skimmed.
ax.set_xlabel('Alcohol')
ax.set_ylabel('Malic_acid')
plt.show()

In [None]:
# Fit a 3-component Gaussian mixture on the feature columns ('class' is excluded) and
# use each row's predicted component as its cluster label.
features = data.drop(columns='class')  # built once, reused for both fit and predict
# random_state fixes the otherwise random initialisation, so the fit and the component
# numbering stay the same across kernel restarts.
gmm = GMM(n_components=3, random_state=42).fit(features)
labels = gmm.predict(features)

# Same two features as the actual-class scatter, now coloured by GMM component.
fig, ax = plt.subplots()
ax.scatter(data['Alcohol'], data['Malic_acid'], c=labels, s=40)
ax.set_title('GMM Classification')
ax.set_xlabel('Alcohol')
ax.set_ylabel('Malic_acid')
plt.show()

In [None]:
# Contingency table: true class (rows) against predicted GMM component (columns).
# Component numbers are arbitrary, so look for one dominant count per row, not a diagonal.
pd.crosstab(
    index=data['class'],
    columns=labels,
    rownames=['Actual Result'],
    colnames=['Predicted Result'],
)