# Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

In [None]:
# Read the clustering dataset from its CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer="/content/Clustering_Data.csv")

In [None]:
# Preview the first rows of the loaded DataFrame to check its columns and values.
data.head()

# Standardize the dataset using StandardScaler to normalize the features.

In [None]:
# Fit the scaler on the raw data first (learning the per-column statistics),
# then apply the learned scaling to that same data.
sc = StandardScaler().fit(data)
scaled_data = sc.transform(data)

In [None]:
# Display the standardized feature values produced by the scaler.
scaled_data

In [None]:
# Settings for the clustering experiment.
n_clusters = 4        # number of mixture components to fit
random_state = 18     # seed value intended to make runs reproducible
num_iterations = 100  # number of EM iterations to perform

In [None]:
# Inspect the dimensions of the standardized data (samples, features).
scaled_data.shape

In [None]:
# Work on an independent copy so later steps leave scaled_data untouched.
new_data=scaled_data.copy()

## This function implements the Expectation-Maximization (EM) algorithm for Gaussian Mixture Model (GMM) clustering.


In [None]:
def gmm(X, n_clusters, num_iterations, random_state=None, reg_covar=1e-6):
    """Fit a Gaussian mixture model to ``X`` with the EM algorithm.

    Means are initialised to ``n_clusters`` distinct rows of ``X`` chosen at
    random, covariances to the identity matrix and weights to a uniform
    distribution. Exactly ``num_iterations`` EM iterations are then run
    (there is no convergence check).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster.
    n_clusters : int
        Number of mixture components; must satisfy
        ``1 <= n_clusters <= n_samples``.
    num_iterations : int
        Number of EM iterations to perform.
    random_state : int or None, optional
        Seed for the random choice of the initial means. With ``None``
        (default) NumPy's global random state is used, so a preceding
        ``np.random.seed`` call still controls the initialisation.
    reg_covar : float, optional
        Non-negative value added to the diagonal of every covariance
        estimate so it stays positive definite. With ``0`` a collapsed
        component can make SciPy reject the covariance as singular.

    Returns
    -------
    means : ndarray of shape (n_clusters, n_features)
        Estimated component means.
    cov : list of ndarray of shape (n_features, n_features)
        Estimated component covariance matrices.
    w : ndarray of shape (n_clusters,)
        Estimated mixing weights, summing to one.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional, ``n_clusters`` is out of range or
        ``reg_covar`` is negative.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and n_samples={n_samples}, "
            f"got {n_clusters}"
        )
    if reg_covar < 0:
        raise ValueError("reg_covar must be non-negative")

    # Fall back to the global NumPy state when no seed is given so callers
    # that rely on np.random.seed() keep getting the same behaviour.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    means = X[rng.choice(n_samples, n_clusters, replace=False)]
    cov = [np.eye(n_features) for _ in range(n_clusters)]
    w = np.full(n_clusters, 1.0 / n_clusters)

    ridge = reg_covar * np.eye(n_features)
    # Keeps the per-component totals strictly positive so an empty component
    # never causes a division by zero (or log(0)) below.
    tiny = 10 * np.finfo(float).eps

    for _ in range(num_iterations):
        # E-step, computed in log space: raw densities underflow to 0 for
        # points far from every component, which would give 0/0 = NaN.
        log_w = np.log(w)
        log_res = np.column_stack([
            log_w[j] + multivariate_normal.logpdf(X, means[j], cov[j])
            for j in range(n_clusters)
        ])
        # Subtracting the row maximum makes the largest term exp(0) = 1, so
        # the normalising sum is always at least 1.
        log_res -= log_res.max(axis=1, keepdims=True)
        res = np.exp(log_res)
        res /= res.sum(axis=1, keepdims=True)

        # M-step: re-estimate weights, means and covariances.
        total_res = res.sum(axis=0) + tiny
        w = total_res / total_res.sum()
        means = (res.T @ X) / total_res[:, np.newaxis]
        for k in range(n_clusters):
            diff = X - means[k]
            weighted_scatter = (res[:, k][:, np.newaxis] * diff).T @ diff
            cov[k] = weighted_scatter / total_res[k] + ridge
    return means, cov, w

## Run the GMM algorithm on the standardized dataset to estimate the means, covariances, and weights.


In [None]:
# Seed NumPy's global generator so the random choice of initial means made
# inside gmm() -- and therefore the fitted parameters -- is the same on
# every run.
np.random.seed(random_state)
e_mean, e_cov, e_w = gmm(new_data, n_clusters, num_iterations)
print("Estimated Mean",e_mean)
print("Estimated covriance",e_cov)
print("Estimated Weight",e_w)

## Assign Clusters
### Using the estimated parameters from the GMM, assign each point to the cluster with the highest probability.


In [None]:
def assign_clusters(X,means,cov,w):
    """Assign each sample to its most probable mixture component.

    For every component ``k`` the score ``log(w[k]) + log N(x | means[k],
    cov[k])`` is evaluated and each sample is given the index of the
    component with the highest score. Working with log densities keeps the
    comparison meaningful for samples far from every component, where the
    raw densities all underflow to 0 and ``argmax`` would always return 0.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to label.
    means : sequence of array-like of shape (n_features,)
        Component means.
    cov : sequence of array-like of shape (n_features, n_features)
        Component covariance matrices.
    w : sequence of float
        Component mixing weights.

    Returns
    -------
    ndarray of shape (n_samples,)
        Index of the highest-scoring component for each sample.
    """
    X = np.asarray(X, dtype=float)
    n_samples = X.shape[0]
    n_clusters = len(means)
    # A zero weight legitimately maps to -inf (that component can never
    # win), so silence the divide-by-zero warning np.log would emit.
    with np.errstate(divide="ignore"):
        log_w = np.log(np.asarray(w, dtype=float))
    log_res = np.empty((n_samples, n_clusters))
    for k in range(n_clusters):
        log_res[:, k] = (
            multivariate_normal.logpdf(X, mean=means[k], cov=cov[k]) + log_w[k]
        )
    cluster_assignments = np.argmax(log_res, axis=1)
    return cluster_assignments

# Label every sample with the index of its highest-scoring mixture component.
predictes = assign_clusters(X=new_data, means=e_mean, cov=e_cov, w=e_w)

In [None]:
# Visualize the clusters: scatter the first two features colored by cluster
# assignment and overlay the estimated component means as red crosses.
fig, ax = plt.subplots()
ax.scatter(new_data[:, 0], new_data[:, 1], c=predictes, cmap='viridis')
ax.scatter(e_mean[:, 0], e_mean[:, 1], marker='x', s=100, color='red', label='Cluster Centers')
ax.legend()
ax.set_title("GMM Clustering")
plt.show()