# PRML Assignment 8

Author: Darshil Patel (202011034)

In [None]:
# import useful libraries
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.datasets import load_iris

# load dataset
# return_X_y=True makes load_iris hand back the (data, target) pair directly;
# X is used below as the feature matrix and Y as the reference labels.
X, Y = load_iris(return_X_y=True)

## Gaussian Mixture Model

In [None]:
# custom Gaussian mixture model
class GMM:
    """Gaussian mixture model fitted with expectation-maximization (EM).

    Parameters
    ----------
    n_clusters : int
        Number of mixture components.
    max_iter : int, default 5
        Number of EM iterations run by ``fit`` (no early stopping).

    Attributes set by ``initialize`` / ``fit``
    ------------------------------------------
    phi : ndarray of shape (n_clusters,)
        Mixing proportion (prior probability) of each component.
    weights : ndarray of shape (n_samples, n_clusters)
        Responsibility of each component for each training sample.
    mu : list of ndarray
        Mean vector of each component.
    sigma : list of ndarray
        Covariance matrix of each component.
    """

    def __init__(self, n_clusters, max_iter=5):
        self.n_clusters = n_clusters
        self.max_iter = int(max_iter)

    # initialize parameters (variables)
    def initialize(self, X):
        """Set the starting parameters from the 2-D sample array ``X``."""
        self.shape = X.shape
        self.n, self.m = self.shape

        # class probability: start from a uniform prior over the components
        self.phi = np.full(shape=self.n_clusters, fill_value=1/self.n_clusters)
        # weights: one row per sample, one column per component
        self.weights = np.full(
            shape=(self.n, self.n_clusters), fill_value=1/self.n_clusters
        )

        # Initialize the means with a random draw of rows from the data.
        # Draw without replacement so two components never start identical
        # (identical components would stay identical through every EM step);
        # fall back to replacement only when there are fewer rows than
        # components.
        random_row = np.random.choice(
            self.n, size=self.n_clusters, replace=self.n < self.n_clusters
        )
        self.mu = [X[row_index, :].copy() for row_index in random_row]
        # every component starts from the covariance of the whole data set
        data_cov = np.cov(X.T)
        self.sigma = [data_cov.copy() for _ in range(self.n_clusters)]

    def e_step(self, X):
        """Recompute the responsibilities and the mixing proportions."""
        # update weights and phi
        self.weights = self.predict_proba(X)
        self.phi = self.weights.mean(axis=0)

    def m_step(self, X):
        """Re-estimate each component's mean and covariance from the weights."""
        # update mu and sigma
        for i in range(self.n_clusters):
            # keep the column 2-D so it broadcasts against the rows of X
            weight = self.weights[:, [i]]
            total_weight = weight.sum()
            self.mu[i] = (X * weight).sum(axis=0) / total_weight
            # bias=True gives the weighted maximum-likelihood covariance
            self.sigma[i] = np.cov(
                X.T, aweights=(weight/total_weight).flatten(), bias=True
            )

    def fit(self, X):
        """Initialize the parameters and run ``max_iter`` EM iterations on ``X``."""
        # initialize parameters
        self.initialize(X)

        # train model
        for iteration in range(self.max_iter):
            self.e_step(X)
            self.m_step(X)

    # predict probability for all cluster
    def predict_proba(self, X):
        """Return the posterior probability of each component for each row of ``X``.

        The result has shape ``(X.shape[0], n_clusters)`` and every row sums to 1.
        """
        # size by the rows of X, not by the training set, so that data with a
        # different number of samples can be scored
        likelihood = np.zeros((X.shape[0], self.n_clusters))
        for i in range(self.n_clusters):
            distribution = multivariate_normal(mean=self.mu[i], cov=self.sigma[i])
            likelihood[:, i] = distribution.pdf(X)

        # Bayes' rule: prior-weighted likelihood normalized over the components
        numerator = likelihood * self.phi
        denominator = numerator.sum(axis=1)[:, np.newaxis]
        weights = numerator / denominator
        return weights

    # prediction
    def predict(self, X):
        """Return the index of the most probable component for each row of ``X``."""
        weights = self.predict_proba(X)
        return np.argmax(weights, axis=1)

In [None]:
# create model
# three mixture components, 25 EM iterations
gmm = GMM(n_clusters=3, max_iter=25)
# train model
# NOTE: GMM.initialize picks its starting means with NumPy's global RNG, so
# the fitted clusters can differ between runs unless a seed is set beforehand.
gmm.fit(X)

In [None]:
# adjusted rand score
from sklearn.metrics.cluster import adjusted_rand_score

# compare the GMM cluster assignments for X with the reference labels Y;
# the adjusted Rand index does not depend on how the cluster ids are numbered
score = adjusted_rand_score(Y, gmm.predict(X))
print("Adjusted Rand Score:",score)

Adjusted Rand Score: 0.9038742317748124


## Logistic Regression

In [None]:
# import library
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


# train test split
# hold out 20% of the samples; stratify on the labels Y so that both splits
# keep the class proportions of the full data set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify=Y, test_size=0.2)

# create classifier
classifier = LogisticRegression()
# fit data
classifier = classifier.fit(X_train, Y_train)
# prediction on test data
prediction = classifier.predict(X_test)
# accuracy (mean accuracy of the classifier on the held-out split)
accuracy = classifier.score(X_test, Y_test)

# show details
print("Predictions: ", prediction)
print("Accuracy: ", accuracy)

Predictions:  [0 2 1 1 0 1 0 0 2 1 2 2 2 1 0 0 0 1 1 2 0 2 1 2 2 2 1 0 2 0]
Accuracy:  0.9666666666666667
