In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
data_path = './hwk2_datasets/'
submit_path = './hwk2_submit/'

# Linear Classification and Nearest Neighbor Classification

## 1. Dataset Creation
You will use a synthetic data set for the classification task that you’ll generate yourself.
Generate two classes with 20 features each. Each class is given by a multivariate Gaussian
distribution, with both classes sharing the same covariance matrix. You are provided
with the mean vectors (DS1-m0 for mean vector of negative class and DS1-m1 for mean
vector of positive class) and the covariance matrix (DS1-cov).

Generate 2000 examples for each class, and label the data to be positive if they came
from the Gaussian with mean m1 and negative if they came from the Gaussian with
mean m0. Randomly pick (without replacement) 20% of each class (i.e., 400 data points
per class) as test set, 20% of each class (i.e., 400 data points per class) as validation set
set and train the classifiers on the remaining 60% data. When you report performance results, it should be on the test set. Call this dataset as DS1, and submit it with your
code. Follow the instructions from Assignment 1 for data submission format.

In [2]:
m0 = pd.read_csv(data_path + 'DS1_m_0.txt', sep=",", header=None).to_numpy()[0]
m1 = pd.read_csv(data_path + 'DS1_m_1.txt', sep=",", header=None).to_numpy()[0]
cov = pd.read_csv(data_path + 'DS1_Cov.txt', sep=",", header=None).to_numpy()

In [3]:
# synthetic data from multivariate Gaussian distribution with mean m0
data_0 = pd.DataFrame(data=np.random.multivariate_normal(m0, cov, 2000))
data_0.insert(0, 'class', -1)
# synthetic data from multivariate Gaussian distribution with mean m1
data_1 = pd.DataFrame(data=np.random.multivariate_normal(m1, cov, 2000))
data_1.insert(0, 'class', 1)
# Create Dataset
data_train = pd.concat([data_0.iloc[:400], data_1.iloc[:400]], ignore_index=True)
data_valid = pd.concat([data_0.iloc[400:800], data_1.iloc[400:800]], ignore_index=True)
data_test = pd.concat([data_0.iloc[800:], data_1.iloc[800:]], ignore_index=True)

In [4]:
# Write data into file for submission
data_train.to_csv(submit_path + 'Assignment2_1948612_1_1_DS1_train.csv')
data_test.to_csv( submit_path + 'Assignment2_1948612_1_1_DS1_test.csv' )
data_valid.to_csv(submit_path + 'Assignment2_1948612_1_1_DS1_valid.csv')

## 2. GDA Model 

We first consider the GDA model as seen in class: given the class variable, the data are
assumed to be Gaussians with different means for different classes but with the same
covariance matrix. This model can formally be specified as follows:
Y ∼ Bernoulli(π), X | Y = j ∼ N(μj, Σ).

Estimate the parameters of the GDA model using the maximum likelihood approach.

(a) For DS1, report the best fit accuracy achieved by the classifier.

(b) Report the coefficients learnt.

In [5]:
def find_GDA_parameters(data_train: pd.DataFrame):
    """
    Find the GDA parameters
    """
    # P(y)
    phi = data_train['class'].value_counts()[1] / len(data_train['class'])

    # μ0 
    data_train_0 = data_train.drop(['class'], axis=1).loc[data_train['class'] == -1]
    mu_0 = data_train_0.mean()

    # μ1
    data_train_1 = data_train.drop(['class'], axis=1).loc[data_train['class'] == 1]
    mu_1 = data_train_1.mean()

    # Covariance 
    n = len(mu_0)
    cov_train = np.zeros((n,n))
    for index, row in data_train.iterrows():
        if row['class'] == -1:
            row = row.drop(['class']) - mu_0
        else:
            row = row.drop(['class']) - mu_1
        x_std = np.asmatrix(row.to_numpy())
        cov_train += np.matmul(np.transpose(x_std), x_std)
    cov_train = cov_train / len(data_train)
    
    return phi, mu_0, mu_1, cov_train

def eval_px_knowing_y(x, mu, sigma):
    # Gaussien
    n = len(mu)
    det = np.linalg.det(sigma)
    denum = np.power((2*np.pi),(n/2)) * np.sqrt(det)
    X = np.asmatrix(x-mu)
    S = np.linalg.inv(np.asmatrix(sigma))
    num = np.exp(-0.5 * np.matmul(np.matmul(X, S),np.transpose(X)).item(0) )
    return (num/denum)
    
def eval_py(y, phi):
    return phi if y==1 else (1-phi)

In [6]:
# Find the parameters
start_time = time.time()
phi, mu_0, mu_1, sigma = find_GDA_parameters(data_train)
print(f"--- {(time.time() - start_time)}s seconds ---")

--- 0.567986249923706s seconds ---


In [7]:
# TEST: Try a prediction a single variable
x = data_valid.iloc[0].drop(['class']).to_numpy()
real_class = data_valid.iloc[0]['class']
px_0 = eval_px_knowing_y(x, mu_0, sigma) * eval_py(0, phi)
px_1 = eval_px_knowing_y(x, mu_1, sigma) * eval_py(1, phi)
guess_class = -1 if px_0 > px_1 else 1
print(f"Good prediction: {True if guess_class == real_class else False}")

Good prediction: True


In [8]:
# Evaluation of the accuracy
start_time = time.time()
good_prediction = 0
for index, row in data_test.iterrows():
    real_class = row['class']
    x = row.drop(['class'])
    px_0 = eval_px_knowing_y(x, mu_0, sigma) * eval_py(0, phi)
    px_1 = eval_px_knowing_y(x, mu_1, sigma) * eval_py(1, phi)
    guess_class = -1 if px_0 > px_1 else 1
    good_prediction += 1 if guess_class == real_class else 0
accuracy = good_prediction / len(data_test)
print(f"a) Accuracy: {accuracy * 100}% --- {(time.time() - start_time)}s seconds ---")

a) Accuracy: 94.91666666666667% --- 2.5012707710266113s seconds ---


In [9]:
# Report the data
np.savetxt(submit_path + 'Assignment2_1948612_1_2_b_phi.txt', np.array([phi]), delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_2_b_mu0.txt', mu_0, delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_2_b_mu1.txt', mu_1, delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_2_b_cov.txt', sigma, delimiter=',')

## 3. K_NN Model
For DS1, use k-NN to learn a classifier. Repeat the experiment for different values of k
and report the performance for each value. We will compare this non-linear classifier to
the linear approach, and find out how powerful linear classifiers can be.

(a) Does this classifier performs better than GDA or worse? Are there particular values
of k which perform better? Why does this happen ? Use validation accuracy for
model selection.

(b) Report the best fit accuracy achieved by this classifier.

In [10]:
import heapq

def most_common(lst):
    return max(set(lst), key=lst.count)

class KNN_Classifier:
    
    def __init__(self, X, y, k=1):
        self.x = X
        self.y = y
        self.k = k

    def eval(self, x):
        h = []
        for index in range(len(self.x)):
            c = self.y[index]
            dist = np.linalg.norm(x - self.x[index])
            heapq.heappush(h, (dist, c))
        k_nn = []
        for _ in range(self.k):
            k_nn.append(heapq.heappop(h)[1])
        return most_common(k_nn)

In [11]:
k_lst = [1,3,5,9,13]
X = [row.drop(['class']).tolist() for index, row in data_train.iterrows()]
y = data_train['class'].tolist()
x_test = [np.array(row.drop(['class']).tolist()) for index, row in data_test.iterrows()]
y_test = data_test['class'].tolist()

for k in k_lst:
    k_nn = KNN_Classifier(X, y, k)
    start_time = time.time()
    r = len(y_test)
    good_pred = 0
    for i in range(r):
        pred = k_nn.eval(x_test[i])
        real = y_test[i]
        good_pred += 1 if pred == real else 0 
    print(f"Accuracy with k={k}: {(good_pred / r) * 100}% --- {(time.time() - start_time)}s seconds ---")

Accuracy with k=1: 52.0% --- 16.897339344024658s seconds ---
Accuracy with k=3: 54.083333333333336% --- 17.181105136871338s seconds ---
Accuracy with k=5: 52.87500000000001% --- 16.308059215545654s seconds ---
Accuracy with k=9: 54.37499999999999% --- 16.138160705566406s seconds ---
Accuracy with k=13: 55.041666666666664% --- 16.134053230285645s seconds ---


## 4. Class Generated By 3 Gaussians 
Now instead of having a single multivariate Gaussian distribution per class, each class
is going to be generated by a mixture of 3 Gaussians. For each class, we’ll define
3 Gaussians, with the first Gaussian of the first class sharing the covariance matrix
with the first Gaussian of the second class and so on. For both the classes, fix the
mixture probability as (0.1,0.42,0.48) i.e. the sample has arisen from first Gaussian with
probability 0.1, second with probability 0.42 and so on. Mean for three Gaussians in the
positive class are given as DS2-c1-m1, DS2-c1-m2, DS2-c1-m3. Mean for three Gaussians
in the negative class are gives as DS2-c2-m1, DS2-c2-m2, DS2-c2-m3. Corresponding 3
covariance matrices are given as DS2-cov-1, DS2-cov-2 and DS2-cov-3. Now sample
from this distribution and generate the dataset similar to question 1. Call this dataset
as DS2, and submit it with your code. Follow the instructions from Assignment 1 for
data submission format.

In [12]:
# Import Data
DS2_c1_m1 = pd.read_csv(data_path + 'DS2_c1_m1.txt', sep=",", header=None).to_numpy()[0]
DS2_c1_m2 = pd.read_csv(data_path + 'DS2_c1_m2.txt', sep=",", header=None).to_numpy()[0]
DS2_c1_m3 = pd.read_csv(data_path + 'DS2_c1_m3.txt', sep=",", header=None).to_numpy()[0]
DS2_c2_m1 = pd.read_csv(data_path + 'DS2_c2_m1.txt', sep=",", header=None).to_numpy()[0]
DS2_c2_m2 = pd.read_csv(data_path + 'DS2_c2_m2.txt', sep=",", header=None).to_numpy()[0]
DS2_c2_m3 = pd.read_csv(data_path + 'DS2_c2_m3.txt', sep=",", header=None).to_numpy()[0]

DS2_Cov1 = pd.read_csv(data_path + 'DS2_Cov1.txt', sep=",", header=None).to_numpy()
DS2_Cov2 = pd.read_csv(data_path + 'DS2_Cov2.txt', sep=",", header=None).to_numpy()
DS2_Cov3 = pd.read_csv(data_path + 'DS2_Cov3.txt', sep=",", header=None).to_numpy()

In [13]:
# synthetic data from 3 multivariate Gaussian distribution with mean m0, m1 and m2
data_c1_m1 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c1_m1, DS2_Cov1, int(2000 * 0.1)))
data_c1_m2 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c1_m2, DS2_Cov2, int(2000 * 0.42)))
data_c1_m3 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c1_m3, DS2_Cov2, int(2000 * 0.48)))
data_c1 = pd.concat([data_c1_m1, data_c1_m2, data_c1_m3])
data_c1 = data_c1.sample(frac = 1)
data_c1.insert(0, 'class', 1)

# synthetic data from 3 multivariate Gaussian distribution with mean m1, m1 and m2
data_c2_m1 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c2_m1, DS2_Cov1, int(2000 * 0.1)))
data_c2_m2 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c2_m2, DS2_Cov2, int(2000 * 0.42)))
data_c2_m3 = pd.DataFrame(data=np.random.multivariate_normal(DS2_c2_m3, DS2_Cov2, int(2000 * 0.48)))
data_c2 = pd.concat([data_c2_m1, data_c2_m2, data_c2_m3])
data_c2 = data_c2.sample(frac = 1)
data_c2.insert(0, 'class', -1)

# Create Dataset
data_c2_train = pd.concat([data_c1.iloc[:400], data_c2.iloc[:400]], ignore_index=True)
data_c2_valid = pd.concat([data_c1.iloc[400:800], data_c2.iloc[400:800]], ignore_index=True)
data_c2_test = pd.concat([data_c1.iloc[800:], data_c2.iloc[800:]], ignore_index=True)

In [14]:
# Write data into file for submission
data_c2_train.to_csv(submit_path + 'Assignment2_1948612_1_4_DS2_train.csv')
data_c2_test.to_csv( submit_path + 'Assignment2_1948612_1_4_DS2_test.csv' )
data_c2_valid.to_csv(submit_path + 'Assignment2_1948612_1_4_DS2_valid.csv')

## 5. Model

Now perform the experiments in questions 2 and 3 again, but now using DS2.

1. Estimate the parameters of the GDA model using the maximum likelihood ap-
proach.

    (a) For DS2, report the best fit accuracy achieved by the classifier.

    (b) Report the coefficients learnt.

In [15]:
start_time = time.time()
phi, mu_0, mu_1, sigma = find_GDA_parameters(data_c2_train)
print(f"--- {(time.time() - start_time)}s seconds ---")

--- 0.45681095123291016s seconds ---


In [16]:
# TEST: Try a prediction
x = data_c2_valid.iloc[0].drop(['class']).to_numpy()
real_class = data_c2_valid.iloc[0]['class']
px_0 = eval_px_knowing_y(x, mu_0, sigma) * eval_py(0, phi)
px_1 = eval_px_knowing_y(x, mu_1, sigma) * eval_py(1, phi)
guess_class = -1 if px_0 > px_1 else 1
print(f"Good prediction: {True if guess_class == real_class else False}")

Good prediction: False


In [17]:
# Evaluation of the accuracy
start_time = time.time()
good_prediction = 0
for index, row in data_c2_test.iterrows():
    real_class = row['class']
    x = row.drop(['class'])
    px_0 = eval_px_knowing_y(x, mu_0, sigma) * eval_py(0, phi)
    px_1 = eval_px_knowing_y(x, mu_1, sigma) * eval_py(1, phi)
    guess_class = -1 if px_0 > px_1 else 1
    good_prediction += 1 if guess_class == real_class else 0
accuracy = good_prediction / len(data_c2_test)
print(f"1 a) Accuracy: {accuracy * 100}% --- {(time.time() - start_time)}s seconds ---")

1 a) Accuracy: 51.125% --- 2.6442489624023438s seconds ---


In [18]:
# Report the data
np.savetxt(submit_path + 'Assignment2_1948612_1_5_1_b_phi.txt', np.array([phi]), delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_5_1_b_mu0.txt', mu_0, delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_5_1_b_mu1.txt', mu_1, delimiter=',')
np.savetxt(submit_path + 'Assignment2_1948612_1_5_1_b_cov.txt', sigma, delimiter=',')

2. Does k-NN classifier perform better than GDA or worse? Are there particular
values of k which perform better? Why does this happen ?


3. Report the best fit accuracy achieved by this classifier.

In [19]:
k_lst = [1,3,5,9,13]
X = [row.drop(['class']).tolist() for index, row in data_c2_train.iterrows()]
y = data_c2_train['class'].tolist()
x_test = [np.array(row.drop(['class']).tolist()) for index, row in data_c2_test.iterrows()]
y_test = data_c2_test['class'].tolist()

for k in k_lst:
    k_nn = KNN_Classifier(X, y, k)
    start_time = time.time()
    r = len(y_test)
    good_pred = 0
    for i in range(r):
        pred = k_nn.eval(x_test[i])
        real = y_test[i]
        good_pred += 1 if pred == real else 0 
    print(f"Accuracy with k={k}: {(good_pred / r) * 100}% --- {(time.time() - start_time)}s seconds ---")

Accuracy with k=1: 49.125% --- 16.059664726257324s seconds ---
Accuracy with k=3: 49.25% --- 16.311174869537354s seconds ---
Accuracy with k=5: 51.916666666666664% --- 16.200719118118286s seconds ---
Accuracy with k=9: 51.33333333333333% --- 15.901687145233154s seconds ---
Accuracy with k=13: 51.37500000000001% --- 16.707412004470825s seconds ---


# 2 MNIST Handwritting Digits Classification

In this section, we will use the MNIST handwritten digits classification dataset. The task
is to classify a given image of a handwritten digit into one of 10 classes representing integer
values from 0 to 9, inclusively. The dataset consists of 60,000 training data points and
10,000 test data points. The datapoints are represented as 28×28 pixel grayscale images of
handwritten single digits between 0 and 9.

To load the dataset, using the following code. However, remember that for any other implementation you should not use tensorflow/sklearn/keras as mentioned in the instructions.

In [21]:
#Import Keras
from tensorflow import keras
# Loading the MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

Use the first 50,000 examples in (x train, y train) as your training data and use the last
10,000 examples in (x train, y train) as your validation data. The examples are represented
as pixel matrices of size (28,28). You should first flatten the image matrix to 784 features
before passing it to the classifiers. The pixel values are in the range of (0,255). You should
normalize the features by dividing them by 255.

In [22]:
def flat_normalize_images(image_arr: np.array):
    flatten_images = []
    for i in range(len(image_arr)):
        flatten_images.append(image_arr[i].flatten()/255)
    return np.array(flatten_images)

In [23]:
x_valid = flat_normalize_images(x_train[50000:60000])
y_valid = y_train[50000:60000]
x_train = flat_normalize_images(x_train[:50000])
y_train = y_train[:50000]
x_test = flat_normalize_images(x_test) 
y_test = y_test

In [24]:
print(x_train.shape)
df_train = pd.DataFrame(data=x_train)
df_train.insert(0, 'class', y_train)
df_train.head()

(50000, 784)


Unnamed: 0,class,0,1,2,3,4,5,6,7,8,...,774,775,776,777,778,779,780,781,782,783
0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 1. Gaussian Naïve Bayes (GNB) 
First we will consider Gaussian Na ̈ıve Bayes (GNB) model for this task. This is same as
the GDA model from the previous question but with two modifications: The covariance
matrices are not shared and they are diagonal (Na ̈ıve Bayes assumption).

(a) Write down the equations for computing the mean and diagonal covariance matrices
for the class conditional densities and also the prior class probabilities using the
maximum likelihood approach.

(b) Estimate the parameters of the GNB model from the dataset. Report the best fit
accuracy achieved.

In [25]:
def find_GNB_params(data: pd.DataFrame, lsc: int=0):
    """
    Find the parameters of Gaussian Naïve Bayes
    @params:
        - Data
        - lsc: Laplace smoothing coeffience
    """
    # P(y)
    phi = data['class'].value_counts() / len(data)
    
    # μ and variance
    mu = {}
    variance = {}
    n = data['class'].value_counts()

    for c in range(0,10):
        features = data.loc[data['class'] == c].drop(['class'], axis=1)
        
        # Means
        n_feature = features.shape[1]
        n_occ_c = features.shape[0]
        features_mean = (np.sum(features, axis=0) + lsc) / (n_occ_c + lsc * n_feature) 
        mu[c] = features_mean
        
        # Variance
        s_2 = np.zeros(n[c], n[c])
        sum_of_error_2 = features.sub(features_mean).pow(2).sum()
        variances_arr = sum_of_error_2 / (n[c] - 1)
        variance[c] = np.diag(np.sqrt(variances_arr))
        
    return phi, mu, variance

def compute_wk(sigma_inv, mu):
    return np.matmul(sigma_inv, mu)

def compute_w0(sigma_inv, mu, pc):
    return (-0.5) * np.transpose(mu) @ sigma_inv @ mu + np.log(pc)

def compute_ak(x, mu, sigma, pc):
    mu = np.transpose(mu)
    inv = np.linalg.inv(np.asmatrix(sigma))
    wk = compute_wk(inv, mu)
    w0 = compute_w0(inv, mu, pc)
    return np.transpose(wk) @ x + w0
                     
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

In [26]:
start_time = time.time()
phi, mu, sigma = find_GNB_params(df_train, 0.5)
print(f"--- {(time.time() - start_time)}s seconds ---")

--- 5.3728649616241455s seconds ---


In [27]:
i = 1099
x = x_valid[i]
y = y_valid[i]

ak = [compute_ak(x, mu[c], sigma[c], 0.1) for c in range(10)]
sm = softmax(np.array(ak))
pred = np.argmax(sm)
        
print(f"pred: {pred} | real: {y}")

pred: 9 | real: 9


In [85]:
good_prediction = 0
n = x_valid.shape[0]
# n = 1000

start_time = time.time()

for i in range(n):
    # Features and 
    x = x_valid[i]
    y = y_valid[i]
    
    ak = [compute_ak(x, mu[c], sigma[c], 0.1) for c in range(10)]
    sm = softmax(np.array(ak))
    pred = np.argmax(sm)

    good_prediction += 1 if pred == y else 0

accuracy = good_prediction / n
print(f"a) Accuracy: {accuracy * 100}% --- {(time.time() - start_time)}s seconds ---")


a) Accuracy: 80.58% --- 1911.9195761680603s seconds ---


## 2. K-NN
Use k-NN to learn a classifier. Repeat the experiment for different values of k and report
the performance for each value.

(a) Are there particular values of k which perform better? Why does this happen ? Use validation accuracy for model selection.

(b) Report the best fit accuracy achieved by this classifier.

In [28]:
import heapq

def most_common(lst):
    return max(set(lst), key=lst.count)

class KNN_Classifier:
    
    def __init__(self, X, y, k=1):
        self.x = X
        self.y = y
        self.k = k

    def eval(self, x):
        h = []
        for index in range(len(self.x)):
            c = self.y[index]
            dist = np.linalg.norm(x - self.x[index])
            heapq.heappush(h, (dist, c))
        k_nn = []
        for _ in range(self.k):
            k_nn.append(heapq.heappop(h)[1])
        return most_common(k_nn)

In [62]:
k_lst = [1,10,12,15,21,35]
for k in k_lst:
    k_nn = KNN_Classifier(x_train, y_train, k)
    start_time = time.time()
    r = 500
    good_pred = 0
    for i in range(r):
        pred = k_nn.eval(x_test[i])
        real = y_test[i]
        good_pred += 1 if pred == real else 0 
    print(f"Accuracy with k={k}: {(good_pred / r) * 100}% --- {(time.time() - start_time)}s seconds ---")


Accuracy with k=1: 96.2% --- 202.660737991333s seconds ---
Accuracy with k=10: 96.0% --- 204.27352786064148s seconds ---
Accuracy with k=12: 96.0% --- 200.88634586334229s seconds ---
Accuracy with k=15: 96.0% --- 196.36129307746887s seconds ---
Accuracy with k=21: 95.8% --- 192.36278986930847s seconds ---
Accuracy with k=35: 94.6% --- 184.72234272956848s seconds ---


Validation with sklearn k_nn algo

In [208]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(x_train, y_train)

start_time = time.time()
good_pred = 0
for i in range(len(x_test)):
    pred = neigh.predict([x_test[i]])[0]
    real = y_test[i]
    good_pred += 1 if pred == real else 0
print(f"Accuracy: {(good_pred / r)*100}% --- {(time.time() - start_time)}s seconds ---")

Accuracy: 0.9664
