In [255]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

In [256]:
def cal_phi(y):
    return(np.mean(y==1))

def cal_mu1(x,y):
    m = len(y)
    y_pos = np.sum(y==0)
    conditional_sum_x = np.zeros_like(x[1,:], dtype=np.float64)
    for i in range(m):
        xi = x[i,:]
        yi = y[i]
        if (yi==1):
            conditional_sum_x += xi
        else:
            conditional_sum_x += np.zeros_like(xi)
    return (1/m)*conditional_sum_x/y_pos
    
def cal_mu0(x,y):
    m = len(y)
    y_neg = np.sum(y==1)
    conditional_sum_x = np.zeros_like(x[1,:])
    for i in range(m):
        xi = x[i,:]
        yi = y[i]
        if (yi==0):
            conditional_sum_x += xi
        else:
            conditional_sum_x += np.zeros_like(xi)
    return (1/m)*conditional_sum_x/y_neg
    
def cal_sigma(x, y, mu0, mu1):
    mu = [mu0, mu1]
    m = len(y)
    # x=x.to_numpy()
    x_minus_mu = x[0] - mu[int(y[0])]
    x_minus_mu = x_minus_mu.reshape(*(x_minus_mu.shape), 1)
    s = np.matmul(x_minus_mu, x_minus_mu.T)
 
    for i in range(1, m):
        x_minus_mu = x[i] - mu[int(y[i])]
        x_minus_mu = x_minus_mu.reshape(*(x_minus_mu.shape), 1)
        s += np.matmul(x_minus_mu, x_minus_mu.T)
    s = s/m
    return(s)

def px_py(x, mu, sigma):
    pi = 3.1415926535
    dim = len(mu)
    x_minus_mu = x-mu
    x_minus_mu = x_minus_mu.reshape(*(x_minus_mu.shape), 1)
    expval = (np.matmul((x-mu),np.linalg.inv(sigma)))
    return ((1/(((2*pi)**(dim/2))*np.sqrt(np.linalg.det(sigma))))*np.exp(-0.5*(np.matmul(expval, (x-mu).T))))

def cal_py(y, phi):
    if (y==1):
        return phi
    else:
        return 1-phi


In [257]:
dtrain1 = pd.read_csv("ds1_train.csv")
dtest1 = pd.read_csv("ds1_test.csv")

xtrain1 = dtrain1[['x_1','x_2']]
ytrain1 = dtrain1['y']
xtest1 = dtest1[['x_1','x_2']]
ytest1 = dtest1['y']
ytest1 = ytest1.values.reshape(*(ytest1.shape), 1)



In [258]:
pca = PCA(n_components=2)
pca.fit(xtrain1)
xtrain1 = pca.transform(xtrain1)
xtest1 = pca.transform(xtest1)

In [259]:
phi1 =  cal_phi(ytrain1)
mu1_1 = cal_mu1(xtrain1,ytrain1)
mu0_1 = cal_mu0(xtrain1,ytrain1)
sig1 = cal_sigma(xtrain1, ytrain1, mu0_1, mu1_1)

In [260]:
m = len(ytest1)
y_pred1 = np.zeros_like(ytest1)
for i in range(m):   
    px_y0_1 = px_py(xtest1[i], mu0_1, sig1)*cal_py(0, phi1)
    px_y1_1 = px_py(xtest1[i], mu1_1, sig1)*cal_py(0, phi1)
    if (px_y0_1 > px_y1_1):
        y_pred1[i]=0
    else:
        y_pred1[i]=1
print('accuracy -> {}'.format(sum(y_pred1 == ytest1)/ytest1.shape[0]))

accuracy -> [0.83]


In [261]:
dtrain2 = pd.read_csv("ds2_train.csv")
dtest2 = pd.read_csv("ds2_test.csv")

xtrain2 = dtrain2[['x_1','x_2']]
ytrain2 = dtrain2['y']
xtest2 = dtest2[['x_1','x_2']]
ytest2 = dtest2['y']
ytest2 = ytest2.values.reshape(*(ytest2.shape), 1)



In [262]:
pca.fit(xtrain2)
xtrain2 = pca.transform(xtrain2)
xtest2 = pca.transform(xtest2)

In [263]:
phi2 =  cal_phi(ytrain2)
mu1_2 = cal_mu1(xtrain2,ytrain2)
mu0_2 = cal_mu0(xtrain2,ytrain2)
sig2 = cal_sigma(xtrain2, ytrain2, mu0_2, mu1_2)

m = len(ytest2)
y_pred2 = np.zeros_like(ytest2)
for i in range(m):
    px_y0_2 = px_py(xtest2[i], mu0_2, sig2)*cal_py(0, phi2)
    px_y1_2 = px_py(xtest2[i], mu1_2, sig2)*cal_py(0, phi2)
    if (px_y0_2 > px_y1_2):
        y_pred2[i]=0
    else:
        y_pred2[i]=1
print('accuracy -> {}'.format(sum(y_pred2 == ytest2)/ytest2.shape[0]))

accuracy -> [0.91]
