In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# @title Importing Relevent Libraries
import torch
%cd drive/MyDrive/2학년 2학기/COSE362-MachineLearning/Hw1

/content/drive/MyDrive/2학년 2학기/COSE362-MachineLearning/Hw1


In [None]:
# @title Checking GPU availibity
if torch.cuda.is_available():
    print("GPU is enabled and available!")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not enabled or available, working with CPU...")

In [17]:
# @title GMM implementation using class
from torch.distributions import MultivariateNormal as MVN
class GMM:
  def __init__(self, data, c):
    self.weights = torch.tensor([1/c for i in range(c)], device="cuda")
    self.means = data[torch.randint(len(data), (c,))]
    self.cov_matrices = torch.stack([torch.cov(data.T) for _ in range(c)]).to("cuda")
    self.data = data
    self.c = c

  def expectation(self):
    GMMs = MVN(loc = self.means, covariance_matrix = self.cov_matrices)
    log_prob = GMMs.log_prob(self.data.unsqueeze(1))
    weighted_log_prob = log_prob + torch.log(self.weights)
    weighted_log_sum = torch.logsumexp(weighted_log_prob, dim=1)
    self.responsibility = torch.exp(weighted_log_prob - weighted_log_sum.unsqueeze(1))
    return torch.sum(weighted_log_sum)



  def maximize(self):
    self.weights = torch.sum(self.responsibility, 0) / len(self.responsibility)
    self.means = torch.div((self.data.T@self.responsibility).T, torch.sum(self.responsibility, 0).unsqueeze(1))
    centered_data = torch.sub(self.data, self.means.unsqueeze(1))
    weighted_centered_data = centered_data * self.responsibility.T.unsqueeze(-1)
    self.cov_matrices = torch.bmm(torch.transpose(centered_data, 1, 2), weighted_centered_data) / torch.sum(self.responsibility, 0).unsqueeze(1).unsqueeze(1)

    #ensuring positive definiteness
    self.cov_matrices += torch.diag_embed(torch.full((self.c, self.cov_matrices.shape[-1]), 1e-6, device="cuda"))
    self.cov_matrices = (self.cov_matrices + torch.transpose(self.cov_matrices, 1, 2)) / 2

    #returning new likelihood
    GMMs = MVN(loc = self.means, covariance_matrix = self.cov_matrices)
    log_prob = GMMs.log_prob(self.data.unsqueeze(1))
    weighted_log_prob = log_prob + torch.log(self.weights)
    weighted_log_sum = torch.logsumexp(weighted_log_prob, dim=1)
    return torch.sum(weighted_log_sum)

  def log_likelihood(self, input_data): #returns t length vector
    GMMs = MVN(loc = self.means, covariance_matrix = self.cov_matrices)
    log_prob = GMMs.log_prob(input_data.unsqueeze(1))
    weighted_log_prob = log_prob + torch.log(self.weights)
    weighted_log_sum = torch.logsumexp(weighted_log_prob, dim=1)
    return weighted_log_sum

  def fit(self, max_iter=1000, tol=1e-4):
        for i in range(max_iter):
            log_likelihood = self.expectation()
            new_likelihood = self.maximize()
            # Check convergence
            if i == (max_iter - 1) or abs(new_likelihood - log_likelihood) < tol:
                break
            prev_likelihood = new_likelihood

In [25]:
# @title Data preparation
K = 5
train_file_path = "train.txt"
with open(train_file_path, 'r') as f:
  train_data = f.readlines()
data_with_label = []
for data in train_data:
  data_with_label.append([float(x) for x in data.split()])
#full dataset with label
data_with_label = torch.tensor(data_with_label, dtype=torch.float, device="cuda")
data_class = data_with_label[:, -1]
#full dataset divided into class 0/1, with removed labels
data_0 = data_with_label[data_class==0]
data_1 = data_with_label[data_class==1]

section_size_0 = len(data_0) // K
section_size_1 = len(data_1) // K

#preparing train data
data_sections_0 = torch.zeros([K, len(data_0) // K, 14], device="cuda")
data_sections_1 = torch.zeros([K, len(data_1) // K, 14], device="cuda")
train_sections_0 = torch.zeros([K, (K - 1) * (len(data_0) // K), 13], device="cuda")
train_sections_1 = torch.zeros([K, (K - 1) * (len(data_1) // K), 13], device="cuda")
for k in range(K):
  data_sections_0[k] = data_0[k * section_size_0 : (k + 1) * section_size_0]
  data_sections_1[k] = data_1[k * section_size_1 : (k + 1) * section_size_1]
for k in range(K):
  train_sections_0[k] = torch.cat((data_sections_0[:k, :, :-1], data_sections_0[k + 1:, :, :-1])).flatten(end_dim=1)
  train_sections_1[k] = torch.cat((data_sections_1[:k, :, :-1], data_sections_1[k + 1:, :, :-1])).flatten(end_dim=1)

#preparing test data
test_sections = torch.zeros([K, (len(data_0) // K) + (len(data_1) // K), 14], device="cuda")
for k in range(K):
  test_sections[k] = torch.cat((data_sections_0[k], data_sections_1[k]))

In [69]:
C = 20
errors = []
for c in range(2, C):
  error = 0
  for k in range(K):
    GMM_0 = GMM(train_sections_0[k], c)
    GMM_1 = GMM(train_sections_1[k], c)
    GMM_0.fit()
    GMM_1.fit()

    GMM_0_likelihood = GMM_0.log_likelihood(test_sections[k, :, :-1])
    GMM_1_likelihood = GMM_1.log_likelihood(test_sections[k, :, :-1])
    true_class = test_sections[k, :, -1]
    model_likelihood = torch.cat((GMM_0_likelihood.unsqueeze(1), GMM_1_likelihood.unsqueeze(1)), 1) #(# of data, 2)
    model_class = torch.argmax(model_likelihood, dim=1).to("cuda")
    error += torch.mean(abs(model_class - true_class))
  print("accuracy for component", c, ":", float(error/K))
  errors.append(float(error/K))
optimal_c = torch.argmin(torch.tensor(errors)) + 2
print("optimal c:", optimal_c)

accuracy for component 2 : 0.15599235892295837
accuracy for component 3 : 0.151696115732193
accuracy for component 4 : 0.15149705111980438
accuracy for component 5 : 0.15041884779930115
accuracy for component 6 : 0.14950649440288544
accuracy for component 7 : 0.14864395558834076
accuracy for component 8 : 0.14867711067199707
accuracy for component 9 : 0.14826242625713348
accuracy for component 10 : 0.14847806096076965
accuracy for component 11 : 0.14876006543636322
accuracy for component 12 : 0.14912499487400055
accuracy for component 13 : 0.14884299039840698
accuracy for component 14 : 0.14945673942565918
accuracy for component 15 : 0.14930744469165802
accuracy for component 16 : 0.1485610008239746
accuracy for component 17 : 0.15141412615776062
accuracy for component 18 : 0.151762455701828
accuracy for component 19 : 0.15003731846809387
optimal c: tensor(9)


In [72]:
test_file_path = "test.txt"
with open(test_file_path, 'r') as f:
  test_data = f.readlines()
test_data_l = []
for data in test_data:
  test_data_l.append([float(x) for x in data.split()])
test_data_l = torch.tensor(test_data_l, device="cuda")
test_true_class = test_data_l[:, -1]
test_data_nl = test_data_l[:, :-1]

GMM_final_0 = GMM(data_0[:, :-1], optimal_c)
GMM_final_1 = GMM(data_1[:, :-1], optimal_c)
GMM_final_0.fit()
GMM_final_1.fit()

GMM_0_final_likelihood = GMM_final_0.log_likelihood(test_data_nl)
GMM_1_final_likelihood = GMM_final_1.log_likelihood(test_data_nl)
final_model_likelihood = torch.cat((GMM_0_final_likelihood.unsqueeze(1), GMM_1_final_likelihood.unsqueeze(1)), 1) #(# of data, 2)
final_model_class = torch.argmax(final_model_likelihood, dim=1).to("cuda")
final_error = torch.mean(abs(final_model_class - test_true_class))
print("Final error with optimal number of components:", final_error)

Final error with optimal number of components: tensor(0.1461, device='cuda:0')
