In [183]:
import torch
import torchvision
from PIL import Image
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt
import numpy as np

In [206]:

def evaluate_gaussian(mu, sigma, x):
    """Evaluate the univariate Gaussian density N(x; mu, sigma^2) element-wise.

    Args:
        mu: Mean, as a Python number or a tensor.
        sigma: Standard deviation, as a Python number or a tensor.
        x: Point(s) at which the density is evaluated, as a Python number
            or a tensor.

    Returns:
        Tensor holding 1 / sqrt(2*pi*sigma^2) * exp(-0.5 * ((mu - x) / sigma)^2).
        Tensor arguments are combined with the usual broadcasting rules.
    """
    # torch.as_tensor wraps plain numbers and passes existing tensors through
    # as they are. The previous torch.tensor(<tensor>) calls emitted a
    # copy-construct UserWarning every time tensors were passed in.
    mu = torch.as_tensor(mu)
    sigma = torch.as_tensor(sigma)
    x = torch.as_tensor(x)

    coef_norm = 1 / torch.sqrt(2.0 * torch.pi * sigma ** 2)
    return coef_norm * torch.exp(-0.5 * ((mu - x) / sigma) ** 2)
    

def binarize_image(image_tensor):
    """Threshold a tensor at 0.5 in place and return that same tensor.

    Entries strictly greater than 0.5 become 1; entries less than or equal
    to 0.5 become 0. The argument itself is modified.
    """
    # Both masks are taken from the untouched input; the two writes hit
    # disjoint entries, so their order does not matter.
    is_dark = image_tensor <= 0.5
    is_bright = image_tensor > 0.5
    image_tensor[is_dark] = 0
    image_tensor[is_bright] = 1
    return image_tensor

def imshow(image_tensor):
    """Display a tensor as a grayscale image with matplotlib.

    The tensor is converted with ``.numpy()`` and drawn using the 'gray'
    colormap, then the figure is shown.
    """
    pixels = image_tensor.numpy()
    plt.imshow(pixels, cmap = 'gray')
    plt.show()

def test_model(input_torch, p_m_pix_val_given_k, p_t_tensor, num_classes = 10):
    #assumes that the input comes in a row
    
    # Obtiene p_t_k_given_m para input_torch
    p_m_1 = ((input_torch == 1).sum(dim=0)/input_torch.shape[0])+1e-12
    p_t = torch.tensor([1/10])
    p_posterior = torch.log(p_m_1).sum(dim=0) + torch.log(p_t)
    print(p_posterior)
    
    p_t_k_given_m = [] 
    for k in range(num_classes):
        # Estimacion de la probabilidad posterior
        p_t_k_given_m.append(torch.log(p_m_pix_val_given_k[:, k]).sum(dim=0) + torch.log(p_t_tensor[k]))
        
    print(p_t_k_given_m)
    print(max(p_t_k_given_m))
    #TODO IMPLEMENT
    #return (predicted_label, scores_classes)

#def test_model_batch(test_set, labels, p_m_pix_val_given_k, p_t_tensor):
    #TODO IMPLEMENT

def train_model(train_data_tensor_bin, train_data_tensor_gray, labels_training, num_classes = 10):
    """Estimate naive Bayes parameters from binarized and grayscale images.

    Both data tensors hold one image per column (rows are pixels). For each
    class k in range(num_classes) this computes:
      * the prior p(t = k) as the fraction of columns labelled k,
      * p(m_d = 1 | t = k), the fraction of class-k images whose binarized
        pixel d equals 1, plus 1e-12,
      * the per-pixel mean and standard deviation (plus 1e-12) of the
        grayscale class-k images, and the Gaussian density of those same
        training pixels under that mean/std.

    Args:
        train_data_tensor_bin: 2-D tensor of binarized images, pixels x images.
        train_data_tensor_gray: 2-D tensor of grayscale images, pixels x images.
        labels_training: Tensor with one class label per image column.
        num_classes: Number of classes; labels 0 .. num_classes - 1 are used.

    Returns:
        Tuple (p_m_pix_val_given_k, gaussian_given_k, p_t_tensor) where
        p_m_pix_val_given_k is the list [p(m=0|k), p(m=1|k)], each of shape
        (pixels, num_classes); gaussian_given_k has one column per training
        image, grouped by class in increasing k; and p_t_tensor has shape
        (num_classes,).
    """
    eps = 1e-12

    # Rows are pixels, columns are images. The shape does not change per
    # class, so it is read once instead of on every loop iteration.
    _, N = train_data_tensor_bin.shape

    priors = []
    p_m_1_columns = []
    gaussian_blocks = []

    for k in range(num_classes):
        # Select the columns (images) that belong to class k.
        belongs_to_k = labels_training == k
        train_data_tensor_bin_per_k = train_data_tensor_bin[:, belongs_to_k].type(torch.int64)
        train_data_tensor_gray_per_k = train_data_tensor_gray[:, belongs_to_k].type(torch.float64)
        n_k = train_data_tensor_bin_per_k.shape[1]

        # Prior probability of class k.
        priors.append(n_k / N)

        # Per-pixel likelihood of an "on" pixel, kept as a (pixels, 1) column.
        p_m_1_given_k = ((train_data_tensor_bin_per_k == 1).sum(dim=1) / n_k) + eps
        p_m_1_columns.append(p_m_1_given_k.unsqueeze(1))

        # Per-pixel mean and standard deviation as (pixels, 1) columns so
        # they broadcast against the (pixels, n_k) grayscale block.
        mu_given_k = torch.mean(train_data_tensor_gray_per_k, dim=1).unsqueeze(1)
        sigma_given_k = (torch.std(train_data_tensor_gray_per_k, dim=1) + eps).unsqueeze(1)

        # Density of the class-k training pixels under the fitted Gaussians.
        gaussian_blocks.append(
            evaluate_gaussian(mu_given_k, sigma_given_k, train_data_tensor_gray_per_k)
        )

    # Concatenate once at the end instead of growing tensors inside the loop.
    p_m_1_given_k_acc = torch.cat(p_m_1_columns, 1)
    p_t_tensor_acc = torch.tensor(priors)
    gaussian_given_k_acc = torch.cat(gaussian_blocks, 1)

    # Complement of p(m=1|k). A pixel that is on in every class-k image has
    # p(m=1|k) == 1.0 (the 1e-12 smoothing can be lost to rounding), which
    # would make the complement exactly 0 and its log -inf; clamp it to the
    # same floor used for p(m=1|k).
    p_m_0_given_k_acc = torch.clamp(1 - p_m_1_given_k_acc, min=eps)

    print("p_m_0_given_k_acc = ", p_m_0_given_k_acc.shape)
    print("p_m_1_given_k_acc = ", p_m_1_given_k_acc.shape)
    print("p_t_tensor_acc = ", p_t_tensor_acc.shape)
    print("gaussian_given_k_acc = ", gaussian_given_k_acc.shape)

    return ([p_m_0_given_k_acc, p_m_1_given_k_acc], gaussian_given_k_acc, p_t_tensor_acc)

def load_dataset(path = "src_base/mnist_dataset/train"):
    """Load an image-folder dataset as column-stacked tensors.

    Every image listed by ``torchvision.datasets.ImageFolder(path)`` is read
    twice: once with ``TF.to_tensor`` and binarized with ``binarize_image``,
    and once with ``TF.pil_to_tensor``. Each version is flattened to a single
    column and the columns are concatenated in dataset order.

    Args:
        path: Root directory in the layout expected by ImageFolder.

    Returns:
        Tuple (train_data_tensor, train_data_tensor_gray, labels) where the
        first two hold one image per column (binarized and ``pil_to_tensor``
        pixel values respectively) and ``labels`` is a tensor with the class
        index of each column. The two data entries are None if the dataset
        lists no images.
    """
    dataset = torchvision.datasets.ImageFolder(path)

    bin_columns = []
    gray_columns = []
    labels_training = []

    for image_path, label in dataset.imgs:
        # The context manager closes the underlying file; previously every
        # opened image kept its file handle until garbage collection.
        with Image.open(image_path) as image:
            x_tensor = TF.to_tensor(image).squeeze()  # presumably floats in [0.0, 1.0] for 8-bit images
            x_tensor_gray = TF.pil_to_tensor(image).squeeze()  # pixel values as stored, not rescaled
        x_tensor_bin = binarize_image(x_tensor)

        # Flatten (rows, cols) -> (rows * cols, 1) so each image is one column.
        bin_columns.append(
            x_tensor_bin.reshape(x_tensor_bin.shape[0] * x_tensor_bin.shape[1], -1)
        )
        gray_columns.append(
            x_tensor_gray.reshape(x_tensor_gray.shape[0] * x_tensor_gray.shape[1], -1)
        )
        labels_training.append(label)

    # One concatenation at the end; concatenating inside the loop re-copied
    # the whole accumulated tensor for every image.
    train_data_tensor = torch.cat(bin_columns, 1) if bin_columns else None
    train_data_tensor_gray = torch.cat(gray_columns, 1) if gray_columns else None
    return (train_data_tensor, train_data_tensor_gray, torch.tensor(labels_training))

# Load the training images: binarized and grayscale versions (one image per
# column) together with their class labels.
(train_data_tensor, train_data_tensor_gray, labels_training) = load_dataset()

print("train gray dimensions", train_data_tensor_gray.shape)
print("train bin dimensions ", train_data_tensor.shape)
print("train labels ", len(labels_training))

# Fit the model. p_m_pix_val_given_k is the list [p(m=0|k), p(m=1|k)].
p_m_pix_val_given_k, gaussian_given_k, p_t_tensor = train_model(train_data_tensor, train_data_tensor_gray, labels_training)

# Build a random binary test image: uniform_ overwrites the randn samples in
# place with U(0, 1) values, which bernoulli then uses as per-pixel
# probabilities of drawing a 1. No seed is set, so the image differs per run.
new_image = torch.randn(1,784).uniform_(0,1)
input_torch = torch.bernoulli(new_image)
#print(input_torch)
# Index 1 selects p(m=1|k) from the list returned by train_model.
test_model(input_torch, p_m_pix_val_given_k[1], p_t_tensor, num_classes = 10)

# NOTE(review): the commented-out calls below use an older two-value
# train_model signature and the not-yet-implemented test_model_batch.
#train model by calculating the prior probabilities
#(p_m_pix_val_given_k, p_t_tensor) = train_model(train_data_tensor, labels_training)
#print("p_m_pix_val_given_k size ", p_m_pix_val_given_k.shape)
#(predicted_label, scores_classes) = test_model(train_data_tensor[:, 500], p_m_pix_val_given_k, p_t_tensor)
#print("predicted_label ", predicted_label)
#print("real label ", labels_training[500])
#acc = test_model_batch(train_data_tensor, labels_training, p_m_pix_val_given_k, p_t_tensor)
#print("Model accuracy ", acc)
    

               
    
    

train gray dimensions torch.Size([784, 600])
train bin dimensions  torch.Size([784, 600])
train labels  600
p_m_0_given_k_acc =  torch.Size([784, 10])
p_m_1_given_k_acc =  torch.Size([784, 10])
p_t_tensor_acc =  torch.Size([10])
gaussian_given_k_acc =  torch.Size([784, 600])
tensor([-10861.2949])
[tensor(-11780.0215), tensor(-15003.5352), tensor(-10533.9688), tensor(-11356.7598), tensor(-11474.4941), tensor(-10872.4629), tensor(-11858.9521), tensor(-12010.2129), tensor(-11312.6689), tensor(-12453.1494)]
tensor(-10533.9688)


  coef_norm = 1 / (torch.sqrt(torch.tensor(2.0 * torch.pi * sigma ** 2)))
  return coef_norm * torch.exp(torch.tensor(-0.5*((mu - x)/sigma )** 2))


In [3]:
# Gaussian densities of the observations x = 7 and x = 17 under two sets
# of (mu, sigma) parameters; presumably d_* indexes the feature and t_* the
# class, going by the variable names.
score_d_1_t_1 = evaluate_gaussian(mu = 20.25, sigma = 0.95, x = 7)
score_d_2_t_1 = evaluate_gaussian(mu = 59.75, sigma = 1.25, x = 17)
score_d_1_t_0 = evaluate_gaussian(mu = 5.5, sigma = 0.7, x = 7)
score_d_2_t_0 = evaluate_gaussian(mu = 17.5, sigma = 3.53, x = 17)

# Report all four scores, t = 1 first and then t = 0.
print("score_d_1_t_1 ", score_d_1_t_1)
print("score_d_2_t_1 ", score_d_2_t_1)
print("score_d_1_t_0 ", score_d_1_t_0)
print("score_d_2_t_0 ", score_d_2_t_0)

score_d_1_t_1  tensor(2.4102e-43)
score_d_2_t_1  tensor(0.)
score_d_1_t_0  tensor(0.0574)
score_d_2_t_0  tensor(0.1119)
