## Lista 4 - Aprendizagem de máquina probabilística
- Aluno: Lucas Rodrigues Aragão - Graduação 538390

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.stats import multivariate_normal as mvn

## Modelos

### PPCA Usando algoritmo EM

1. Inicializar os parâmetros $\mu =\frac{1}{N}\sum^N_{i=1}x_i$, $W^{(0)}$, $(\sigma^2)^{(0)}$

2. Repetir até convergência 

    Expectation:
    $$\mathbb{E}[z_i] = M^{-1} W^T (x_i - \mu)$$
    $$M = W^T W + \sigma^2 I$$
    $$\mathbb{E}[z_i z_i^T] = \sigma^2 M^{-1} + \mathbb{E}[z_i] \mathbb{E}[z_i]^T$$
    

    Maximization: 
    $$W = \big[\sum^N_{i=1} (x_i - \mu) \mathbb{E}[z_i]^T \big] \big[\sum^N_{i=1} \mathbb{E}[z_i z_i^T] \big]^{-1}$$

    $$\sigma^2 = \frac{1}{ND} \sum^N_{i=1} \{ || x_i - \mu||^2 - 2 \mathbb{E}[z_i]^T W^T (x_i - \mu) + \text{Tr}(\mathbb{E} [z_i z_i^T] W^T W)\}$$

A projeção probabilística dos dados é dada por:
    $$p(z_i| x_i) = \mathcal{N}(z_i| \hat{M}^{-1} \hat{W}^T (x_i - \hat{\mu}), \hat{\sigma}^2 \hat{M}^{-1})$$
Reconstrução probabilística das projeções é:
    $$p(x_i| z_i) =  \mathcal{N}(x_i| \hat{W}z_i + \hat{\mu}, \hat{\sigma}^2 I)$$

In [24]:
class PPCA_EM:
    """Probabilistic PCA fitted with the EM algorithm.

    Model: ``x = W z + mu + eps`` with ``z ~ N(0, I_q)`` and
    ``eps ~ N(0, sigma2 * I_D)``.

    Attributes:
        mu: (D,) data mean; kept fixed during training.
        W: (D, q) loading matrix.
        sigma2: isotropic noise variance.
        M: (q, q) matrix ``W^T W + sigma2 * I`` for the current parameters.
        Ez: (N, q) posterior means ``E[z_i]`` from the last E-step
            (``None`` before training).
        Ezz: (N, q, q) posterior second moments ``E[z_i z_i^T]`` from the
            last E-step (``None`` before training).
    """

    def __init__(self, mu, W0, sigma0):
        """Store the initial parameters.

        Args:
            mu: data mean, array-like of length D (a pandas Series is accepted).
            W0: initial loading matrix, array-like of shape (D, q).
            sigma0: initial noise *variance* (sigma squared), a positive scalar.
        """
        # Coerce to plain float arrays: pandas objects would otherwise bring
        # label alignment into the linear algebra and turn W into a DataFrame.
        self.mu = np.asarray(mu, dtype=float).ravel()
        self.W = np.array(W0, dtype=float)
        self.sigma2 = float(sigma0)
        # Defined from the start so project() also works on an untrained model.
        self.M = self._compute_M()
        self.Ez = None
        self.Ezz = None

    @staticmethod
    def _as_matrix(X):
        """Return X as a 2-D float ndarray of shape (N, D)."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (N, D); got array with ndim={X.ndim}")
        return X

    def _compute_M(self):
        """Return ``W^T W + sigma2 * I`` for the current W and sigma2."""
        q = self.W.shape[1]
        return self.W.T @ self.W + self.sigma2 * np.eye(q)

    def _e_step(self, X):
        """Vectorised E-step over all rows of the (N, D) ndarray X."""
        self.M = self._compute_M()
        M_inv = np.linalg.inv(self.M)
        # M is symmetric, so row i of (X - mu) W M^{-1} is M^{-1} W^T (x_i - mu).
        self.Ez = (X - self.mu) @ self.W @ M_inv
        # E[z_i z_i^T] = sigma2 * M^{-1} + E[z_i] E[z_i]^T, batched outer product.
        self.Ezz = self.sigma2 * M_inv + self.Ez[:, :, None] * self.Ez[:, None, :]

    def expectation(self, xi, index):
        """E-step for a single sample.

        Writes ``E[z_i]`` into ``self.Ez[index]`` and ``E[z_i z_i^T]`` into
        ``self.Ezz[index]``; both arrays must already be allocated.

        Args:
            xi: one observation, array-like of length D.
            index: row of ``Ez`` / ``Ezz`` to fill.
        """
        self.M = self._compute_M()
        M_inv = np.linalg.inv(self.M)
        Ez_i = M_inv @ self.W.T @ (np.asarray(xi, dtype=float) - self.mu)
        self.Ez[index] = Ez_i
        self.Ezz[index] = self.sigma2 * M_inv + np.outer(Ez_i, Ez_i)

    def maximization(self, X):
        """M-step: update W and sigma2 from the stored posterior moments.

        Args:
            X: data, array-like of shape (N, D).

        Raises:
            RuntimeError: if no E-step has been run yet.
        """
        if self.Ez is None or self.Ezz is None:
            raise RuntimeError("maximization() requires a previous E-step")
        X = self._as_matrix(X)
        N, D = X.shape
        Xc = X - self.mu
        Ez = self.Ez

        # Sum of (x_i - mu) E[z_i]^T, shape (D, q).
        S_xz = Xc.T @ Ez
        # Sum of E[z_i z_i^T], shape (q, q).
        S_zz = np.sum(self.Ezz, axis=0)

        # W = S_xz S_zz^{-1}; S_zz is symmetric, so solve the transposed system
        # instead of forming the explicit inverse.
        W = np.linalg.solve(S_zz, S_xz.T).T
        self.W = W

        # sigma2 = 1/(N D) * sum_i { ||x_i - mu||^2
        #                            - 2 E[z_i]^T W^T (x_i - mu)
        #                            + Tr(E[z_i z_i^T] W^T W) }
        # The trace is linear, so the per-sample traces collapse into one
        # trace of the summed second moments.
        WtW = W.T @ W
        total = (
            np.sum(Xc ** 2)
            - 2.0 * np.sum((Ez @ W.T) * Xc)
            + np.trace(S_zz @ WtW)
        )
        self.sigma2 = total / (N * D)
        # Keep M in sync with the parameters just updated.
        self.M = self._compute_M()

    def train(self, X, epochs, verbose=True):
        """Run `epochs` EM iterations (E-step followed by M-step).

        Args:
            X: data, array-like of shape (N, D). A pandas DataFrame is
               converted to an ndarray first, because iterating a DataFrame
               yields column labels rather than rows.
            epochs: number of EM iterations.
            verbose: when True (default), print the epoch counter.
        """
        X = self._as_matrix(X)
        N = X.shape[0]
        q = self.W.shape[1]
        self.Ez = np.zeros((N, q))
        self.Ezz = np.zeros((N, q, q))

        for epoch in range(epochs):
            if verbose:
                print(f"Epoch: {epoch +1}")
            self._e_step(X)
            self.maximization(X)

    def project(self, X):
        """Sample latent codes from the posterior p(z | x).

        Args:
            X: one observation of length D, or a batch of shape (N, D).

        Returns:
            A sample of shape (q,) for a single observation, or (N, q) for a
            batch, drawn from N(M^{-1} W^T (x - mu), sigma2 * M^{-1}).
        """
        X = np.asarray(X, dtype=float)
        # Recompute M from the current parameters: the value cached during the
        # E-step predates the last M-step.
        M_inv = np.linalg.inv(self._compute_M())
        mean = (X - self.mu) @ self.W @ M_inv
        cov = self.sigma2 * M_inv
        if mean.ndim == 1:
            return np.random.multivariate_normal(mean, cov)
        # The posterior covariance is shared by all samples: draw the zero-mean
        # noise in one call and shift it by each posterior mean.
        noise = np.random.multivariate_normal(
            np.zeros(cov.shape[0]), cov, size=mean.shape[0]
        )
        return mean + noise

    def reconstruct(self, Z):
        """Sample observations from p(x | z) = N(W z + mu, sigma2 * I).

        Args:
            Z: latent codes of shape (N, q) (a single code of length q also works).

        Returns:
            Samples with the same leading shape as Z and last dimension D.
        """
        Z = np.asarray(Z, dtype=float)
        mean = Z @ self.W.T + self.mu
        # The covariance is isotropic, so scaled standard normal noise is an
        # exact draw and avoids factorising a D x D matrix per sample.
        return mean + np.sqrt(self.sigma2) * np.random.standard_normal(mean.shape)

    def generate(self, n_samples):
        """Draw `n_samples` observations from the model.

        Samples z ~ N(0, I_q), then x ~ N(W z + mu, sigma2 * I).

        Returns:
            Array of shape (n_samples, D).
        """
        q = self.W.shape[1]
        z = np.random.randn(n_samples, q)
        return self.reconstruct(z)

### Inferência Variacional 

## Questões

### Questão 1

In [14]:
# Read the MNIST subset; header=None makes pandas label the columns 0..D-1
# instead of consuming the first row as column names.
data_mnist = pd.read_csv(
    "mnist_5.csv",
    header=None,
)
data_mnist

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,774,775,776,777,778,779,780,781,782,783
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6310,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6311,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Seed NumPy's legacy global RNG so the random initialisation of W0 below is
# the same on every "Restart & Run All".
np.random.seed(42)

D = data_mnist.shape[1]  # observed dimensionality (number of columns)
L = 2                    # latent dimensionality
# Random D x L matrix, orthonormalised through a QR decomposition.
A = np.random.randn(D, L)
Q, _ = np.linalg.qr(A)
W0 = Q   

# Column-wise mean of the data; the initial noise variance is 10% of the
# average per-column variance.
mu = np.mean(data_mnist, axis = 0)
var_mean = np.mean(np.var(data_mnist, axis=0))
sigma0 = 0.1 * var_mean

model_mnist = PPCA_EM(mu=mu, W0=W0, sigma0= sigma0)

In [26]:
# Fit the PPCA model on the loaded data with 300 EM iterations.
model_mnist.train(
    X=data_mnist,
    epochs=300,
)

Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Epoch: 41
Epoch: 42
Epoch: 43
Epoch: 44
Epoch: 45
Epoch: 46
Epoch: 47
Epoch: 48
Epoch: 49
Epoch: 50
Epoch: 51
Epoch: 52
Epoch: 53
Epoch: 54
Epoch: 55
Epoch: 56
Epoch: 57
Epoch: 58
Epoch: 59
Epoch: 60
Epoch: 61
Epoch: 62
Epoch: 63
Epoch: 64
Epoch: 65
Epoch: 66
Epoch: 67
Epoch: 68
Epoch: 69
Epoch: 70
Epoch: 71
Epoch: 72
Epoch: 73
Epoch: 74
Epoch: 75
Epoch: 76
Epoch: 77
Epoch: 78
Epoch: 79
Epoch: 80
Epoch: 81
Epoch: 82
Epoch: 83
Epoch: 84
Epoch: 85
Epoch: 86
Epoch: 87
Epoch: 88
Epoch: 89
Epoch: 90
Epoch: 91
Epoch: 92
Epoch: 93
Epoch: 94
Epoch: 95
Epoch: 96
Epoch: 97
Epoch: 98
Epoch: 99
Epoch: 100
Epoch: 1