In [1]:
import h5py
import torch
from pykeops.torch import LazyTensor
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.mixture import GaussianMixture
from matplotlib.patches import Ellipse
import numpy as np
from math import pi, cos, sin
from scipy.integrate import quad

In [2]:
f = h5py.File('./MNIST data/train_0.h5', 'r')
data = f['data']
label = f['label']

In [3]:
# parameters
numOfClass = 10
numOfImagesEachClass = 100
numOfGaussian = 10
eta = 0.9
epsilon = 0.1

In [5]:
# data cleaning: choose images from each of the classes
selectedData, selectedLabel = ImageChoosing(numOfImagesEachClass, numOfClass)

# putting 1000 images together
finalData = np.zeros((256 * numOfImagesEachClass * numOfClass, 4))
finalData = selectedData.reshape((256 * numOfImagesEachClass * numOfClass, 4))

In [6]:
# initializing C
gmm = GaussianMixture(n_components = numOfGaussian)
gmm.fit(finalData)
means = gmm.means_
covariances = gmm.covariances_
C = covariances
C = torch.from_numpy(C)
C.requires_grad_(True)

tensor([[[ 8.0157e-02, -1.2806e-11, -1.2423e-02, -7.9514e-06],
         [-1.2806e-11,  1.0000e-06,  7.1197e-11,  1.5149e-13],
         [-1.2423e-02,  7.1197e-11,  1.1307e-01, -3.5915e-05],
         [-7.9514e-06,  1.5149e-13, -3.5915e-05,  1.4227e-05]],

        [[ 2.4754e-02, -5.5033e-11, -9.0513e-03,  6.3079e-06],
         [-5.5033e-11,  1.0000e-06,  2.3975e-10,  4.5027e-14],
         [-9.0513e-03,  2.3975e-10,  1.0171e-01, -1.7361e-05],
         [ 6.3079e-06,  4.5027e-14, -1.7361e-05,  1.6571e-05]],

        [[ 3.1947e-02, -4.4786e-11,  4.0124e-03,  1.0072e-03],
         [-4.4786e-11,  1.0000e-06,  6.1836e-11,  4.7169e-12],
         [ 4.0124e-03,  6.1836e-11,  5.7615e-02, -3.7454e-04],
         [ 1.0072e-03,  4.7169e-12, -3.7454e-04,  1.2136e-03]],

        [[ 8.0707e-02, -7.4472e-11, -1.6705e-02,  7.7261e-04],
         [-7.4472e-11,  1.0000e-06,  4.6358e-12, -2.1334e-11],
         [-1.6705e-02,  4.6358e-12,  1.3294e-01, -4.9966e-03],
         [ 7.7261e-04, -2.1334e-11, -4.9966e-03, 

In [7]:
# initializing W
W = np.random.random_sample((numOfClass * numOfImagesEachClass, numOfGaussian))

# normalizing W
W = NormalizingW(W)

W = torch.from_numpy(W)
W.requires_grad_(True)

tensor([[0.1099, 0.0911, 0.1299,  ..., 0.1857, 0.1497, 0.0599],
        [0.1683, 0.0404, 0.1136,  ..., 0.0098, 0.1215, 0.1185],
        [0.0159, 0.1235, 0.1178,  ..., 0.1141, 0.0392, 0.0899],
        ...,
        [0.1249, 0.1600, 0.2105,  ..., 0.1121, 0.1596, 0.0388],
        [0.1120, 0.0924, 0.1688,  ..., 0.1930, 0.0943, 0.0821],
        [0.0265, 0.0783, 0.1987,  ..., 0.0949, 0.0860, 0.0420]],
       dtype=torch.float64, requires_grad=True)

In [8]:
# initializing flag
flag = 1

# compute X
X = GenerateX()
X = torch.from_numpy(X)

# compute Xhat
Xhat = GenerateXhat()

In [9]:
# compute objective function E
E = GenerateE()
E.requires_grad_(True)

tensor(-3223.1974, dtype=torch.float64, grad_fn=<TraceBackward>)

In [None]:
torch.autograd.grad(E, C, create_graph = True, allow_unused = True)

In [10]:
# begin gradient descent
lambda_ = 1
YW = W
YC = C

while True:
    print("Error of iteration", flag, "is:", E)
    EOld = E
    lambdaNew_ = (1 + np.sqrt(1 + 4 * lambda_^2)) / 2
    gama = (1 - lambdaNew_) / lambda_
    E.backward(retain_graph = True, create_graph = True)
    YWNew = W - eta * W.grad
    W = (1 - gama) * YWNew + gama * YW
    YW = YWNew
    YCNew = YC
    for j in range(numOfGaussian):
        YCNew[j] = ExponentialMap(C[j], - eta * C.grad[j])
        C[j] = ExponentialMap(YCNew[j], gama * LogMap(YCNew[j], YC[j]))
    YC = YCNew
    E = GenerateE()
    if np.abs(E - EOld) < eta:
        break
    flag += 1

Error of iteration 1 is: tensor(-3223.1974, dtype=torch.float64, grad_fn=<TraceBackward>)


NameError: name 'YCNew' is not defined

In [None]:
# From numpy
x = np.array([1., 2., 3.]) #Only Tensors of floating point dtype can require gradients
x = torch.from_numpy(x)
# Now enable gradient 
x.requires_grad_(True)
z = x ** 2
z.requires_grad_(True)
# _ above makes the change in-place (its a common pytorch thing)
y = 2 * torch.dot(x, z)
grad = torch.autograd.grad(y, x, allow_unused = True)
print(grad)

In [4]:
# helper functions
def ImageChoosing(numEach, numOfClass):
    selectedData = np.zeros((numEach * numOfClass, 256, 4))
    selectedLabel = np.zeros(numEach * numOfClass)
    count = np.zeros(numOfClass, dtype = int)
    for i in range(label.size):
        thisData = data[i]
        thisLabel = label[i]
        thisCount = count[thisLabel]
        if thisCount < 100:
            selectedData[thisLabel * 100 + thisCount] = data[i]
            selectedLabel[thisLabel * 100 + thisCount] = label[i]
            count[thisLabel] += 1
        else:
            exit = True
            for j in range(10):
                if count[j] < 100:
                    exit = False
            if exit:
                break
    return selectedData, selectedLabel

def NormalizingW(W):
    for i in range(W.shape[0]):
        sumOfW = np.sum(W[i])
        for j in range(W.shape[1]):
            W[i][j] = W[i][j] / sumOfW
    return W

def GenerateX():
    result = np.zeros((numOfClass * numOfImagesEachClass, 4, 4))
    for i in range(numOfClass * numOfImagesEachClass):
        thisData = selectedData[i]
        gmm = GaussianMixture(n_components = 1)
        gmm.fit(thisData)
        means = gmm.means_
        covariances = gmm.covariances_
        result[i] = covariances
    return result

def GenerateXhat():
    result = np.zeros((numOfClass * numOfImagesEachClass, 4, 4))
    result = torch.from_numpy(result)
    for i in range(numOfClass * numOfImagesEachClass):
        A = 0
        B = 0
        for j in range(numOfGaussian):
            A += W[i][j] * C[j]
            B += W[i][j] * torch.inverse(C[j])
        firstTerm = sqrtMatrix(torch.inverse(B))
        secondTerm = sqrtMatrix(torch.mm(torch.mm(sqrtMatrix(B),A),sqrtMatrix(B)))
        thirdTerm = firstTerm
        result[i] = torch.mm(torch.mm(firstTerm, secondTerm), thirdTerm)
    return result

def sqrtMatrix(matrix):
    u, s, vt = torch.svd(matrix)
    newS = torch.sqrt(torch.diag(s))
    return torch.mm(torch.mm(u, newS), vt)
    
def GenerateE():
    E = 0
    for i in range(numOfClass * numOfImagesEachClass):
        E += (1 / 4) * (torch.mm(torch.inverse(X[i]), Xhat[i]) + torch.mm(torch.inverse(Xhat[i]), X[i]) - 8)
    return torch.trace(E)

def ExponentialMap(x, V):
    return sqrtMatrix(x) * torch.exp(torch.mm(torch.mm(torch.inverse(sqrtMatrix(x)), V), torch.inverse(sqrtMatrix(x)), )) * sqrtMatrix(x)
    
def LogMap(x, Y):
    return sqrtMatrix(x) * torch.exp(torch.mm(torch.mm(torch.inverse(sqrtMatrix(x)), V), torch.inverse(sqrtMatrix(x)), )) * sqrtMatrix(x)
    
    
    
    