In [105]:
import h5py
import torch
from pykeops.torch import LazyTensor
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.mixture import GaussianMixture
from matplotlib.patches import Ellipse
import numpy as np
from math import pi, cos, sin
from scipy.integrate import quad

In [33]:
f = h5py.File('./MNIST data/train_0.h5', 'r')
data = f['data']
label = f['label']

In [148]:
# parameters
numOfClass = 10
numOfImagesEachClass = 100
numOfGaussian = 10
eta = 0.9
epsilon = 0.1

In [109]:
# data cleaning: choose images from each of the classes
selectedData, selectedLabel = ImageChoosing(numOfImagesEachClass, numOfClass)

# putting 1000 images together
finalData = np.zeros((256 * numOfImagesEachClass * numOfClass, 4))
finalData = selectedData.reshape((256 * numOfImagesEachClass * numOfClass, 4))

In [120]:
# initializing C
gmm = GaussianMixture(n_components = numOfGaussian)
gmm.fit(finalData)
means = gmm.means_
covariances = gmm.covariances_
C = covariances

In [100]:
# initializing W
W = np.random.random_sample((numOfClass * numOfImagesEachClass, numOfGaussian))

# normalizing W
W = NormalizingW(W)

In [140]:
# initializing flag
flag = 1

# compute X
X = GenerateX()

# compute Xhat
Xhat = GenerateXhat()

In [146]:
# compute objective function E
E = GenerateE()

In [147]:
print(E)

[[ -470.96468441 -2000.00000012 -2088.30908105 -1960.1263591 ]
 [-1999.99853546 -1499.99999999 -2000.00300581 -2000.00127519]
 [-1999.87665993 -1999.99999979 -1067.41318633 -1986.15395558]
 [-1604.23513902 -2000.0000066  -2077.14124076 11174.58542179]]


In [181]:
# begin gradient descent
lambda_ = 1
YW = W
YC = C

def GradientDescent():
    while flag == 1:
        EOld = E
        lambdaNew_ = (1 + np.sqrt(1 + 4 * lambda_^2)) / 2
        gama = (1 - lambdaNew_) / lambda_
        functionE = torch.from_numpy(E)
        functionW = torch.from_numpy(W)
        functionW.requires_grad_(True)
        functionE.requires_grad_(True)
        grad = torch.autograd.grad(functionE[2][2],functionW, allow_unused = True)
        print(grad)
        print(functionW) ######error
        YWNew = W - eta * grad
        W = (1 - gama) * YWNew + gama * YW
        for j in range(numOfGaussian):
            ######################################
            ######################################
        E = GenerateE()
        if np.abs(E - EOld) < eta:
            break
            
GradientDescent()

(None,)
tensor([[0.1230, 0.0589, 0.1226,  ..., 0.1691, 0.1502, 0.1511],
        [0.0409, 0.1272, 0.1587,  ..., 0.1310, 0.0977, 0.0404],
        [0.1397, 0.1351, 0.0752,  ..., 0.0238, 0.1212, 0.0904],
        ...,
        [0.1804, 0.1310, 0.0547,  ..., 0.0296, 0.0019, 0.1370],
        [0.0610, 0.0441, 0.0208,  ..., 0.0543, 0.2451, 0.0335],
        [0.0907, 0.1313, 0.1121,  ..., 0.1068, 0.0377, 0.1005]],
       dtype=torch.float64, requires_grad=True)


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [180]:
# From numpy
x = np.array([1., 2., 3.]) #Only Tensors of floating point dtype can require gradients
x = torch.from_numpy(x)
# Now enable gradient 
x.requires_grad_(True)
z = x ** 2
z.requires_grad_(True)
# _ above makes the change in-place (its a common pytorch thing)
y = 2 * z[0] + 3 * z[1]**2
grad = torch.autograd.grad(y, x, allow_unused = True)
print(grad)

(tensor([ 4., 96.,  0.], dtype=torch.float64),)


In [145]:
# helper functions
def ImageChoosing(numEach, numOfClass):
    selectedData = np.zeros((numEach * numOfClass, 256, 4))
    selectedLabel = np.zeros(numEach * numOfClass)
    count = np.zeros(numOfClass, dtype = int)
    for i in range(label.size):
        thisData = data[i]
        thisLabel = label[i]
        thisCount = count[thisLabel]
        if thisCount < 100:
            selectedData[thisLabel * 100 + thisCount] = data[i]
            selectedLabel[thisLabel * 100 + thisCount] = label[i]
            count[thisLabel] += 1
        else:
            exit = True
            for j in range(10):
                if count[j] < 100:
                    exit = False
            if exit:
                break
    return selectedData, selectedLabel

def NormalizingW(W):
    for i in range(W.shape[0]):
        sumOfW = np.sum(W[i])
        for j in range(W.shape[1]):
            W[i][j] = W[i][j] / sumOfW
    return W

def GenerateX():
    result = np.zeros((numOfClass * numOfImagesEachClass, 4, 4))
    for i in range(numOfClass * numOfImagesEachClass):
        thisData = selectedData[i]
        gmm = GaussianMixture(n_components = 1)
        gmm.fit(thisData)
        means = gmm.means_
        covariances = gmm.covariances_
        result[i] = covariances
    return result

def GenerateXhat():
    result = np.zeros((numOfClass * numOfImagesEachClass, 4, 4))
    for i in range(numOfClass * numOfImagesEachClass):
        A = 0
        B = 0
        for j in range(numOfGaussian):
            A += W[i][j] * C[j]
            B += W[i][j] * np.linalg.inv(C[j])
        firstTerm = sqrtMatrix(np.linalg.inv(B))
        secondTerm = sqrtMatrix(np.dot(np.dot(sqrtMatrix(B),A),sqrtMatrix(B)))
        thirdTerm = firstTerm
        result[i] = np.dot(np.dot(firstTerm, secondTerm), thirdTerm)
    return result

def sqrtMatrix(matrix):
    u, s, vt = np.linalg.svd(matrix)
    newS = np.sqrt(s)
    resultS = np.zeros((newS.shape[0], newS.shape[0]))
    for i in range(newS.shape[0]):
        resultS[i][i] = newS[i]
    return np.dot(np.dot(u, resultS), vt)
    
def GenerateE():
    E = 0
    for i in range(numOfClass * numOfImagesEachClass):
        E += (1 / 4) * (np.dot(np.linalg.inv(X[i]), Xhat[i]) + np.dot(np.linalg.inv(Xhat[i]), X[i]) - 8)
    return E