In [62]:
import custom_utils
import cv2
import matplotlib
import matplotlib.colors as col
import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from custom_utils import plot_graph
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE
from sklearn.metrics import ConfusionMatrixDisplay
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary

In [63]:
from warnings import warn
from numpy import mean, transpose, cov, cos, sin, shape, exp, newaxis, concatenate
from numpy.linalg import linalg, LinAlgError, solve
from scipy.stats import chi2

In [64]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Create 2 tensor

In [65]:
import torch

# Create two random tensors of size (16, 256)
A = torch.randn(16, 256)
B = torch.randn(16, 256)

# Print the resulting tensors
print(A.size())
print(B.size())


torch.Size([16, 256])
torch.Size([16, 256])


In [66]:
# points = numpy.random.randn(2, 256)
# points = torch.randn(2, 256)

### Original code

In [70]:
def mahalanobis_distance(difference, num_random_features):
    num_samples, _ = shape(difference)
    sigma = cov(transpose(difference))

    try:
        linalg.inv(sigma)
    except LinAlgError:
        warn('covariance matrix is singular. Pvalue returned is 1.1')
        raise

    mu = mean(difference, 0)

    if num_random_features == 1:
        stat = float(num_samples * mu ** 2) / float(sigma)
    else:
        print('Original')
        solve_function = solve(sigma, transpose(mu))
        print(f'solve_function:\n{solve_function}')
        right_side = mu.dot(solve_function)
        print(f'right_side:\n{right_side} **')
        stat = num_samples * right_side
        print(f'stat:\n{stat}')
        print('Modified')
        right_side = transpose(mu).dot(mu)
        print(f'right_side:\n{right_side}')
        stat = num_samples * right_side
        print(f'stat:\n{stat}')

    return chi2.sf(stat, num_random_features)

In [71]:
class MeanEmbeddingTest:

    def __init__(self, data_x, data_y, scale=1, number_of_random_frequencies=5):
        self.data_x = scale*data_x
        self.data_y = scale*data_y
        self.number_of_frequencies = number_of_random_frequencies
        self.scale = scale

    def get_estimate(self, data, point):
        z = data - self.scale * point
        z2 = numpy.linalg.norm(z, axis=1)**2
        return numpy.exp(-z2/2.0)


    def get_difference(self, point):
        return self.get_estimate(self.data_x, point) - self.get_estimate(self.data_y, point)


    def vector_of_differences(self, dim):
        points = numpy.random.randn(self.number_of_frequencies, dim)
        a = [self.get_difference(point) for point in points]
        return numpy.array(a).T

    def compute_pvalue(self):

        _, dimension = numpy.shape(self.data_x)
        obs = self.vector_of_differences(dimension)

        return mahalanobis_distance(obs, self.number_of_frequencies)

In [74]:
mkme_loss = MeanEmbeddingTest(A, B, scale=1, number_of_random_frequencies=2)
transfer_loss = mkme_loss.compute_pvalue()
print(f'transfer_loss: {transfer_loss}')

Original
solve_function:
[-1.35100983e+99 -5.72294528e+87]
right_side:
0.17409520712915558 **
stat:
2.7855233140664892
Modified
right_side:
1.4737542740953602e-178
stat:
2.3580068385525763e-177
transfer_loss: 1.0


### custom_eu

In [46]:
def mahalanobis_distance(difference, num_random_features):
    num_samples, _ = difference.shape
    sigma = torch.cov(difference.T)
    mu = torch.mean(difference, 0)
    if num_random_features == 1:
        stat = float(num_samples * torch.pow(mu,2)) / float(sigma)
    else:
        right_side =  torch.matmul(mu, mu.T)
        print(f'right_side:\n{right_side}')
        stat = num_samples * right_side
        print(f'stat:\n{stat}')
    return chi2.sf(stat.detach().cpu(), num_random_features)

class MeanEmbeddingTest:

    def __init__(self, data_x, data_y, scale, number_of_random_frequencies, device):
        self.device = device
        self.data_x = scale * data_x.to(device)
        self.data_y = scale * data_y.to(device)
        self.number_of_frequencies = number_of_random_frequencies
        self.scale = scale

    def get_estimate(self, data, point):
        z = data - self.scale * point
        z2 = torch.norm(z, p=2, dim=1)**2
        return torch.exp(-z2/2.0)

    def get_difference(self, point):
        return self.get_estimate(self.data_x, point) - self.get_estimate(self.data_y, point)

    def vector_of_differences(self, dim):
        points = torch.tensor(numpy.random.randn(self.number_of_frequencies, dim)).to(device)
        a = [self.get_difference(point) for point in points]
        return torch.stack(a).T

    def compute_pvalue(self):

        _, dimension = self.data_x.size()
        obs = self.vector_of_differences(dimension)
        return mahalanobis_distance(obs, self.number_of_frequencies)

In [47]:
mkme_loss = MeanEmbeddingTest(A, B ,scale=1, number_of_random_frequencies=2, device=device)
transfer_loss = mkme_loss.compute_pvalue()
print(f'transfer_loss: {transfer_loss}')

right_side:
2.0932218697095176e-190
transfer_loss: 1.0


### pinverse

In [48]:
def mahalanobis_distance(difference, num_random_features):
    num_samples, _ = difference.shape
    sigma = torch.cov(difference.T)
    mu = torch.mean(difference, 0)
    if num_random_features == 1:
        stat = float(num_samples * torch.pow(mu,2)) / float(sigma)
    else:
        sigma = torch.pinverse(sigma)
        mathmul_function = torch.matmul(sigma, mu.T)
        print(f'mathmul_function:\n{mathmul_function}')
        right_side =  torch.matmul(mu, mathmul_function)
        print(f'right_side:\n{right_side}')
        stat = num_samples * right_side
    return chi2.sf(stat.detach().cpu(), num_random_features)

class MeanEmbeddingTest:

    def __init__(self, data_x, data_y, scale, number_of_random_frequencies, device):
        self.device = device
        self.data_x = scale * data_x.to(device)
        self.data_y = scale * data_y.to(device)
        self.number_of_frequencies = number_of_random_frequencies
        self.scale = scale

    def get_estimate(self, data, point):
        z = data - self.scale * point
        z2 = torch.norm(z, p=2, dim=1)**2
        return torch.exp(-z2/2.0)

    def get_difference(self, point):
        return self.get_estimate(self.data_x, point) - self.get_estimate(self.data_y, point)

    def vector_of_differences(self, dim):
        points = torch.tensor(numpy.random.randn(self.number_of_frequencies, dim)).to(device)
        a = [self.get_difference(point) for point in points]
        return torch.stack(a).T

    def compute_pvalue(self):

        _, dimension = self.data_x.size()
        obs = self.vector_of_differences(dimension)
        return mahalanobis_distance(obs, self.number_of_frequencies)

In [49]:
mkme_loss = MeanEmbeddingTest(
    A, B, scale=1, number_of_random_frequencies=2, device=device)
transfer_loss = mkme_loss.compute_pvalue()
print(f'transfer_loss: {transfer_loss}')

mathmul_function:
tensor([  8.9676e+98, -6.2560e+100], device='cuda:0', dtype=torch.float64)
right_side:
0.12482258023708817
transfer_loss: 0.36840196457349483
