## Evaluation over Sample Size on SVHN

In [None]:
# Load Packages
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F

import timeit
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CelebA,MNIST,SVHN
import torchvision.datasets as dset
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

import torch
from torch import nn
from torch import distributions
from torch.nn.parameter import Parameter

from AE import *
from Sampling import *
from Metric import *
from RealNVP import *
import warnings
import timeit
# Silence every UserWarning for the rest of the session so the long-running
# evaluation cells below are not flooded with repeated library warnings.
warnings.filterwarnings("ignore",category=UserWarning)

# Bridge to R through rpy2: the vine-copula sampler further down calls
# `rvinecop.vine` / `rvinecop.inverse_rosenblatt` with NumPy arrays.
import rpy2.robjects.numpy2ri
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
# Activate rpy2's NumPy conversion so those arrays can be handed to R directly.
robjects.numpy2ri.activate()
# R's `base` package; used below for `base.list(...)` to build the copula controls.
base = importr('base')
# R package `rvinecopulib`; must be installed in the R library rpy2 is linked to.
rvinecop = importr('rvinecopulib')

from sklearn.mixture import GaussianMixture
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV

# Transform handed to both SVHN dataset splits when they are constructed below.
trans_SVHN =transforms.ToTensor()

In [None]:
# Load data
# The original used the IPython magic `%pwd`, which only parses inside a
# notebook kernel. `os.getcwd()` yields the same working-directory string and
# keeps this file valid plain Python.
import os

path = os.getcwd()
# download=False: both splits are expected to already be present under `path`.
dataset_train = SVHN(path, split="train", transform=trans_SVHN, download=False)
dataset_test = SVHN(path, split="test", transform=trans_SVHN, download=False)

In [None]:

# Initialize model
# Both networks are built on the CPU with a 20-dimensional latent space and
# then filled with the pre-trained weights stored next to this notebook.
cpu = torch.device('cpu')
model_AE = ae_SVHN(image_size=32, hidden_dim=20, z_size=20, device='cpu')
model_VAE = vae_SVHN(image_size=32, hidden_dim=20, z_size=20, device='cpu')
model_AE.load_state_dict(torch.load('./ae_SVHN_200.pth', map_location=cpu))
model_VAE.load_state_dict(torch.load('./vae_SVHN_200.pth', map_location=cpu))

# The models are only used for encoding/decoding from here on, never trained.
# Switch them to evaluation mode so that any BatchNorm/Dropout layers use
# their inference behaviour instead of depending on (and mutating running
# statistics with) whatever batch is passed through them.
# NOTE(review): assumes ae_SVHN / vae_SVHN are nn.Module subclasses, which the
# use of load_state_dict above implies.
model_AE.eval()
model_VAE.eval()

In [None]:
def calculate_score(img_test, img_new):
    """Compare generated images against real test images.

    Both image batches are resized to 224 and normalised with the same
    per-channel mean/std before being passed to ``ConvNetFeatureSaver``.

    Args:
        img_test: batch of real images.
        img_new: batch of generated images.

    Returns:
        A NumPy vector of length 8: entries 0-2 and 3-5 hold the three values
        returned by ``compute_score`` for feature sets 0 and 1, entry 6 is the
        inception score of the generated images and entry 7 the FID between
        real and generated images (both computed on feature set 3). The
        plotting cell labels these as EMD/MMD/1NN on PIXEL and CONV features,
        Inception and FID.
    """
    extractor = ConvNetFeatureSaver()

    def _features(batch):
        # Same preprocessing for real and generated images.
        resized = transforms.Resize(224)(batch)
        normalised = transforms.Normalize(
            (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
        )(resized)
        return extractor.extract(normalised)

    # Generated images are processed first, then the real ones.
    features_new = _features(img_new)
    features_real = _features(img_test)

    # Three metrics for each of the first two feature sets, plus two scalars.
    score = np.zeros(2 * 3 + 2)
    for space in (0, 1):
        offset = 3 * space
        score[offset:offset + 3] = compute_score(
            features_real[space], features_new[space], sqrt=True
        )
    score[6] = inception_score(features_new[3])
    score[7] = fid(features_real[3], features_new[3])
    return score

##### Small sample size

In [None]:
# ---------------------------------------------------------------------------
# Evaluation over the number of training latents used to fit each sampler.
#
# For every sample size n (200, 400, ..., 2000) each generative model draws
# `n_generate` latent codes, the codes are decoded to images and the images
# are scored against a fixed batch of real test images.
#
# Model index m (second axis of `score`):
#   0 Beta copula   1 VAE prior   2 vine copula (trunc_lvl=15)   3 Gauss
#   4 independent   5 GMM         6 KDE                          7 RealNVP
# ---------------------------------------------------------------------------
interval = 200          # step between consecutive sample sizes
max_sample_size = 2000  # largest sample size evaluated
n_generate = 2000       # latent codes / images generated per model and size
train_pool_size = 4000  # training images whose latents form the sampling pool
nr_model = 8            # models 0..7 (was 7, which overflowed `score`)
nr_metric = 8

sample_sizes = list(range(interval, max_sample_size + 1, interval))
score = np.zeros((len(sample_sizes), nr_model, nr_metric))

dataloader_test = DataLoader(dataset_test, batch_size=n_generate, shuffle=True)
img_test, attr = next(iter(dataloader_test))
dataloader_train = DataLoader(dataset_train, batch_size=train_pool_size, shuffle=True)
img_train, attr = next(iter(dataloader_train))

# The encodings do not depend on n, so they are computed once instead of once
# per sample size.
with torch.no_grad():
    lv = model_AE.encode(img_train)
    y_test = model_AE.encode(img_test)  # not used further in this cell
lv = lv.detach().numpy()
latent_dim = lv.shape[1]

# Controls for the R vine-copula fit; identical for every fit, so built once.
copula_controls = base.list(family_set="tll", trunc_lvl=15, cores=1)


def _draw_in_chunks(latents, n, total, draw):
    """Generate `total` latent codes from models fitted on n training latents.

    A sampler fitted on n points is asked for at most n new points, so the
    `total` codes are produced in chunks: for every chunk a fresh random
    subset of n rows (without replacement) is taken from `latents` and
    `draw(subset, k)` must return k new codes. The last chunk is shorter when
    n does not divide `total`.

    Returns a float32 NumPy array of shape (total, latents.shape[1]).
    """
    out = np.empty((total, latents.shape[1]), dtype=np.float32)
    for start in range(0, total, n):
        k = min(n, total - start)
        subset = latents[np.random.choice(latents.shape[0], size=n, replace=False)]
        out[start:start + k] = np.asarray(draw(subset, k), dtype=np.float32)
    return out


def _beta_copula(subset, k):
    """Draw k codes with the project's `sampleing1` (Beta copula) sampler."""
    return sampleing1(subset, subset, k)


def _vine_copula(subset, k):
    """Fit an R vine copula on `subset` and push k uniform rows through it."""
    noise = np.random.rand(k, subset.shape[1])
    vine_obj = rvinecop.vine(subset, copula_controls=copula_controls)
    sampled_r = rvinecop.inverse_rosenblatt(noise, vine_obj)
    return np.asarray(sampled_r).reshape(k, -1)


def _gauss(subset, k):
    """Draw k codes from a multivariate normal with the subset's mean/cov."""
    mean = np.mean(subset, axis=0)
    cov = np.cov(subset, rowvar=0)
    return np.random.multivariate_normal(mean, cov, k)


def _independent(subset, k):
    """Draw k codes with the project's `indep_sampling` sampler."""
    return indep_sampling(subset, subset, k)


def _gmm(subset, k):
    """Fit a 10-component Gaussian mixture on `subset` and draw k codes."""
    gm = GaussianMixture(n_components=10, random_state=0).fit(subset)
    return gm.sample(n_samples=k)[0]


def _kde(subset, k):
    """Fit a KDE (bandwidth chosen by 10-fold CV grid search) and draw k codes."""
    grid = GridSearchCV(KernelDensity(),
                        {'bandwidth': np.linspace(0.1, 2.0, 40)},
                        cv=10)
    grid.fit(subset)
    return grid.best_estimator_.sample(n_samples=k)


def _coupling_net(dim, hidden=256, n_stacks=4, final_tanh=False):
    """Build one RealNVP coupling network.

    The network is `n_stacks` repetitions of
    Linear(dim, hidden) -> LeakyReLU -> Linear(hidden, hidden) -> LeakyReLU
    -> Linear(hidden, dim), optionally followed by a Tanh.
    """
    layers = []
    for _ in range(n_stacks):
        layers += [nn.Linear(dim, hidden), nn.LeakyReLU(),
                   nn.Linear(hidden, hidden), nn.LeakyReLU(),
                   nn.Linear(hidden, dim)]
    if final_tanh:
        layers.append(nn.Tanh())
    return nn.Sequential(*layers)


def _sample_realnvp(latents, n, total, n_steps=2001, batch_size=128):
    """Train a RealNVP flow on n random training latents and sample `total` codes."""
    subset = latents[np.random.choice(latents.shape[0], size=n, replace=False)]
    subset = subset.astype(np.float32)
    dim = subset.shape[1]

    # Factories for the two coupling networks passed to RealNVP. The original
    # defined the first one as `nnets` but passed `nets`, a NameError.
    def nets():
        return _coupling_net(dim, final_tanh=True)

    def nett():
        return _coupling_net(dim)

    # Six alternating binary masks, each of shape (1, dim).
    masks = torch.from_numpy(
        np.array([np.resize([1, 0], [1, dim]),
                  np.resize([0, 1], [1, dim])] * 3).astype(np.float32))
    prior = distributions.MultivariateNormal(torch.zeros(dim), torch.eye(dim))
    flow = RealNVP(nets, nett, masks, prior)
    optimizer = torch.optim.Adam(
        [p for p in flow.parameters() if p.requires_grad], lr=1e-4)

    for _ in tqdm(range(n_steps)):
        # randint's upper bound is exclusive: use n (not n - 1) so the last
        # row of the subset can be drawn as well.
        batch_idx = np.random.randint(0, n, size=batch_size)
        loss = -flow.log_prob(torch.from_numpy(subset[batch_idx])).mean()
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()

    x = flow.sample(total).detach().numpy()
    # Drop the singleton middle axis of the samples.
    return np.reshape(x, (x.shape[0], x.shape[2]))


# Samplers that are re-fitted per chunk of n training latents.
chunked_samplers = {
    0: _beta_copula,
    2: _vine_copula,
    3: _gauss,
    4: _independent,
    5: _gmm,
    6: _kde,
}

for i, n in enumerate(sample_sizes):

    for m in range(nr_model):

        if m == 1:
            # VAE: latent codes come straight from the standard-normal prior.
            y_sample = torch.randn(n_generate, latent_dim)
        elif m == 7:
            y_sample = _sample_realnvp(lv, n, n_generate)
        else:
            y_sample = _draw_in_chunks(lv, n, n_generate, chunked_samplers[m])
            if m == 4:
                # Independent model: additionally shuffle along axis 0.
                y_sample = shuffle_along_axis(y_sample, axis=0)

        # Generate images by decoding. Only the prior samples (m == 1) belong
        # to the VAE; every other model was fitted on latents of the plain
        # autoencoder and must be decoded by it. (The original decoded m == 2,
        # the vine copula, with the VAE and the prior samples with the AE.)
        decoder = model_VAE if m == 1 else model_AE

        # Decode and evaluate without building an autograd graph.
        with torch.no_grad():
            img_new = decoder.decode(torch.as_tensor(y_sample, dtype=torch.float32))
            s = calculate_score(img_test, img_new)
        score[i, m, :] = s

        print(s)
        print('Finished_{}'.format(m))

    print("Finished:",n)

In [None]:
# Plot every metric as a function of the sample size, one panel per metric,
# and save the figure as score_SVHN_samplesize.png.
from matplotlib.pyplot import cm

xaxis = list(range(200, 2001, 200))
title = ['EMD PIXEL','MMD PIXEL','1NN PIXEL','EMD CONV','MMD CONV','1NN CONV','Inception','FID']
# One label per model index m, in the order in which the sampling loop above
# fills `score` (0 Beta copula, 1 VAE prior, 2 vine copula, 3 Gauss,
# 4 independent, 5 GMM, 6 KDE, 7 RealNVP). The previous list
# ('EBCAE','VCAE_5','VAE','VCAE',...,'KDE') was shifted against that order,
# so every curve except the first carried the wrong name.
label = ['EBCAE','VAE','VCAE','Gauss','Independent','GMM','KDE','RealNVP']
score_show = score[:len(xaxis)]

# Model index 1 (VAE) is left out, as in the original selection: its latent
# codes are drawn from N(0, I) and do not depend on the sample size.
models_to_plot = [0, 2, 3, 4, 5, 6, 7]
# Colour slot per plotted model; slot 6 is skipped for model 7, reproducing
# the original colour assignment.
palette = cm.Set2(np.linspace(0, 1, 8))
color_slots = [0, 1, 2, 3, 4, 5, 7]

plt.rcParams["font.family"] = "Times New Roman"
fig = plt.figure(figsize=(8.27, 3.8))
# Panels are ordered so that the PIXEL and CONV variant of a metric are neighbours.
for k, i in enumerate([0,3,1,4,2,5,6,7], start=1):
    ax = plt.subplot(2, 4, k)
    for j, slot in zip(models_to_plot, color_slots):
        # Line style and marker are given as keywords; the old '--bo' format
        # string also specified a colour that clashed with `color=`.
        ax.plot(xaxis, score_show[:, j, i], linestyle='--', marker='o',
                label=label[j], color=palette[slot], markersize=3.5)
    ax.set_xticks(xaxis[0::2])
    # Set the tick font size after the ticks are final so it is not lost.
    ax.tick_params(axis='both', labelsize=10)
    ax.set_xlabel('sample size', fontsize=10)
    ax.set_title(title[i])

# Every panel shows the same models, so the first panel's handles suffice
# for the shared legend placed below the figure.
lines, labels = fig.axes[0].get_legend_handles_labels()

bbox_transform=fig.transFigure
fig.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, 0.05),
           bbox_transform=bbox_transform, fancybox=False, markerscale=2,
           shadow=False, framealpha=0, ncol=len(lines), fontsize=10)

plt.tight_layout()

plt.savefig("score_SVHN_samplesize.png",dpi=300,bbox_inches='tight')