In [1]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.random_projection import GaussianRandomProjection
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tqdm import tqdm

sys.path.append('../')
sys.path.append('../src/')
from src.generative import *
from src.detection import DataCopyingDetector, ThreeSampleDetector
from src.autoencoding import InceptionV3
from src.utils import set_plotting_params

In [2]:
# Load the precomputed embedding matrix (one row per sample; the file name
# suggests ImageNet validation embeddings).
emb_data = np.load("../data.nosync/imagenet_val_emb_data.npy")
print(f"Shape of data: {emb_data.shape}")

# Reduce the embeddings to 64 principal components.
# random_state is pinned because scikit-learn's automatic solver selection may
# choose the randomized SVD for an input of this size, which would otherwise
# make the projection (and everything derived from it) vary between runs.
pca = PCA(n_components=64, random_state=0)
pca_emb_data = pca.fit(emb_data).transform(emb_data)
print(f"Shape of PCA data: {pca_emb_data.shape}")

Shape of data: (50000, 2048)
Shape of PCA data: (50000, 64)


In [3]:
# Experiment configuration for the mixture generator
#     q = rho * q_copying + (1 - rho) * q_underfit
m = len(pca_emb_data)  # number of samples to generate
k = 10  # number of random projections
d_proj = [1, 2, 3]  # number of dimensions in the random projection
lambdas = [5, 10, 20]  # one result file / figure panel per value
rhos = np.linspace(0, 1, 11).round(2)  # copying rate in the mixture model

# The two mixture components: a small-radius model for q_copying and a
# large-radius model spanning every training point for q_underfit.
copier = Memorizer(radius=0.02, n_copying=20)
underfitter = Memorizer(radius=0.25, n_copying=m)

In [4]:
# Run the mixture-model copying experiment once per lambda and cache the
# detections on disk. The plotting cell reads these .npy files, so this cell
# has to be runnable for a fresh "Restart & Run All"; an existing file is
# reused rather than recomputed because the sweep is expensive.
# NOTE(review): sampling and the random projections are unseeded, so a
# recomputed cache will not be bit-identical to a previous one.
for lam in lambdas:
    results_path = Path(f'../doc/ImageNet/mixed_model_results_lambda_{lam}.npy')
    if results_path.exists():
        continue

    # Axes: (index returned by the detector, projection repeat, d_proj, rho).
    # Allocate booleans directly instead of a float64 array cast afterwards.
    results = np.zeros((m, k, len(d_proj), len(rhos)), dtype=bool)

    for i, r in enumerate(tqdm(rhos, desc=f'lambda={lam}')):
        q = Mixture(rho=r, q1=copier, q2=underfitter).fit(pca_emb_data)
        # two independent draws of m samples from the same fitted mixture
        X1_gen = q.sample(m)
        X2_gen = q.sample(m)
        for j, d in enumerate(d_proj):
            for l in range(k):
                # fresh random projection to d dimensions for every repeat
                rp = GaussianRandomProjection(n_components=d).fit(pca_emb_data)
                dcd = DataCopyingDetector(lmbda=lam)
                idx = dcd.get_copying_indices(
                    rp.transform(pca_emb_data),
                    rp.transform(X1_gen),
                    rp.transform(X2_gen),
                )
                results[idx, l, j, i] = True

    # save results
    results_path.parent.mkdir(parents=True, exist_ok=True)
    np.save(results_path, results)

In [5]:
# Plot, for every lambda, the estimated copying rate against the mixture
# weight rho (one curve per projection dimension) next to the identity line,
# and write the figure to disk.
textwidth = set_plotting_params()
line_styles = ['--', '-.', ':']
colors = sns.color_palette('colorblind')
markers = ['x', '^', 'h', '^', 'v', '>', '<', 'p', '*', 'h']
markersize = [5, 4, 3, 2, 1]

# One panel per lambda; squeeze=False keeps the axes indexable even when
# `lambdas` holds a single value.
fig, axs = plt.subplots(1, len(lambdas), figsize=(textwidth, 1.5), sharey=True, squeeze=False)
axs = axs[0]

for i, lam in enumerate(lambdas):
    ax = axs[i]
    results = np.load(f'../doc/ImageNet/mixed_model_results_lambda_{lam}.npy')
    # Majority vote over axis 1 (strictly more than half), then the fraction
    # of axis-0 entries that were flagged -> one value per (d_proj, rho).
    maj_votes = (results.mean(axis=1) > 0.5).mean(axis=0)
    if maj_votes.shape != (len(d_proj), len(rhos)):
        raise ValueError(
            f'cached results for lambda={lam} give shape {maj_votes.shape}, '
            f'expected {(len(d_proj), len(rhos))} from d_proj and rhos'
        )

    # plot true copying rate
    ax.plot(rhos, rhos, label='True cr$_q$', linestyle='-', color='black')
    for j, d in enumerate(d_proj):
        # modulo keeps the style lookups valid if d_proj outgrows the lists
        ax.plot(rhos, maj_votes[j],
                label='$d_{\\mathrm{proj}}=$' + str(d),
                linestyle=line_styles[j % len(line_styles)],
                color=colors[j % len(colors)],
                marker=markers[j % len(markers)],
                markersize=markersize[j % len(markersize)],
                alpha=0.75,
                )

    # per-panel decoration, set once rather than once per curve
    ax.set_xlabel('$\\rho$')
    ax.set_title(f'({chr(97 + i)})' + f' $\\lambda={lam}$', loc='center')

    # make x and y ticks at 0, 0.2, ..., 1
    ax.set_xticks(np.linspace(0, 1, 6))
    ax.set_yticks(np.linspace(0, 1, 6))


axs[0].set_ylabel('cr$_q$')
# legend sits outside the right edge of the last panel
axs[-1].legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=6)
fig.tight_layout(pad=0.3)
fig.savefig('../doc/ImageNet/mixed_model_results.png', dpi=300)
plt.close(fig)