In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from tqdm import tqdm
from sklearn.random_projection import GaussianRandomProjection
from sklearn.decomposition import PCA
import sys
sys.path.append('../')
sys.path.append('../src/')

from src.generative import *
from src.detection import *
from src.utils import set_plotting_params

In [2]:
# Load Fashion-MNIST through the project wrapper and flatten every 28x28 image
# into a 784-dimensional row vector.
# NOTE(review): `sample()` without arguments produced 50000 rows in the recorded
# run — presumably the full training split; confirm against FashionMNIST.sample.
fmnist = FashionMNIST(root='../data.nosync/').fit()
X = fmnist.sample().detach().numpy().reshape(-1, 28*28)
X_val = fmnist.sample(10000, S='val').detach().numpy().reshape(-1, 28*28)
X_test = fmnist.sample(10000, S='test').detach().numpy().reshape(-1, 28*28)

# print shapes
print('X:', X.shape)
print('X_val:', X_val.shape)
print('X_test:', X_test.shape)

# Perform PCA. The basis is fitted on X only and then applied unchanged to the
# validation and test samples, so all three live in the same 100-dim space.
# random_state pins the projection: depending on the scikit-learn version,
# svd_solver='auto' may select the randomized solver for an input of this size,
# which would otherwise give a slightly different basis on every run.
pca = PCA(n_components=100, random_state=0)
pca.fit(X)
X_pca = pca.transform(X)
X_val_pca = pca.transform(X_val)
X_test_pca = pca.transform(X_test)

# Variance explained (summed over the 100 retained components)
print('Explained variance:', pca.explained_variance_ratio_.sum())

X: (50000, 784)
X_val: (10000, 784)
X_test: (10000, 784)
Explained variance: 0.9119189


In [3]:
# Three-sample test (Meehan et al.) evaluated on mixtures of a copying model
# and an underfitting model, one mixture per copying rate rho.
tst = ThreeSampleDetector(num_regions=50)
rhos = np.linspace(0, 1, 11).round(2)  # copying rate in the mixture model
copier = Memorizer(n_copying=20, radius=0.1)
underfitter = Memorizer(n_copying=len(X_pca), radius=5.5)

for r in rhos:
    print(f'rho={r}')
    # Build the mixture for this copying rate and fit it on the PCA-reduced
    # training data before scoring it against the held-out test data.
    mixture = Mixture(rho=r, q1=copier, q2=underfitter)
    q = mixture.fit(X_pca)
    stat = tst.C_T(q, X_pca, X_test_pca)
    print(stat)

rho=0.0
(4.912485212684187, -10.211927453639404)
rho=0.1
(3.1164297666070593, -10.772758023329494)
rho=0.2
(1.4992669903211666, -13.432220680598201)
rho=0.3
(-0.12847310019867045, -14.586123442938943)
rho=0.4
(-1.690963712267294, -18.96320367857377)
rho=0.5
(-2.943443108138642, -19.695547473638214)
rho=0.6
(-3.293582746253368, -19.005292495717146)
rho=0.7
(-4.267022236156733, -21.138924365070373)
rho=0.8
(-5.84158762769791, -23.201251149852844)
rho=0.9
(-6.7623887166598395, -26.717591812124457)
rho=1.0
(-22.66462401670645, -29.164414622741088)


## Synthetic Model

In [4]:
# Mixture under study: q = rho * q_copying + (1 - rho) * q_underfit
copier = Memorizer(n_copying=20, radius=0.1)
underfitter = Memorizer(n_copying=len(X_pca), radius=5.5)

# Experiment grid
m = len(X) * 2  # number of samples to generate
k = 10  # number of random projections
lambdas = [5, 10, 20]
d_proj = [1, 2, 3]  # number of dimensions in the random projection
rhos = np.linspace(0, 1, 11).round(2)  # copying rate in the mixture model

In [5]:
# Run the random-projection copying detector for every (lambda, rho, d_proj)
# combination and store one boolean array per lambda on disk.
#
# This loop is expensive, so each lambda is skipped when its result file already
# exists; the plotting cell only ever reads these files. Delete a file to force
# that lambda to be recomputed.
# NOTE(review): the detector / projection calls are restored verbatim from the
# previously commented-out version of this cell — confirm they still match the
# current `src.detection` API before relying on a fresh recomputation.
import os

results_dir = '../doc/FashionMNIST'

for lam in lambdas:
    results_path = f'{results_dir}/mixed_model_results_lambda_{lam}.npy'
    if os.path.exists(results_path):
        continue  # cached from an earlier run

    # results[s, l, j, i] is True when generated sample s was flagged as a copy
    # under projection l, projection dimension d_proj[j] and copying rate rhos[i].
    results = np.zeros((m, k, len(d_proj), len(rhos)), dtype=bool)

    for i, r in enumerate(tqdm(rhos)):
        q = Mixture(rho=r, q1=copier, q2=underfitter).fit(X_pca)
        # Two independent draws of m samples from the same fitted mixture.
        X1_gen = q.sample(m)
        X2_gen = q.sample(m)
        for j, d in enumerate(d_proj):
            for l in range(k):
                # A fresh random projection for every repetition.
                rp = GaussianRandomProjection(n_components=d).fit(X_pca)
                dcd = DataCopyingDetector(lmbda=lam)
                idx = dcd.get_copying_indices(rp.transform(X_pca), rp.transform(X1_gen), rp.transform(X2_gen))
                results[idx, l, j, i] = True

    # save results
    os.makedirs(results_dir, exist_ok=True)
    np.save(results_path, results)

In [6]:
# Plot, one panel per lambda, the estimated copying rate against the true
# mixture weight rho for each projection dimension, and save the figure to disk.
textwidth = set_plotting_params()
line_styles = ['--', '-.', ':']
colors = sns.color_palette('colorblind')
markers = ['x', '^', 'h', '^', 'v', '>', '<', 'p', '*', 'h']
markersize = [5, 4, 3, 2, 1]

# Shaded band drawn on every panel and its legend entry.
# NOTE(review): the 0.7 start is hard-coded; presumably it is the rho from which
# the three-sample statistic satisfies the legend's "C_S < -3" — confirm against
# the C_T values printed earlier in the notebook.
shade_start = 0.7
shade_color = '#FFCCCB'
shade_label = '$C_S < -3$'

fig, axs = plt.subplots(1, 3, figsize=(textwidth, 1.5), sharey=True)

for i, lam in enumerate(lambdas):
    ax = axs[i]
    results = np.load(f'../doc/FashionMNIST/mixed_model_results_lambda_{lam}.npy')
    # Majority vote over the projections (axis 1) for every generated sample,
    # then the fraction of flagged samples (axis 0). What remains is indexed as
    # maj_votes[j] -> one value per entry of rhos (see the x/y pairing below).
    maj_votes = (results.mean(axis=1) > 0.5).mean(axis=0)
    # plot true copying rate
    ax.plot(rhos, rhos, label='True cr$_q$', linestyle='-', color='black')
    for j, d in enumerate(d_proj):
        ax.plot(rhos, maj_votes[j],
                label='$d_{\\mathrm{proj}}=$' + str(d),
                linestyle=line_styles[j],
                color=colors[j],
                marker=markers[j],
                markersize=markersize[j],
                alpha=0.75,
                )
    # Per-panel decoration: set once per axes rather than once per curve.
    ax.set_xlabel('$\\rho$')
    ax.set_title(f'({chr(97 + i)})' + f' $\\lambda={lam}$', loc='center')
    ax.axvspan(xmin=shade_start, xmax=1, color=shade_color, alpha=0.5)
    ax.set_xticks(np.linspace(0, 1, 6))
    ax.set_yticks(np.linspace(0, 1, 6))

# All panels share the same curves, so the first panel's handles serve as the
# legend for the whole figure; the shaded band gets a manual patch entry.
handles, labels = axs[0].get_legend_handles_labels()
shade_patch = mpatches.Patch(color=shade_color, alpha=0.5, label=shade_label)
handles.append(shade_patch)
labels.append(shade_label)

axs[0].set_ylabel('cr$_q$')
# Anchor the legend just outside the top-right corner of the last panel.
axs[-1].legend(handles, labels, loc='upper left', bbox_to_anchor=(1, 1), fontsize=6)
fig.tight_layout(pad=0.3)
fig.savefig('../doc/FashionMNIST/mixed_model_results.png', dpi=300)
# The figure is only written to disk; close it so it is not rendered inline
# and does not stay alive in the kernel.
plt.close(fig)