## Overview
Notebook illustrating the performance of a CNN classifier trained on the real MNIST dataset, compared with the same classifier trained on synthetic data generated by a simple GAN (simpGAN) and by privGAN.

In [None]:
import numpy as np
import torch
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from classifier.cnn_classifier import CNNClassifier

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(torch.__version__, "device:", device)


## Retrieve train and test data from the MNIST dataset

In [None]:
# MNIST training split. `transform` is attached to the dataset object, but the
# arrays below are built directly from the raw `.data` / `.targets` tensors.
transform = transforms.Compose([transforms.ToTensor()])
train_ds = datasets.MNIST(root="./data", train=True, download=True, transform=transform)

# (p - 127.5) / 127.5 maps pixel value 0 to -1 and 255 to +1; the reshape adds
# an explicit channel axis so every image is (1, 28, 28).
pixels = train_ds.data.float()
X = ((pixels - 127.5) / 127.5).reshape(pixels.size(0), 1, 28, 28).numpy()
y = train_ds.targets.numpy()

# Hold out 20% for validation, stratified on the label; fixed seed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print('train:', X_train.shape, 'val:', X_val.shape)


In [None]:
# Fit the CNN on the real training split and report the metrics returned by
# `train` for the validation split.
clf = CNNClassifier(num_classes=10, input_shape=(1, 28, 28), device=device)
val_metrics = clf.train(
    X_train, y_train,
    X_val, y_val,
    batch_size=256,
    epochs=5,
)
val_loss, val_acc = val_metrics
print('Validation loss:', val_loss, 'Validation acc:', val_acc)


## Generate synthetic images using simpGAN

In [None]:
# Evaluate the real-data classifier on the MNIST test split.
test_ds = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

# Same preprocessing as the training images: scale to [-1, 1] and add a
# channel axis so the array is (N, 1, 28, 28).
X_test = (test_ds.data.float() - 127.5) / 127.5
X_test = X_test.view(X_test.size(0), 1, 28, 28).numpy()
y_test = test_ds.targets.numpy()

# Put the network in inference mode before scoring so that layers which behave
# differently during training (dropout, batch norm), if present, do not
# perturb the predictions.
# NOTE(review): assumes clf.model is a torch.nn.Module — confirm.
clf.model.eval()
with torch.no_grad():
    xb = torch.tensor(X_test, dtype=torch.float32, device=device)
    logits = clf.model(xb)
    # Predicted class = index of the largest output per image.
    preds = logits.argmax(dim=1).cpu().numpy()
    test_acc = (preds == y_test).mean()
print('Test accuracy:', test_acc)


In [None]:
# You can now reuse clf.model for downstream tasks or save it
# torch.save(clf.model.state_dict(), 'mnist_cnn.pth')


## Generate synthetic images using privGAN

In [None]:
# Generate a synthetic training set with privGAN: one privGAN (two generators,
# two discriminators, one privacy discriminator) is trained per digit class,
# and each class contributes exactly as many synthetic images as it has real
# images in X_train, so the class balance of the real split is preserved.
# NOTE(review): NUM_CLASSES, tf, Adam, mnist_gan and pg are not defined or
# imported in the visible part of this notebook — confirm they are provided.
# NOTE(review): X_train is (N, 1, 28, 28) at this point; confirm that
# pg.privGAN / mnist_gan accept that layout.
X_c2 = []
y_c2 = []


def _new_adam():
    """Return a fresh Adam optimizer with the settings shared by all privGAN models."""
    return Adam(lr=0.0002, beta_1=0.5)


for i in range(NUM_CLASSES):
    print(i)
    # Real images of class i. A dedicated name is used so the notebook-level
    # `X` (the full dataset) is not overwritten by a per-class subset.
    X_class = X_train[y_train == i]

    tf.keras.backend.clear_session()
    optim = _new_adam()
    # Every model gets its own optimizer instance.
    generators = [mnist_gan.MNIST_Generator(optim=_new_adam()) for _ in range(2)]
    discriminators = [mnist_gan.MNIST_Discriminator(optim=_new_adam()) for _ in range(2)]
    pDisc = mnist_gan.MNIST_DiscriminatorPrivate(OutSize=2, optim=_new_adam())

    # Only the trained generators are needed; the other five return values
    # are discarded.
    (generators, _, _, _, _, _) = pg.privGAN(
        X_class,
        epochs=1,
        disc_epochs=1,
        batchSize=256,
        generators=generators,
        discriminators=discriminators,
        pDisc=pDisc,
        optim=optim,
        privacy_ratio=1.0,
    )

    # Split the sample budget between the two generators. The second one takes
    # the remainder so that an odd class size does not lose an image.
    n_first = len(X_class) // 2
    n_second = len(X_class) - n_first
    # 100 is presumably the generators' latent dimension — TODO confirm.
    noise1 = np.random.normal(0, 1, size=[n_first, 100])
    noise2 = np.random.normal(0, 1, size=[n_second, 100])
    X_c2.append(generators[0].predict(noise1))
    X_c2.append(generators[1].predict(noise2))
    y_c2.extend([i] * (n_first + n_second))

X_c2 = np.concatenate(X_c2)
y_c2 = np.array(y_c2)

In [None]:
# Shuffle the synthetic set: one random permutation is applied to both the
# images and their labels so each image keeps its own label.
perm = np.random.permutation(len(X_c2))
X_c2, y_c2 = X_c2[perm], y_c2[perm]

In [None]:
# Train the CNN on the privGAN-generated images and score it on the real MNIST
# test split.
# The classifier is driven the same way as in the real-data run earlier in this
# notebook: integer class labels (no one-hot encoding), channel-first
# (N, 1, 28, 28) float inputs, an explicit device, and keyword arguments for
# the batch size and epoch count.
# NOTE(review): NUM_CLASSES, BATCH_SIZE_PER_EPOCH and NUM_EPOCHS are not defined
# in the visible part of this notebook (the real-data run uses 10, 256 and 5)
# — confirm they are provided.
y_tr = np.asarray(y_c2, dtype=np.int64)
y_t = np.asarray(y_test, dtype=np.int64)

# reshape(-1, 1, 28, 28) keeps the pixel order whether the generator output is
# flat, (N, 28, 28) or (N, 28, 28, 1), because there is a single channel.
x_train = np.asarray(X_c2, dtype=np.float32).reshape(-1, 1, 28, 28)
x_test = np.asarray(X_test, dtype=np.float32).reshape(-1, 1, 28, 28)

classifier = CNNClassifier(num_classes=NUM_CLASSES, input_shape=(1, 28, 28), device=device)
score = classifier.train(
    x_train, y_tr,
    x_test, y_t,
    batch_size=BATCH_SIZE_PER_EPOCH,
    epochs=NUM_EPOCHS,
)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# [loss, accuracy] for the privGAN-trained classifier, used by the final plot.
r_2 = [score[0], score[1]]


## Plot the results of the CNN classifier on the three datasets

In [None]:
# Bar chart of classifier accuracy, one bar per training-data source.
# Index 1 of each r_* result is plotted as the accuracy.
# NOTE(review): plt, r_0 and r_1 are not defined in the visible part of this
# notebook — confirm they are created elsewhere before this cell runs.
positions = [0, 1, 2]
labels = ['Real', 'GAN', 'privGAN (1.0)']
accuracies = [r_0[1], r_1[1], r_2[1]]
plt.bar(positions, accuracies)
plt.xticks(positions, labels, rotation=45)
plt.ylabel('Accuracy')