## Imports

In [1]:
import os
import shutil
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline  

from sklearn import svm, linear_model
from sklearn.linear_model import SGDClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.externals import joblib

import torch
import torchvision 
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

sys.path.insert(0, '../../../Utils/')

import models
from train import *
from metrics import * 
from SVC_Utils import *

#audio
import librosa as libr

print("Python: %s" % sys.version)
print("Pytorch: %s" % torch.__version__)

# determine device to run network on (runs on gpu if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


Python: 3.6.5 (default, Jul  6 2018, 19:12:46) 
[GCC 5.4.0 20160609]
Pytorch: 0.4.0


## NN Hyperparameters

In [2]:
batch_size = 128  # NOTE: re-assigned to 32 in the DataLoader cell further down
lr = 0.001
k = 3  # passed as n_in to the attack MLP and as k to train_attacker / eval_attack_net

# True: load previously saved target/shadow weights instead of training them.
pretrained = True

# Feature transform: the STFT option or 'MFCC'.
# NOTE: the STFT option is spelled 'SFTF' throughout this notebook and that
# spelling is embedded in the model_weights/*.pth file names, so it stays the
# default here. The correctly spelled 'STFT' is accepted by the check below too.
transform_type = 'SFTF'

if transform_type in ('SFTF', 'STFT'):
    target_net_type = models.STFT_CNN_classifier
    shadow_net_type = models.STFT_CNN_classifier
    in_size = 94  # 94 for STFT, 20 for MFCC
elif transform_type == 'MFCC':
    target_net_type = models.audio_CNN_classifier
    shadow_net_type = models.audio_CNN_classifier
    in_size = 20
else:
    # Fail here instead of with a NameError on target_net_type much later.
    raise ValueError(
        "Unknown transform_type %r: expected 'SFTF' (STFT) or 'MFCC'." % (transform_type,)
    )

In [3]:
### Speech preprocessing

class tensorToMFCC:
    """Callable transform turning a raw waveform into MFCC features.

    The mel spectrogram is computed at a 16 kHz sample rate with
    ``number_of_mels`` mel bands (a notebook-level global read at call time)
    and an 8 kHz upper frequency bound, converted to dB, and then passed to
    ``librosa.feature.mfcc``.

    Returns:
        A float ``torch.Tensor`` holding the MFCC matrix.
    """

    def __call__(self, y):
        # The input is used as-is (the former ``y.numpy()`` conversion is disabled),
        # so it is presumably already a NumPy array -- confirm against the dataset class.
        # Flatten to 1-D using the length of axis 1 (assumes y is (1, n_samples) -- TODO confirm).
        n_samples = y.shape[1]
        signal = np.reshape(y, (n_samples,))
        # Keyword arguments: recent librosa releases no longer accept y/sr positionally.
        mel = libr.feature.melspectrogram(y=signal, sr=16000, n_mels=number_of_mels,
                                          fmax=8000)
        mfcc = libr.feature.mfcc(S=libr.power_to_db(mel))
        return torch.from_numpy(mfcc).float()

class STFT:
    """Callable transform: waveform -> STFT magnitude as a float tensor.

    The magnitude array returned by librosa has its two axes swapped before
    being returned.
    """

    def __call__(self,y):
        # Flatten to 1-D using the length of axis 1 (assumes y is (1, n_samples) -- TODO confirm).
        n_samples = y.shape[1]
        flat_signal = np.reshape(y, (n_samples,))
        magnitude = np.abs(libr.core.stft(flat_signal))
        swapped = torch.from_numpy(magnitude).permute(1,0)
        return swapped.float()
    
# Pick the preprocessing transform matching transform_type.
# 'SFTF' is the spelling used by the rest of this notebook; 'STFT' is accepted as well.
if transform_type in ('SFTF', 'STFT'):
    transform = STFT()
elif transform_type == 'MFCC':
    transform = tensorToMFCC()
else:
    # Without this branch `transform` would simply stay undefined.
    raise ValueError(
        "Unknown transform_type %r: expected 'SFTF' (STFT) or 'MFCC'." % (transform_type,)
    )

## Audio hyperparameters

In [4]:
n_seconds = 3          # clip length; passed to audio_preload_and_split and the datasets
n_epochs = 25          # training epochs for the target and shadow networks
sampling_rate = 16000  # not referenced by the visible cells (tensorToMFCC hard-codes 16000)
number_of_mels = 128   # read by tensorToMFCC at call time
lr = 0.001             # same value as in the NN hyperparameter cell

# Fractions of the data to partition into train and test, in that order.
splits = [0.8, 0.2]
attacking = 1  # 0 for no attack, 1 for attack 1, 3 for attack 3

# Attacking means data is prepared for a target & a shadow network.
# This will also split "out data" from totally different speakers -- data none of the
# other networks have seen -- for training & testing the attack network. This will be
# an equivalent amount of data to the train split as defined above.

# Compare with a tolerance: sums of valid fractions such as 0.7 + 0.2 + 0.1 are
# not exactly 1.0 in floating point. Raise so a bad split stops the notebook.
if not np.isclose(sum(splits), 1.0):
    raise ValueError('error: splits do not sum to 1. (got %r)' % (sum(splits),))

%load_ext autoreload
%autoreload 2
sys.path.insert(0, './../../../Utils')
from datasets import LibriSpeechDataset
from datasets import audio_preload_and_split

## Load VOiCES data

In [5]:
# Directory path for the VOiCES dataset.
path = os.path.join(os.getcwd(),'../../../Datasets/VOiCES/') #'add/yourDirectory/path/VOiCES'

# Subset names previously listed here: 'train-clean-360', 'train-clean-100', 'dev-clean'.
# That list was immediately overwritten, so an empty list is what is actually passed on.
all_data = []

# Splits data into 2 sets of speakers for target & shadow network, into the
# train:test splits defined above.
dfs = audio_preload_and_split(path,all_data,n_seconds,pad=False,cache=True,splits=splits, attacking = attacking, data ='VOiCES')


# Reference file with one row per speaker and a 'Gender' column holding 'M' / 'F'.
spkGendr = pd.read_csv(path+'Lab41-SRI-VOiCES-speaker-gender-dataset_SUBSET.csv')

print('Total number of speakers: ', len(spkGendr))
print('Number of females:', spkGendr.Gender[spkGendr.Gender == 'F'].count())
# Fixed: this line previously counted 'F' again and so reported the female count twice.
print('Number of males:', spkGendr.Gender[spkGendr.Gender == 'M'].count())

# NOTE(review): this second call re-runs the split on the VOiCES path with
# data='Libri' and overwrites the `dfs` computed above. It looks like a
# leftover from the LibriSpeech cell -- confirm whether it is intended.
dfs = audio_preload_and_split(path,all_data,n_seconds,pad=False,cache=True,splits=splits, attacking = attacking, data ='Libri')


0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]

Initialising VOiCES Dataset with minimum length = 3s and subsets = []
['part_6', 'part_4', 'Lab41-SRI-VOiCES-speaker-gender-dataset_SUBSET.csv', 'part_2', 'part_8', 'part_1', 'part_5', 'part_3', 'part_7']
   Speaker Gender          DataSet
0     5126      M  train-clean-360
1     3549      F  train-clean-360
2     4331      F  train-clean-360
3      196      M  train-clean-100
4     2289      M  train-clean-100
   speaker_id sex           subset
0        5126   M  train-clean-360
1        3549   F  train-clean-360
2        4331   F  train-clean-360
3         196   M  train-clean-100
4        2289   M  train-clean-100
Indexing part_6...
Indexing part_4...
Indexing part_2...
Indexing part_8...
Indexing part_1...
Indexing part_5...
Indexing part_3...
Indexing part_7...





MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

## Load LibriSpeech data:

In [None]:
### Libri hyperparameters

data_set = 100 #100 or 360

# Map the numeric choice to the LibriSpeech subset to load. An unsupported value
# raises instead of silently reusing whatever `all_data` held from an earlier cell.
_libri_subsets = {100: ['train-clean-100'], 360: ['train-clean-360']}
if data_set not in _libri_subsets:
    raise ValueError('data_set must be 100 or 360, got %r' % (data_set,))
all_data = _libri_subsets[data_set]

### Data set
path = './../../../Classification_baselines/LibriSpeech/data/'

# #to load more data sets
# import data_downloaders
# data_downloaders._download_and_preprocess_data(path)

# Splits data into 2 sets of speakers for target & shadow network, into the
# train:test splits defined above.
dfs = audio_preload_and_split(path,all_data,n_seconds,pad=False,cache=True,splits=splits, attacking = attacking, data ='Libri')


def _libri_dataset(df):
    """Wrap one split DataFrame in a LibriSpeechDataset using the notebook's shared settings."""
    return LibriSpeechDataset(path, df = df, seconds = n_seconds, downsampling=1,
                              transform = transform, stochastic=False)


# target train & test
valid_sequence_train = _libri_dataset(dfs[0])
valid_sequence_test = _libri_dataset(dfs[1])

# shadow train & test
valid_sequence_train_shadow = _libri_dataset(dfs[2])
valid_sequence_test_shadow = _libri_dataset(dfs[3])

# "out" data for the target and for the shadow model
valid_sequence_out = _libri_dataset(dfs[4])
valid_sequence_out_shadow = _libri_dataset(dfs[5])

Downloading Librivox data set (55GB) into ./../../../Classification_baselines/LibriSpeech/data/ if not already present...
Found archive "./../../../Classification_baselines/LibriSpeech/data/train-clean-100.tar.gz" - not downloading.
Found archive "./../../../Classification_baselines/LibriSpeech/data/train-clean-360.tar.gz" - not downloading.
No archive "./../../../Classification_baselines/LibriSpeech/data/train-other-500.tar.gz" - downloading...


In [None]:
batch_size = 32  # replaces the batch_size = 128 set in the NN hyperparameter cell


def _make_loader(dataset, drop_last=False):
    """Build a shuffled DataLoader over `dataset` with 8 workers and the current batch_size."""
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=8,
        drop_last=drop_last,
        # pin_memory=True # CUDA only
    )


# Loaders for the target model. The train and "out" loaders drop the last
# incomplete batch; the test loader keeps it.
target_train_loader = _make_loader(valid_sequence_train, drop_last=True)
target_out_loader = _make_loader(valid_sequence_out, drop_last=True)
target_test_loader = _make_loader(valid_sequence_test)

# Loaders for the shadow model, configured the same way.
shadow_train_loader = _make_loader(valid_sequence_train_shadow, drop_last=True)
shadow_out_loader = _make_loader(valid_sequence_out_shadow, drop_last=True)
shadow_test_loader = _make_loader(valid_sequence_test_shadow)


# Initialize/Train Targets
The target is the model being attacked. If it is a neural network, its architecture can differ from that of the shadow network.

In [None]:
# Build the target classifier together with its loss and optimiser.
# in_size comes from the NN hyperparameter cell.
n_hidden = 512
n_classes = valid_sequence_test.num_speakers
print(n_classes)

target_loss = nn.CrossEntropyLoss()

target_net = target_net_type(in_size, n_hidden, n_classes)
target_net = target_net.to(device)
target_net.apply(models.weights_init)

target_optim = optim.Adam(params=target_net.parameters(), lr=lr)

In [None]:
# Train the target network, or restore it from a saved checkpoint.
if not pretrained:
    train(target_net, target_train_loader, target_test_loader, target_optim, target_loss, n_epochs, verbose = False) #classes = range(valid_sequence_test.num_speakers),
else:
    fnm = 'model_weights/CNN_voice_classifier'+str(data_set)+'_target_'+transform_type+str(n_epochs-1)+'.pth'
#     fnm = 'model_weights/CNN_voice_classifier100_target_'+str(epoch)+'.pth'
    # map_location remaps the stored tensors onto the device selected at the top of
    # the notebook, so weights saved on a GPU also load on a CPU-only machine.
    chpt = torch.load(fnm, map_location=str(device))
    target_net.load_state_dict(chpt['state_dict'])

In [None]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """Serialise a checkpoint with torch.save and optionally keep a "best" copy.

    Args:
        state: object to serialise (the notebook passes a dict with 'epoch',
            'arch', 'state_dict' and 'optimizer' entries).
        is_best: when true, the written file is also copied to `best_filename`.
        filename: destination path of the checkpoint.
        best_filename: destination of the copy made when `is_best` is true.
            Defaults to the previously hard-coded 'model_best.pth.tar'.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)


# Epoch index written into the checkpoints and their file names. It matches the
# n_epochs - 1 used when loading, for n_epochs = 25.
epoch = 24

In [None]:
# Persist the freshly trained target network; skipped when weights were loaded from disk.
if not pretrained:
    target_state = {
        'epoch': epoch,
        'arch': 'audio_CNN_classifier',
        'state_dict': target_net.state_dict(),
        'optimizer' : target_optim.state_dict(),
    }
    target_ckpt_path = ('model_weights/CNN_voice_classifier' + str(data_set)
                        + '_target_' + transform_type + str(epoch) + '.pth')
    save_checkpoint(target_state, False, filename=target_ckpt_path)

### Hold off on adapting SVM for now

In [None]:
# #Initialize SVM

# # #The stored baseline SVM was fit using all of CIFAR10 training data. To attack for membership inference, use 
# # #images not in CIFAR10 training data, or fit new classifiers/run source code with subset of CIFAR10.

# # '''
# # dir='../../../Classification_baselines/CIFAR10'
# # target_gen=load_svm(dir, gen=True)
# # target_maxacc=load_svm(dir, gen=False)
# # '''

# # #Training example targets on loaded CIFAR10 target subset:

# gen_svm=make_pipeline(PCA(n_components=180), MinMaxScaler(feature_range=(-1,1)), svm.SVC(C=10, gamma=.1, probability=True))
# maxacc_svm=make_pipeline(PCA(n_components=180), MinMaxScaler(feature_range=(-1,1)), svm.SVC(C=1, gamma=.01, probability=True))

# # sv_target_fit_loader = torch.utils.data.DataLoader(sv_cifar10_trainset, batch_size=target_train_sampler.__len__(), 
# #                                                    sampler=target_train_sampler, num_workers=1)


# tin, tout=load(target_train_loader)

# #Train SVM
# gen_svm.fit(tin, tout)
# maxacc_svm.fit(tin, tout)

In [None]:
# #evaluate SVM targets

# classes = range(n_classes)
# inp, outp=load(target_test_loader)

# print('SVM A (C=', gen_svm.get_params(deep=True)['svc__C'], ', gamma= ',
#       gen_svm.get_params(deep=True)['svc__gamma'], '): ')
# class_acc(gen_svm.predict_proba(inp), outp, classes)

# print('SVM B (C=', maxacc_svm.get_params(deep=True)['svc__C'], ', gamma= ',
#       maxacc_svm.get_params(deep=True)['svc__gamma'], '): ')
# class_acc(maxacc_svm.predict_proba(inp), outp, classes)

# Initialize/Train Shadow Model
Shadow model mimics the target network, emulating the target model's differences in prediction probabilities for samples in and out of its dataset. For this attack, only one shadow model is used. 

In [None]:
# Build the shadow classifier together with its loss and optimiser.
# The class count is taken from the shadow test split.
n_classes = valid_sequence_test_shadow.num_speakers
print(n_classes)

shadow_loss = nn.CrossEntropyLoss()

# NN
shadow_net = shadow_net_type(in_size, n_hidden, n_classes)
shadow_net = shadow_net.to(device)
shadow_net.apply(models.weights_init)

shadow_optim = optim.Adam(params=shadow_net.parameters(), lr=lr)

#SVM
# shadowinputs, shadowtargets=load(sv_shadow_train_loader)
# shadow_svm=make_pipeline(PCA(n_components=180), MinMaxScaler(feature_range=(-1,1)), 
#                          svm.SVC(C=1, gamma=.1, probability=True))

In [None]:
# print(np.shape(shadowinputs))
# shadowtargets[5]

In [None]:
#Train NN

# below commented code is for comparison during debugging
# target_net = target_net_type(in_size,n_hidden,n_classes).to(device)
# target_net.apply(models.weights_init)

# target_loss = nn.CrossEntropyLoss()
# target_optim = optim.Adam(target_net.parameters(), lr=lr)

# train(target_net, target_train_loader, target_test_loader, target_optim, target_loss, n_epochs, verbose = False) #classes = range(valid_sequence_test.num_speakers),

# Train the shadow network, or restore it from a saved checkpoint.
if not pretrained:
    train(shadow_net, shadow_train_loader, shadow_test_loader, shadow_optim, shadow_loss, n_epochs, verbose = False)
else:
    fnm = 'model_weights/CNN_voice_classifier'+str(data_set)+'_shadow_'+transform_type+str(n_epochs-1)+'.pth'
#     fnm = 'model_weights/CNN_voice_classifier100_target_'+str(epoch)+'.pth'
    # map_location remaps the stored tensors onto the device selected at the top of
    # the notebook, so weights saved on a GPU also load on a CPU-only machine.
    chpt = torch.load(fnm, map_location=str(device))
    shadow_net.load_state_dict(chpt['state_dict'])

In [None]:
epoch = 24

# Only persist a shadow network that was trained in this session, mirroring the
# guard on the target checkpoint. Saving unconditionally would overwrite the
# checkpoint just loaded from this same path with a freshly initialised optimizer state.
if not pretrained:
    save_checkpoint({
                'epoch': epoch,
                'arch': 'CNN_voice_classifier',
                'state_dict': shadow_net.state_dict(),
                'optimizer' : shadow_optim.state_dict(),
            }, False, filename = 'model_weights/CNN_voice_classifier'+str(data_set)+'_shadow_'+transform_type+str(epoch)+'.pth')

In [None]:
#Train SVM
# shadow_svm.fit(shadowinputs, shadowtargets)

# Initialize Attack Model
A binary classifier to determine membership. 

In [None]:
# Builds the attack network with its loss and optimiser.
# The SVM-based attack net that was meant for comparison is currently commented out,
# so only the NN-based attack net is created.

# attack_net_svm = models.mlleaks_mlp(n_in=k).to(device)
# attack_net_svm.apply(models.weights_init)

# Attack MLP taking k inputs, placed on the selected device and initialised
# with models.weights_init.
attack_net_nn = models.mlleaks_mlp(n_in=k).to(device)
attack_net_nn.apply(models.weights_init)

# NOTE: attack_loss is assigned again, to the same loss type, in the attack-training cell.
attack_loss = nn.BCEWithLogitsLoss() #this one works
# attack_loss = nn.BCELoss() # this one doesn't work 
# attack_optim_svm= optim.Adam(attack_net_svm.parameters(), lr=lr)
attack_optim_nn= optim.Adam(attack_net_nn.parameters(), lr=lr)

In [None]:
#Trains SVM attack model
# train_attacker(attack_net_svm, shadow_svm, shadow_train_loader, shadow_out_loader, attack_optim_svm, attack_loss, n_epochs=2, k=k)

In [None]:
# NOTE(review): overrides the `pretrained = True` set in the NN hyperparameter cell so
# that the attack network below is trained rather than loaded. Any earlier cell that is
# re-run after this one will also see pretrained == False and will train and save again.
pretrained = False

In [None]:
# Train the NN attack model, or restore it from a saved checkpoint.
attack_loss = nn.BCEWithLogitsLoss()
n_epochs_attack = 50

if not pretrained:
    train_attacker(attack_net_nn, shadow_net, shadow_train_loader, shadow_out_loader, attack_optim_nn,
                   attack_loss, n_epochs=n_epochs_attack, k=k)

else:
    # The attack checkpoint is written with n_epochs_attack in its file name, so load it
    # under that same name (the previous hard-coded 24 pointed at a file this notebook
    # never writes).
    fnm = 'model_weights/CNN_voice_classifier'+str(data_set)+'_attack_'+transform_type+str(n_epochs_attack)+'.pth'
#     fnm = 'model_weights/CNN_voice_classifier100_target_'+str(epoch)+'.pth'
    # map_location remaps the stored tensors onto the device selected at the top of
    # the notebook, so weights saved on a GPU also load on a CPU-only machine.
    chpt = torch.load(fnm, map_location=str(device))
    attack_net_nn.load_state_dict(chpt['state_dict'])

#original:
# train_attacker(attack_net_nn, shadow_net, shadow_train_loader, shadow_out_loader, attack_optim_nn, attack_loss, n_epochs=50, k=k)

In [None]:
# save_checkpoint is already defined in the cell that precedes the target
# checkpoint save; the identical second definition that used to live here is dropped.

# Only persist an attack network that was trained in this session, mirroring the
# guard on the target checkpoint.
if not pretrained:
    save_checkpoint({
                # Record the attack net's own epoch count. The global `epoch` still
                # holds the value 24 left over from the target/shadow cells.
                'epoch': n_epochs_attack,
                'arch': 'CNN_voice_classifier',
                'state_dict': attack_net_nn.state_dict(),
                'optimizer' : attack_optim_nn.state_dict(),
            }, False, filename = 'model_weights/CNN_voice_classifier'+str(data_set)+'_attack_'+transform_type+str(n_epochs_attack)+'.pth')

# Evaluate Attack Nets
How well the trained attack models classify a sample as in or out of a target model's training dataset, and how performance is affected by target hyperparameters and which models attack which targets.

In [None]:
#attack net trained on svm shadow model on svm target, C=10, gamma=.1
# eval_attack_net(attack_net_svm, gen_svm, sv_target_train_loader, sv_target_out_loader, k)

In [None]:
#attack net trained on svm shadow model on svm target, C=1, gamma=.01
# eval_attack_net(attack_net_svm, maxacc_svm, sv_target_train_loader, sv_target_out_loader, k)

In [None]:
# Evaluate the attack net (trained on the NN shadow model) against the NN target.
# It is given the target's train loader and the target's "out" loader -- presumably
# as member / non-member samples; confirm against eval_attack_net in Utils.
eval_attack_net(attack_net_nn, target_net, target_train_loader, target_out_loader, k)

In [None]:
#attack net trained on nn shadow model on svm target, C=10, gamma=.1
# eval_attack_net(attack_net_nn, gen_svm, sv_target_train_loader, sv_target_out_loader, k)

In [None]:
# Summary table of manually recorded results (baselines and attack metrics).
columns = ['Transform','Training epochs', '# speakers','Train accuracy', 'Test accuracy', 'Attack type', 'Precision','Recall']

# do this for 10 & 100 speakers
# .2 S & 3 S
# sufficient training and over-training

# Manual data, one tuple per run, in the column order above.
_results = [
    # Attack 1:
    ('MFCC', 25, 69.0, .9994, .9632, 1, 0.89, 0.90),
    ('MFCC', 25, 255.0, .9961, .9443, 1, 0.88, 0.91),
    ('STFT', 25, 69.0, 0.9989, 0.9451, 1, 0.89, 0.92),
    ('STFT', 25, 255.0, 0.9958, 0.9181, 1, 0.85, 0.86),

    # Attack 3 w/max data:
    ('STFT', 25, 139.0, .9985, .9073, 3, .81, .90),
    ('STFT', 50, 511.0, .9942, .9057, 3, .84, .87),
    ('MFCC', 25, 139.0, .9969, .9136, 3, .82, .92),
    ('MFCC', 25, 511.0, .9960, .9321, 3, 0.83, 0.93),

    # Attack 3 on Attack1 models:
    ('MFCC', 25, 69.0, .9994, .9632, 3, 0.84, 0.95),
    ('MFCC', 25, 255.0, .9961, .9443, 3, 0.84, 0.94),
    ('STFT', 25, 69.0, 0.9989, 0.9451, 3, 0.81, 0.97),
    ('STFT', 25, 255.0, 0.9958, 0.9181, 3, 0.81, 0.90),
]

# Build the frame in one go rather than appending row by row with df.loc[len(df)].
# Every numeric column then gets a numeric dtype, which the gradient styling needs.
df = pd.DataFrame(_results, columns=columns)

df['Training epochs'] = df['Training epochs'].astype(float)
df['Attack type'] = df['Attack type'].astype(float)

#style table
import seaborn as sns

# One light colour map per group of columns.
cg = sns.light_palette("green", as_cmap=True)
cm = sns.light_palette("magenta", as_cmap=True)
bl = sns.light_palette("blue", as_cmap=True)
orr = sns.light_palette("orange", as_cmap=True)
gr = sns.light_palette("gray", as_cmap=True)

# df.style.bar(subset=['Train accuracy', 'Test accuracy'], align='mid', color=['#d65f5f', '#5fba7d'])
s = (
    df.style
    .background_gradient(cmap=cg, subset=['Train accuracy', 'Test accuracy'])
    .background_gradient(cmap=bl, subset=['Precision', 'Recall'])
    .background_gradient(cmap=orr, subset=['Training epochs'])
    .background_gradient(cmap=gr, subset=['Attack type'])
    .background_gradient(cmap=cm, subset=['# speakers'])
    .format({"Train accuracy": "{:.2%}", "Test accuracy": "{:.2%}"})
)

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in favour of
# Styler.hide(axis="index"). Use whichever this pandas version provides.
s = s.hide(axis="index") if hasattr(s, "hide") else s.hide_index()

s = s.set_properties(**{'font-size': "16pt",'column-size':"24pt",'width': '100px'})

s