## Imports

In [1]:
import sys 
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline  

from sklearn import svm, linear_model
from sklearn.linear_model import SGDClassifier
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.externals import joblib

import torch
import torchvision 
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

sys.path.insert(0, '../../../Utils/')
sys.path.insert(0, '../../../')
import cyphercat as cc

import models
from train import *
from metrics import * 
from SVC_Utils import *

#audio
import librosa as libr

print("Python: %s" % sys.version)
print("Pytorch: %s" % torch.__version__)

# determine device to run network on (runs on gpu if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


Python: 3.6.5 (default, Jul  6 2018, 19:12:46) 
[GCC 5.4.0 20160609]
Pytorch: 0.4.0


## NN Hyperparameters

In [2]:
batch_size = 128
lr = 0.001
k = 3

pretrained = False #run this with networks that have already been trained

transform_type = 'SFTF' #either STFT or MFCC  

data = 'Libri' #'Libri' or 'VOiCES'

In [3]:
### Speech preprocessing

class tensorToMFCC:
    def __call__(self, y):
#         y = y.numpy()
        dims = y.shape
        y = libr.feature.melspectrogram(np.reshape(y, (dims[1],)), 16000, n_mels=number_of_mels,
                               fmax=8000)
        y = libr.feature.mfcc(S = libr.power_to_db(y))
        y = torch.from_numpy(y)                           
        return y.float()

class STFT:
    def __call__(self,y):
        dims = y.shape
        y = np.abs(libr.core.stft(np.reshape(y, (dims[1],))))
        y = torch.from_numpy(y).permute(1,0)
        return y.float()

if transform_type == 'SFTF':
    target_net_type = cc.ft_cnn_classifer
    shadow_net_type = cc.ft_cnn_classifer
    in_size = 94# 20 forMFCC,  94 for STFT
    transform  = STFT() ## STFT or MFCC
elif transform_type == 'MFCC':
    transform  = tensorToMFCC()
    target_net_type = cc.MFCC_cnn_classifier
    shadow_net_type = cc.MFCC_cnn_classifier
    in_size = 20

## Audio hyperparameters

In [4]:
n_seconds = 3
n_epochs = 50
shadow_epochs = 50
n_epochs_attack = 100
sampling_rate = 16000
number_of_mels =128
lr = 0.001

# attacking means data for a target & shadow network.
# This will also split "out data" from totally different speakers -- data none of the 
# other networks have seen, for training & testing the attack network. This will be
# an equivalent amount of data to the train split as defined about

%load_ext autoreload
%autoreload 2
sys.path.insert(0, './../../../Utils')

## Load audio data: VOiCES or LibriSpeech, & Split into valid sequences

In [5]:
print('Loading splits')
subset = 'room-1'
if data == 'Libri':
    [dfs, sample_df] = cc.Libri_preload_and_split()
    print('Initializing dataset')
    valid_sequence_train_target = cc.LibriSpeechDataset(df=dfs[0], transform = transform)
    valid_sequence_test_target = cc.LibriSpeechDataset(df=dfs[1], transform = transform)
    valid_sequence_train_shadow = cc.LibriSpeechDataset(df=dfs[2], transform = transform)
    valid_sequence_test_shadow = cc.LibriSpeechDataset(df=dfs[3], transform = transform)
    valid_sequence_attack_in = cc.LibriSpeechDataset(df=dfs[4], transform = transform)
    valid_sequence_attack_out = cc.LibriSpeechDataset(df=dfs[5], transform = transform)

    print('Succesfully loaded libri-speech')
elif data == 'VOiCES':
    [dfs, sample_df] = cc.Voices_preload_and_split(subset = subset)
    print('Initializing dataset')
    valid_sequence_train_target = cc.Voices_dataset(df=dfs[0], transform = transform)
    valid_sequence_test_target = cc.Voices_dataset(df=dfs[1], transform = transform)
    valid_sequence_train_shadow = cc.Voices_dataset(df=dfs[2], transform = transform)
    valid_sequence_test_shadow = cc.Voices_dataset(df=dfs[3], transform = transform)
    valid_sequence_attack_in = cc.Voices_dataset(df=dfs[4], transform = transform)
    valid_sequence_attack_out = cc.Voices_dataset(df=dfs[5], transform = transform)

Loading splits
Initialising LibriSpeechDataset with minimum length = 3s and subset = train-clean-100
	 Finished indexing train-clean-100. 27949 usable files found.
	 Finished indexing test-clean. 2225 usable files found.
Build/load speaker membership inference splits
Found default speaker splits, loading dataframe
Build/load sample membership inference splits
Found default sample splits, loading dataframe

 ------- Speaker split statistics ------- 
		 ---- Split 0 ---- 
	Unique speakers 	 Samples
Male:		 63		 5539
Female:		 62		 5573
Total:		 125		 11112
		 ---- Split 1 ---- 
	Unique speakers 	 Samples
Male:		 63		 1414
Female:		 62		 1427
Total:		 125		 2841
		 ---- Split 2 ---- 
	Unique speakers 	 Samples
Male:		 31		 3519
Female:		 31		 3478
Total:		 62		 6997
		 ---- Split 3 ---- 
	Unique speakers 	 Samples
Male:		 32		 3462
Female:		 32		 3537
Total:		 64		 6999
		 ---- Split 4 ---- 
	Unique speakers 	 Samples
Male:		 16		 1414
Female:		 16		 1427
Total:		 32		 2841
		 ---- Split 

In [6]:
# # # to look at the index file:

# # #look at splits file for reference
# dff = pd.read_csv(os.getcwd()+'/../../../Datasets/splits/libri-train-clean-100/libri_4.csv')
# print(dff.head())
# dff2 = pd.read_csv(os.getcwd()+'/../../../Datasets/splits/VOiCES-room-1/VOiCES_0.csv')
# print(dff2.head())


# # df = pd.read_csv(os.getcwd()+'/../../../Datasets/VOiCES-room-1.index.csv')
# # df.head()

# # g = df.groupby(['id','Section']).groups

# # dfn = pd.DataFrame(columns = ['id','Section'])
# # idx = 0
# # for key in g.keys():
# #     dfn.at[idx,'id']=key[0]
# #     dfn.at[idx,'Section']=key[1]
# #     idx +=1

# # print(len(np.unique(df.id)), 'speakers')
# # print(len(df), 'files')
# # print(dfn.groupby('id').count().min()[0], 'tracks min')
# # print(dfn.groupby('id').count().max()[0], 'tracks max')
# # print(dfn.groupby('id').count().mean()[0], 'tracks mean')

# # print('min speaker minutes',df.groupby('id').mean()['speaker_minutes'].min())
# # print('max speaker minutes',df.groupby('id').mean()['speaker_minutes'].max())
# # print('mean speaker minutes',df.groupby('id').mean()['speaker_minutes'].mean())

In [7]:
# splits = [.2,.8]
# df1 = pd.DataFrame(columns = df.columns)
# df2 = pd.DataFrame(columns = df.columns)
# # For each speaker, identify unique segments: 
# for spkr_id in df.id.unique():
#     mini_df = df[df['id'] == spkr_id]
#     # Identify segments:
#     n_seg = len(mini_df.Section.unique())
#     seg1 = round(splits[0]*n_seg)
#     # Segments are not ordered in a particular way, so just pick the first few for seg1
#     seg1s = mini_df.Section.unique()[:seg1]
#     df1 = df1.append(mini_df[mini_df['Section'].isin(seg1s)])
#     df2 = df2.append(mini_df[~mini_df['Section'].isin(seg1s)])

In [8]:
batch_size = 32


# Loaders for data for target model & shadow model 
target_train_loader = DataLoader(valid_sequence_train_target,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8,
                    drop_last = True
                     # pin_memory=True # CUDA only
                     )


target_test_loader = DataLoader(valid_sequence_test_target,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8
                     # pin_memory=True # CUDA only
                     )

shadow_train_loader = DataLoader(valid_sequence_train_shadow,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8,
                    drop_last = True
                     # pin_memory=True # CUDA only
                     )


shadow_test_loader = DataLoader(valid_sequence_test_shadow,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8
                     # pin_memory=True # CUDA only
                     )


test_loader_in = DataLoader(valid_sequence_attack_in,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8
                     # pin_memory=True # CUDA only
                     )


test_loader_out = DataLoader(valid_sequence_attack_out,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=8
                     # pin_memory=True # CUDA only
                     )


## Set up model tracking

In [9]:
#table with summary

# To do: extract accuracy from train/eval funcs and automatically add to table
summary_file = 'summary.pkl'
columns = ['Transform','Training epochs', '# speakers','Train accuracy', 'Test accuracy', 'Attack type', 'Precision','Recall']

try:
    df = pd.read_pickle(summary_file)

except:
    df = pd.DataFrame(columns = columns)
    
df_idx = len(df)

#set a bunch of known values
df.at[df_idx,'Transform'] =transform_type
df.at[df_idx,'Training epochs'] = n_epochs
df.at[df_idx,'Attack type'] = 1

# Initialize/Train Targets
The model being attacked; if network, architecture can differ from that of shadow network.

In [10]:
#Initialize NN

#in_size defined above
n_hidden = 512
n_classes = valid_sequence_test_target.num_speakers
print(n_classes,' speakers')
df.at[df_idx,'# speakers']=n_classes


target_net = target_net_type(n_classes).to(device)
target_net.apply(models.weights_init)

target_loss = nn.CrossEntropyLoss()
target_optim = optim.Adam(target_net.parameters(), lr=lr)

125  speakers


In [11]:
#file name for this set of hyperparameters
fn = 'model_weights/CNN_voice_classifier'+data+'_target_'+transform_type+str(n_epochs-1)+'.pth'

#Train NN
if not pretrained:
    train_accuracy, test_accuracy = cc.train(target_net, target_train_loader, target_test_loader, target_optim, target_loss, n_epochs, verbose = False) 
    df.at[df_idx,'Train accuracy'] =round(train_accuracy,4)
    df.at[df_idx,'Test accuracy'] = round(test_accuracy,4)
    cc.save_checkpoint(model = target_net, optimizer = target_optim,
                           epoch = n_epochs-1, data_descriptor = data, 
                           accuracy = [train_accuracy, test_accuracy],
                           filename = fn)
    
else:
    cc.load_checkpoint(model = target_net, optimizer = target_optim, checkpoint = fn)


[0/50]
Training:

Accuracy = 62.84 %%


Test:

Accuracy = 54.73 %%


[1/50]
Training:

Accuracy = 84.90 %%


Test:

Accuracy = 76.91 %%


[2/50]
Training:

Accuracy = 88.97 %%


Test:

Accuracy = 79.23 %%


[3/50]
Training:

Accuracy = 94.34 %%


Test:

Accuracy = 85.92 %%


[4/50]
Training:

Accuracy = 94.52 %%


Test:

Accuracy = 87.96 %%


[5/50]
Training:

Accuracy = 94.14 %%


Test:

Accuracy = 85.22 %%


[6/50]
Training:

Accuracy = 95.15 %%


Test:

Accuracy = 87.68 %%


[7/50]
Training:

Accuracy = 95.04 %%


Test:

Accuracy = 88.28 %%


[8/50]
Training:

Accuracy = 96.28 %%


Test:

Accuracy = 90.11 %%


[9/50]
Training:

Accuracy = 97.15 %%


Test:

Accuracy = 91.94 %%


[10/50]
Training:

Accuracy = 96.11 %%


Test:

Accuracy = 90.07 %%


[11/50]
Training:

Accuracy = 96.37 %%


Test:

Accuracy = 91.31 %%


[12/50]
Training:

Accuracy = 97.06 %%


Test:

Accuracy = 90.85 %%


[13/50]
Training:

Accuracy = 97.25 %%


Test:

Accuracy = 91.24 %%


[14/50]
Training:

Accuracy = 

# Initialize/Train Shadow Model
Shadow model mimics the target network, emulating the target model's differences in prediction probabilities for samples in and out of its dataset. For this attack, only one shadow model is used. 

In [12]:
valid_sequence_train_shadow.num_speakers

62

In [13]:
#Initialize models

n_classes = valid_sequence_test_shadow.num_speakers
print('n shadow speakers',n_classes)

#NN
shadow_net = shadow_net_type(n_classes).to(device)
shadow_net.apply(models.weights_init)

shadow_loss = nn.CrossEntropyLoss()
shadow_optim = optim.Adam(shadow_net.parameters(), lr=lr)

n shadow speakers 64


# Initialize Attack Model
A binary classifier to determine membership. 

In [14]:
# Attack the network: 

attack_net_nn = models.mlleaks_mlp(n_in=k).to(device)
attack_loss = nn.BCEWithLogitsLoss()
attack_optim_nn= optim.Adam(attack_net_nn.parameters(), lr=lr)

df_pr = cc.ml_leaks1(target=target_net, shadow_model = shadow_net, attacker_model = attack_net_nn,
            target_in_loader = test_loader_in, target_out_loader = test_loader_out,
            shadow_train_loader = shadow_train_loader, shadow_out_loader=shadow_test_loader,
            shadow_optim = shadow_optim, attack_optim = attack_optim_nn, 
            shadow_criterion = shadow_loss, attack_criterion = attack_loss, 
            shadow_epochs = shadow_epochs, attack_epochs = n_epochs_attack, retrain = True)

---- Training shadow network ----
[0/50]
Training:

Accuracy = 66.63 %%


Test:

Accuracy = 2.59 %%


[1/50]
Training:

Accuracy = 86.32 %%


Test:

Accuracy = 1.59 %%


[2/50]
Training:

Accuracy = 93.16 %%


Test:

Accuracy = 2.26 %%


[3/50]
Training:

Accuracy = 93.23 %%


Test:

Accuracy = 2.24 %%


[4/50]
Training:

Accuracy = 94.11 %%


Test:

Accuracy = 4.04 %%


[5/50]
Training:

Accuracy = 93.51 %%


Test:

Accuracy = 3.64 %%


[6/50]
Training:

Accuracy = 95.60 %%


Test:

Accuracy = 2.37 %%


[7/50]
Training:

Accuracy = 96.23 %%


Test:

Accuracy = 1.99 %%


[8/50]
Training:

Accuracy = 97.86 %%


Test:

Accuracy = 2.04 %%


[9/50]
Training:

Accuracy = 97.46 %%


Test:

Accuracy = 1.81 %%


[10/50]
Training:

Accuracy = 97.92 %%


Test:

Accuracy = 1.33 %%


[11/50]
Training:

Accuracy = 97.56 %%


Test:

Accuracy = 1.11 %%


[12/50]
Training:

Accuracy = 97.63 %%


Test:

Accuracy = 1.90 %%


[13/50]
Training:

Accuracy = 98.77 %%


Test:

Accuracy = 1.50 %%


[14/50]
Tr

In [15]:
df_pr

Unnamed: 0,Thresholds,Accuracy,Precision,Recall
0,0.500,84.994401,0.791822,0.950893
1,0.505,85.083987,0.793001,0.950893
2,0.510,85.128779,0.793592,0.950893
3,0.515,85.151176,0.794546,0.949554
4,0.520,85.151176,0.794546,0.949554
5,0.525,85.173572,0.794843,0.949554
6,0.530,85.151176,0.794766,0.949107
7,0.535,85.173572,0.795284,0.948661
8,0.540,85.173572,0.795284,0.948661
9,0.545,85.195969,0.795582,0.948661


In [16]:
# Ascertain best results

df.at[df_idx,'Precision'] = round(df_pr[df_pr['Accuracy']==df_pr['Accuracy'].max()].Precision.values[0],4)
df.at[df_idx,'Recall'] = round(df_pr[df_pr['Accuracy']==df_pr['Accuracy'].max()].Recall.values[0],4)


In [17]:
df

Unnamed: 0,Transform,Training epochs,# speakers,Train accuracy,Test accuracy,Attack type,Precision,Recall
0,SFTF,50,125,99.5227,94.9666,1,0.8585,0.9076


In [18]:
df.to_pickle(summary_file)

# Evaluate Attack Nets
How well the trained attack models classify a sample as in or out of a target model's training dataset, and how performance is affected by target hyperparameters and which models attack which targets.

In [19]:
df['# speakers'] =df['# speakers'].astype(float)
df['Training epochs'] =df['Training epochs'].astype(float)
df['Attack type'] =df['Attack type'].astype(float)

#style table
import seaborn as sns

cg = sns.light_palette("green", as_cmap=True)
cm = sns.light_palette("magenta", as_cmap=True)
bl = sns.light_palette("blue", as_cmap=True)
orr = sns.light_palette("orange", as_cmap=True)
gr = sns.light_palette("gray", as_cmap=True)

# df.style.bar(subset=['Train accuracy', 'Test accuracy'], align='mid', color=['#d65f5f', '#5fba7d'])
s = df.style.\
    background_gradient(cmap=cg,subset=['Train accuracy', 'Test accuracy']).\
    background_gradient(cmap=bl,subset=['Precision', 'Recall']).\
    background_gradient(cmap=orr,subset=['Training epochs']).\
    background_gradient(cmap=gr,subset=['Attack type']).\
    background_gradient(cmap=cm,subset=['# speakers']).\
    format({"Train accuracy": "{:.2%}","Test accuracy": "{:.2%}"}).\
    hide_index().\
    set_properties(**{'font-size': "16pt",'column-size':"24pt",'width': '100px'})

s

TypeError: ("Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'", 'occurred at index Train accuracy')

<pandas.io.formats.style.Styler at 0x7fedb03139e8>

In [20]:
#old, for reference for now:

# do this for 10 & 100 speakers
# .2 S & 3 S
#sufficient training and over-training

#manual data: 

#Attack 1:
df.loc[len(df)] = ['MFCC',25,69.0,.9994,.9632,1,0.89,0.90] 
df.loc[len(df)] = ['MFCC',25,255.0,.9961,.9443,1,0.88,0.91] 
df.loc[len(df)] = ['STFT',25,69.0,0.9989,0.9451,1,0.89,0.92] 
df.loc[len(df)] = ['STFT',25,255.0,0.9958,0.9181,1,0.85,0.86] 

#Attack 3 w/max data: 
df.loc[len(df)] = ['STFT',25,139.0,.9985,.9073,3,.81,.90] 
df.loc[len(df)] = ['STFT',50,511.0,.9942,.9057,3,.84,.87] 
df.loc[len(df)] = ['MFCC',25,139.0,.9969,.9136,3,.82,.92] 
df.loc[len(df)] = ['MFCC',25,511.0,.9960,.9321,3,0.83,0.93]


#Attack 3 on Attack1 models:
df.loc[len(df)] = ['MFCC',25,69.0,.9994,.9632,3,0.84,0.95] 
df.loc[len(df)] = ['MFCC',25,255.0,.9961,.9443,3,0.84,0.94] 
df.loc[len(df)] = ['STFT',25,69.0,0.9989,0.9451,3,0.81,0.97] 
df.loc[len(df)] = ['STFT',25,255.0,0.9958,0.9181,3,0.81,0.90] 

df['# speakers'] =df['# speakers'].astype(float)
df['Training epochs'] =df['Training epochs'].astype(float)
df['Attack type'] =df['Attack type'].astype(float)

#style table
import seaborn as sns

cg = sns.light_palette("green", as_cmap=True)
cm = sns.light_palette("magenta", as_cmap=True)
bl = sns.light_palette("blue", as_cmap=True)
orr = sns.light_palette("orange", as_cmap=True)
gr = sns.light_palette("gray", as_cmap=True)

# df.style.bar(subset=['Train accuracy', 'Test accuracy'], align='mid', color=['#d65f5f', '#5fba7d'])
s = df.style.\
    background_gradient(cmap=cg,subset=['Train accuracy', 'Test accuracy']).\
    background_gradient(cmap=bl,subset=['Precision', 'Recall']).\
    background_gradient(cmap=orr,subset=['Training epochs']).\
    background_gradient(cmap=gr,subset=['Attack type']).\
    background_gradient(cmap=cm,subset=['# speakers']).\
    format({"Train accuracy": "{:.2%}","Test accuracy": "{:.2%}"}).\
    hide_index().\
    set_properties(**{'font-size': "16pt",'column-size':"24pt",'width': '100px'})

s

TypeError: ("Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'", 'occurred at index Train accuracy')

<pandas.io.formats.style.Styler at 0x7fed9cd5a4a8>