In [1]:
!pip3 install opacus

Collecting opacus
  Downloading opacus-1.4.1-py3-none-any.whl.metadata (7.9 kB)
Downloading opacus-1.4.1-py3-none-any.whl (226 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opacus
Successfully installed opacus-1.4.1


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import accuracy_score 

In [3]:
# Number of samples per mini-batch; handed to get_dataloader() when the
# train/test DataLoaders are built.
batch_size = 50

In [4]:
class ChurnDataset(Dataset):
    """Churn CSV exposed as a PyTorch ``Dataset`` of ``[features, label]`` pairs.

    On construction the CSV is read, the ``Surname``, ``CustomerId`` and
    ``RowNumber`` columns are dropped, the categorical columns are one-hot
    encoded and every predictor column is standardised with ``StandardScaler``.

    Attributes:
        categorical: names of the columns that are one-hot encoded.
        target: name of the label column.
        churn_frame: one-hot encoded frame (predictors and target, unscaled).
        X: standardised predictors as a ``DataFrame``.
        y: target column as a ``Series``.

    NOTE(review): the scaler is fit on every row of the CSV, so any train/test
    split made afterwards shares scaling statistics between the two parts.
    """

    def __init__(self, csv_file):
        """Load and preprocess the CSV at ``csv_file`` (path or file-like)."""
        df = pd.read_csv(csv_file)

        df = df.drop(["Surname", "CustomerId", "RowNumber"], axis=1)

        # Grouping variable names
        self.categorical = ["Geography", "Gender"]
        self.target = "Exited"

        # One-hot encoding of categorical variables. Naming the columns
        # explicitly ties each prefix to its own column instead of relying on
        # the order in which pandas discovers object-typed columns.
        self.churn_frame = pd.get_dummies(
            df, columns=self.categorical, prefix=self.categorical
        )

        # Save target and predictors
        self.X = self.churn_frame.drop(self.target, axis=1)
        self.y = self.churn_frame[self.target]

        scaler = StandardScaler()
        X_array = scaler.fit_transform(self.X)
        self.X = pd.DataFrame(X_array)

        # Plain arrays for item access: one positional lookup serves both the
        # features and the label and avoids building a pandas row per sample.
        self._features = np.asarray(X_array)
        self._labels = self.y.to_numpy()

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self._labels)

    def __getitem__(self, idx):
        """Return ``[features, label]`` for the positional index ``idx``.

        ``idx`` may be an int (negative values count from the end) or a
        ``torch.Tensor`` index, which is converted to plain Python first.
        """
        # random_split may hand over tensor indices; convert them so numpy
        # indexing sees ordinary ints / lists.
        if isinstance(idx, torch.Tensor):
            idx = idx.tolist()

        return [self._features[idx], self._labels[idx]]

In [5]:
def get_CHURN_model(n_features=13, hidden_size=64):
    """Build the feed-forward binary classifier used for churn prediction.

    Args:
        n_features: width of the input layer (number of predictor columns).
            Defaults to 13, the value previously hard-coded here.
        hidden_size: width of each of the two hidden layers. Defaults to 64.

    Returns:
        An ``nn.Sequential`` of Linear -> ReLU -> Linear -> ReLU -> Linear
        whose last layer emits a single value per sample (no final
        activation is applied).
    """
    model = nn.Sequential(
        nn.Linear(n_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 1),
    )
    return model

In [6]:
def get_dataloader(csv_file, batch_size, train_frac=0.8, seed=None):
    """Load the churn CSV and return train/test loaders plus their subsets.

    Args:
        csv_file: path of the CSV passed on to ``ChurnDataset``.
        batch_size: number of samples per batch for both loaders.
        train_frac: fraction of rows assigned to the training subset; the
            remainder forms the test subset. Defaults to 0.8.
        seed: optional integer. When given, the random split is drawn from a
            dedicated ``torch.Generator`` seeded with it, so the same rows
            land in each subset on every run. ``None`` keeps the global RNG.

    Returns:
        ``(trainloader, testloader, trainset, testset)``.
    """
    # Load dataset
    dataset = ChurnDataset(csv_file)

    # Split into training and test
    train_size = int(train_frac * len(dataset))
    test_size = len(dataset) - train_size

    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
    trainset, testset = random_split(dataset, [train_size, test_size], **split_kwargs)

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

    # Shuffling adds nothing for held-out data; a fixed order keeps
    # evaluation batches stable between passes.
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

    return trainloader, testloader, trainset, testset

In [7]:
def train(trainloader, net, optimizer, n_epochs=100):
    """Train ``net`` with binary cross-entropy on logits and return it.

    Args:
        trainloader: iterable yielding ``(inputs, labels)`` batches; it is
            iterated once per epoch.
        net: the module to train. Its output is compared against labels
            reshaped to ``(batch, 1)``, so it is assumed to emit one logit
            per sample.
        optimizer: optimizer already bound to ``net``'s parameters.
        n_epochs: number of passes over ``trainloader``.

    Returns:
        The trained ``net`` (after ``net.to("cpu")``).

    The mean batch loss of every epoch is printed. If an epoch yields no
    batches the mean is reported as ``nan`` instead of raising
    ``ZeroDivisionError``.
    """
    device = "cpu"

    net = net.to(device)
    # Make sure layers with train/eval-specific behaviour are in training mode.
    net.train()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(n_epochs):

        running_loss = 0.0
        n_batches = 0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize.
            # Labels get a trailing dimension so their shape matches the
            # network output expected by the loss.
            outputs = net(inputs.float())
            loss = criterion(outputs, labels.float().unsqueeze(1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        mean_loss = running_loss / n_batches if n_batches else float("nan")
        print("Epoch {} - Training loss: {}".format(epoch, mean_loss))

    return net


In [8]:
# Relative path to the churn CSV consumed by get_dataloader().
csv_file = "data/churn.csv"

# get_dataloader() returns (train loader, test loader, train subset, test subset).
# NOTE(review): the split is drawn with random_split and no seed is set in
# this notebook, so the train/test membership differs from run to run.
trainloader, testloader, train_ds, test_ds = get_dataloader(csv_file, batch_size)

In [9]:
# Non-private baseline: a fresh network trained without any privacy mechanism.
net = get_CHURN_model()

# Adam with a small L2 penalty (weight_decay) on the network's parameters.
optimizer = optim.Adam(net.parameters(), weight_decay=0.0001, lr=0.003)

# Train for 50 epochs; train() returns the trained network.
model = train(trainloader, net, optimizer, 50)

Epoch 0 - Training loss: 0.4377533576451242
Epoch 1 - Training loss: 0.3663581402041018
Epoch 2 - Training loss: 0.3442584626376629
Epoch 3 - Training loss: 0.33838390540331603
Epoch 4 - Training loss: 0.33441441049799325
Epoch 5 - Training loss: 0.33233569730073215
Epoch 6 - Training loss: 0.3310216405428946
Epoch 7 - Training loss: 0.3281798171810806
Epoch 8 - Training loss: 0.3269441844895482
Epoch 9 - Training loss: 0.32509594233706596
Epoch 10 - Training loss: 0.32279583225026726
Epoch 11 - Training loss: 0.3208660346455872
Epoch 12 - Training loss: 0.3186106376349926
Epoch 13 - Training loss: 0.3177291705273092
Epoch 14 - Training loss: 0.3174736758694053
Epoch 15 - Training loss: 0.3157928120344877
Epoch 16 - Training loss: 0.31431026444770394
Epoch 17 - Training loss: 0.3124549970962107
Epoch 18 - Training loss: 0.30975242527201774
Epoch 19 - Training loss: 0.31037411466240883
Epoch 20 - Training loss: 0.30951178735122087
Epoch 21 - Training loss: 0.3084459109697491
Epoch 22 - 

In [10]:
# Hyperparameters for the differentially private run.
# Passed to PrivacyEngine.make_private() as `max_grad_norm`.
max_per_sample_grad_norm = 1.5
# Expected fraction of the training subset in one batch.
# NOTE(review): not referenced by any later cell visible in this notebook.
sample_rate = batch_size/len(train_ds)
# Passed to PrivacyEngine.make_private() as `noise_multiplier`.
noise_multiplier = 0.8

In [16]:
from opacus import PrivacyEngine

# Number of training epochs for the private run. The previous code passed
# `batch_size` in this position, which only worked because both values were 50;
# the epoch count must not silently follow the batch size.
dp_n_epochs = 50

# Fresh model and optimizer so the private run does not start from the
# weights of an earlier training run.
net = get_CHURN_model()
optimizer = optim.Adam(net.parameters(), weight_decay=0.0001, lr=0.003)

# make_private() returns the model, optimizer and data loader to use for
# differentially private training.
privacy_engine = PrivacyEngine()
model, optimizer, dataloader = privacy_engine.make_private(
    module=net,
    noise_multiplier=noise_multiplier,
    max_grad_norm=max_per_sample_grad_norm,
    optimizer=optimizer,
    data_loader=trainloader,
)
model = train(dataloader, model, optimizer, n_epochs=dp_n_epochs)

Epoch 0 - Training loss: 0.5341288127936423
Epoch 1 - Training loss: 0.5202931649051606
Epoch 2 - Training loss: 0.5341383019695058
Epoch 3 - Training loss: 0.5288592478260398
Epoch 4 - Training loss: 0.5171439621597529
Epoch 5 - Training loss: 0.5163358460646122
Epoch 6 - Training loss: 0.5369994322769344
Epoch 7 - Training loss: 0.49765840321779253
Epoch 8 - Training loss: 0.5046963236760348
Epoch 9 - Training loss: 0.5088824790902435
Epoch 10 - Training loss: 0.5092353354673833
Epoch 11 - Training loss: 0.5087992254644632
Epoch 12 - Training loss: 0.5309229969978333
Epoch 13 - Training loss: 0.5106239337474108
Epoch 14 - Training loss: 0.5036877773702144
Epoch 15 - Training loss: 0.4927969908807427
Epoch 16 - Training loss: 0.48599050249904396
Epoch 17 - Training loss: 0.5188661337830126
Epoch 18 - Training loss: 0.5169508713996038
Epoch 19 - Training loss: 0.49798491708934306
Epoch 20 - Training loss: 0.503883030358702
Epoch 21 - Training loss: 0.5164223099593073
Epoch 22 - Trainin

In [19]:
# Ask the privacy engine's accountant for epsilon at delta = 1e-5 and print it
# with two decimals.
# NOTE(review): delta is hard-coded; it is commonly chosen smaller than
# 1 / len(train_ds) -- confirm 1e-5 is appropriate for this dataset size.
epsilon = privacy_engine.accountant.get_epsilon(delta=1e-5)
print (f" ε = {epsilon:.2f}")


 ε = 5.19
