In [1]:
!pip3 install opacus

You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import accuracy_score 

In [3]:
# Mini-batch size handed to the DataLoaders; also used further down to derive
# the sampling rate given to the privacy engine.
batch_size = 50

# Seed the global RNGs so that Restart & Run All reproduces the same
# train/test split, shuffling order and weight initialisation.
# NOTE(review): a fixed global seed presumably makes the DP noise draws
# reproducible as well -- fine for an experiment, reconsider before releasing
# a model that must carry a real privacy guarantee.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
class ChurnDataset(Dataset):
    """Bank-churn table exposed as a map-style PyTorch dataset.

    The CSV is read once, identifier columns are dropped, the categorical
    columns are one-hot encoded and every predictor is standardised.

    Attributes:
        categorical: names of the columns that get one-hot encoded.
        target: name of the label column.
        churn_frame: encoded frame holding predictors and target.
        X: standardised predictors as a DataFrame (integer column labels).
        y: target column as a Series.

    NOTE(review): the scaler is fitted on the full file, i.e. before any
    train/test split a caller performs, so test rows influence the feature
    statistics.  Fixing that needs the split to happen before scaling.
    """

    def __init__(self, csv_file):
        """Load and preprocess the churn data.

        Args:
            csv_file: path (or anything ``pd.read_csv`` accepts) of the CSV.
                It must contain the columns "Surname", "CustomerId",
                "RowNumber", "Geography", "Gender" and "Exited".
        """
        df = pd.read_csv(csv_file)

        # Identifier columns carry no predictive signal.
        df = df.drop(["Surname", "CustomerId", "RowNumber"], axis=1)

        # Grouping variable names
        self.categorical = ["Geography", "Gender"]
        self.target = "Exited"

        # One-hot encode exactly the declared categorical columns instead of
        # relying on dtype inference to pick them.
        self.churn_frame = pd.get_dummies(
            df, columns=self.categorical, prefix=self.categorical
        )

        # Save target and predictors
        self.X = self.churn_frame.drop(self.target, axis=1)
        self.y = self.churn_frame[self.target]

        # Standardise predictors to zero mean / unit variance.
        scaler = StandardScaler()
        X_array = scaler.fit_transform(self.X)
        self.X = pd.DataFrame(X_array)

    def __len__(self):
        """Return the number of rows in the encoded frame."""
        return len(self.churn_frame)

    def __getitem__(self, idx):
        """Return ``[features, label]`` for the row(s) at position ``idx``.

        Args:
            idx: integer position, list of positions, or a tensor of them.

        Returns:
            A two-element list: the predictor values as a numpy array and the
            matching target value(s).
        """
        # Convert idx from tensor to list due to pandas bug (that arises when
        # using pytorch's random_split)
        if isinstance(idx, torch.Tensor):
            idx = idx.tolist()

        # Index both X and y by position so they can never get out of step,
        # whatever labels the Series index happens to carry.
        return [self.X.iloc[idx].values, self.y.iloc[idx]]

In [5]:
def get_CHURN_model(n_features=13, hidden_size=64):
    """Build the feed-forward churn classifier.

    The network is Linear -> ReLU -> Linear -> ReLU -> Linear and ends in a
    single output unit with no activation, so it produces a raw logit.

    Args:
        n_features: width of the input layer.  Defaults to 13 (presumably the
            number of predictor columns the churn dataset yields -- confirm
            against the data if the preprocessing changes).
        hidden_size: width of both hidden layers.  Defaults to 64.

    Returns:
        A freshly initialised ``nn.Sequential`` model.
    """
    return nn.Sequential(
        nn.Linear(n_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 1),
    )

In [6]:
def get_dataloader(csv_file, batch_size, train_fraction=0.8, seed=None):
    """Load the churn CSV and split it into train and test loaders.

    Args:
        csv_file: path of the churn CSV, forwarded to ``ChurnDataset``.
        batch_size: mini-batch size used by both loaders.
        train_fraction: share of rows assigned to the training split; the
            remainder becomes the test split.  Defaults to 0.8.
        seed: optional integer.  When given, the random split is drawn from a
            dedicated generator seeded with it, so the same rows land in the
            same split on every run.  ``None`` keeps the global RNG.

    Returns:
        ``(trainloader, testloader, trainset, testset)``.

    Raises:
        ValueError: if ``train_fraction`` lies outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            "train_fraction must be within [0, 1], got {}".format(train_fraction)
        )

    # Load dataset
    dataset = ChurnDataset(csv_file)

    # Split into training and test
    train_size = int(train_fraction * len(dataset))
    test_size = len(dataset) - train_size
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
    trainset, testset = random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    # Only the training data is reshuffled every epoch.
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

    # Evaluation order does not need to be random; keeping it fixed makes
    # per-batch test results comparable between runs.
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

    return trainloader, testloader, trainset, testset

In [7]:
def train(trainloader, net, optimizer, n_epochs=100, device="cpu"):
    """Train ``net`` with binary cross-entropy on logits and return it.

    Args:
        trainloader: iterable yielding ``(inputs, labels)`` tensor batches.
        net: model whose output is one logit per sample.
        optimizer: optimizer already constructed over ``net``'s parameters.
        n_epochs: number of full passes over ``trainloader``.
        device: device the model and batches are moved to.  Defaults to
            "cpu", which was previously hard-coded.

    Returns:
        The trained ``net`` (same object, moved to ``device``).

    Side effects:
        Prints the mean batch loss after every epoch.
    """
    net = net.to(device)
    net.train()  # make sure train-time behaviour is active

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(n_epochs):
        running_loss = 0.0
        n_batches = 0

        for inputs, labels in trainloader:
            inputs = inputs.to(device).float()
            # Add a trailing dimension so the labels line up with the
            # model's (batch, 1) logits.
            labels = labels.to(device).float().unsqueeze(1)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1

        # Counting batches (instead of len(trainloader)) avoids a
        # ZeroDivisionError on an empty loader and works for loaders that do
        # not define __len__.
        mean_loss = running_loss / n_batches if n_batches else float("nan")
        print("Epoch {} - Training loss: {}".format(epoch, mean_loss))

    return net


In [8]:
# Location of the churn CSV, relative to the notebook's working directory.
csv_file = "data/churn.csv"

# Build both loaders and keep the underlying splits around; the size of the
# training split is needed later on.
trainloader, testloader, train_ds, test_ds = get_dataloader(
    csv_file=csv_file,
    batch_size=batch_size,
)

In [9]:
# Non-private baseline: a fresh model trained with plain Adam for 50 epochs.
net = get_CHURN_model()
optimizer = optim.Adam(
    net.parameters(),
    lr=0.003,
    weight_decay=0.0001,
)
model = train(trainloader, net, optimizer, n_epochs=50)

Epoch 0 - Training loss: 0.4372446881607175
Epoch 1 - Training loss: 0.3615658536553383
Epoch 2 - Training loss: 0.34411114174872637
Epoch 3 - Training loss: 0.3393215463496745
Epoch 4 - Training loss: 0.3357653792947531
Epoch 5 - Training loss: 0.33380249571055176
Epoch 6 - Training loss: 0.33079878855496647
Epoch 7 - Training loss: 0.3274117988534272
Epoch 8 - Training loss: 0.32857229094952345
Epoch 9 - Training loss: 0.3246911917813122
Epoch 10 - Training loss: 0.32426827838644384
Epoch 11 - Training loss: 0.3234902940690517
Epoch 12 - Training loss: 0.3196106592193246
Epoch 13 - Training loss: 0.31688680476509035
Epoch 14 - Training loss: 0.3178471109829843
Epoch 15 - Training loss: 0.31471656495705247
Epoch 16 - Training loss: 0.31377251804806294
Epoch 17 - Training loss: 0.31305785235017536
Epoch 18 - Training loss: 0.3117420727387071
Epoch 19 - Training loss: 0.30834818808361886
Epoch 20 - Training loss: 0.3049893509596586
Epoch 21 - Training loss: 0.30424610823392867
Epoch 22 

In [10]:
# Hyperparameters handed to the PrivacyEngine in the next cell.
noise_multiplier = 0.8            # passed as noise_multiplier
max_per_sample_grad_norm = 1.5    # passed as max_grad_norm
# Fraction of the training split that makes up one mini-batch.
sample_rate = batch_size / len(train_ds)

In [11]:
from opacus import PrivacyEngine

# Differentially private run: start from a fresh model and optimizer so that
# nothing learned by the baseline above leaks into this experiment.
net = get_CHURN_model()

optimizer = optim.Adam(net.parameters(), weight_decay=0.0001, lr=0.003)

privacy_engine = PrivacyEngine(
    net,
    max_grad_norm=max_per_sample_grad_norm,
    noise_multiplier=noise_multiplier,
    sample_rate=sample_rate,
)

# Hook the engine into the optimizer (pre-1.0 Opacus API).
privacy_engine.attach(optimizer)

# Train for 50 epochs, matching the non-private baseline.  This call used to
# pass `batch_size` as the epoch count, which only worked because both values
# happen to be 50; changing the batch size would have silently changed the
# number of epochs (and with it the privacy budget spent).
model = train(trainloader, net, optimizer, n_epochs=50)



Epoch 0 - Training loss: 0.5503569555468857
Epoch 1 - Training loss: 0.5270387591794133
Epoch 2 - Training loss: 0.5277176320552825
Epoch 3 - Training loss: 0.5181171683594584
Epoch 4 - Training loss: 0.5345764994621277
Epoch 5 - Training loss: 0.5284739407710731
Epoch 6 - Training loss: 0.528376258444041
Epoch 7 - Training loss: 0.5217884532175958
Epoch 8 - Training loss: 0.5271113646216691
Epoch 9 - Training loss: 0.5200825051404536
Epoch 10 - Training loss: 0.5151959240902215
Epoch 11 - Training loss: 0.50616883514449
Epoch 12 - Training loss: 0.5010638677980751
Epoch 13 - Training loss: 0.5051685086451471
Epoch 14 - Training loss: 0.5023456629365682
Epoch 15 - Training loss: 0.4927736475132406
Epoch 16 - Training loss: 0.4892598757520318
Epoch 17 - Training loss: 0.5009411306120455
Epoch 18 - Training loss: 0.5002795286476612
Epoch 19 - Training loss: 0.499312580563128
Epoch 20 - Training loss: 0.5023985943291336
Epoch 21 - Training loss: 0.4957516428083181
Epoch 22 - Training loss

In [13]:
# Query the engine for the privacy budget consumed by the DP training run.
# get_privacy_spent() returns a pair; the second element is kept as
# best_alpha (presumably the accountant order that gave the tightest bound --
# see the Opacus docs).
epsilon, best_alpha = privacy_engine.get_privacy_spent()
print(f" ε = {epsilon:.2f}, δ = {privacy_engine.target_delta}")


 ε = 6.39, δ = 1e-06
