In [123]:
!pip install codecarbon

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
[0m

In [124]:
!pip install captum

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
[0m

In [125]:
import json
import numpy as np
import logging
from codecarbon import EmissionsTracker as ET
from opacus import PrivacyEngine 
from captum.attr import IntegratedGradients

logger = logging.getLogger()
logger.setLevel(logging.INFO)

class responsibleModel:
    
    __modelname__ = ""
    __framework__ = ""
    __emissions__ = 0.0
    __classbalance__ = 0.0
    __interpretable_degree__ = 0.0
    __epsilon__ = 0.0
    __tracker__ = None
    __privacy_engine__ = None
    
    def __init__(self,):
        self.__modelname__ = ""
        self.__framework__ = ""
        self.__emissions__ = 0.0
        self.__classbalance__ = 0.0
        self.__interpretable_degree__ = 0.0
        self.__epsilon__ = 0.0
        
        self.__tracker__ = ET(project_name = "",
            measure_power_secs = 15,
            save_to_file = False)
        
        self.__privacy_engine__ = PrivacyEngine()

    def __init__(self, 
                 modelname: str,
                 framework:str,
                 interpretable_degree:float = 0.0,
                 emissions:float = 0.0,
                 classbalance:float= 0.0,
                 epsilon:float = 0.0):
        
        self.__modelname__ = modelname
        self.__framework__ = framework
        self.__emissions__ = emissions
        self.__classbalance__ = classbalance
        self.__interpretable_degree__ = interpretable_degree
        self.__epsilon__ = epsilon
    
        self.__tracker__ = ET(project_name = modelname,
            measure_power_secs = 15,
            save_to_file = False)
        
        self.__privacy_engine__ = PrivacyEngine()
    
    def set_interpretability(self, interpretable_degree: float):
        self.__interpretable_degree__ = interpretable_degree

    def set_emissions(self, carbon_emissions: float):
        self.__emissions__ = carbon_emissions

    def set_classbalance(self, minclass: float):
        self.__classbalance__ = minclass

    def set_epsilon(self, privacy_epsilon: bool):
        self.__epsilon__ = privacy_epsilon

    def set_framework(self, framework: str):
        self.__framework__ = framework
        
    def calculate_emissions_index(self):

        if self.__emissions__ <= 500:
            emissionIndex = 3
        elif self.__emissions__ > 500 and self.__emissions__ <= 10000:
            emissionIndex = 2
        else:
            emissionIndex = 1

        return emissionIndex

    def calculate_privacy_index(self):
        if self.__epsilon__ <= 1:
            privacyIndex = 3
        elif self.__epsilon__ > 1 and self.__epsilon__ <= 10:
            privacyIndex = 2
        else:
            privacyIndex = 1

        return privacyIndex

    def calculate_interpretability_index(self):

        interIndex = 1

        if self.__interpretable_degree__ > .70:
            interIndex = 3
        elif self.__interpretable_degree__ > .50 and self.__interpretable_degree__ < .70:
            interIndex = 2
        else:
            interIndex = 1

        return interIndex

    def calculate_bias_index(self):
        
        if self.__classbalance__ >= 0.4:
            bindex = 3
        elif self.__classbalance__ > 0.2 and self.__classbalance__ < 0.4:
            bindex = 2
        else:
            bindex = 1

        return bindex
    
    def describe_model(self):
        value = json.dumps({"model name": self.__modelname__,
                    "framework": self.__framework__,
                    "emissions": self.__emissions__,
                    "interpretability": self.__interpretable_degree__,
                    "privacy": self.__epsilon__,
                    "bias": self.__classbalance__,})        
        return value
    
    def model_rai_components(self):
        
        emission_index = self.calculate_emissions_index()
        privacy_index = self.calculate_privacy_index()
        bias_index = self.calculate_bias_index()
        interpret_index = self.calculate_interpretability_index()
        RAI_index = self.rai_index()
        
        value = json.dumps({"model name": self.__modelname__,
                            "framework": self.__framework__,
                            "rai index": RAI_index,
                            "emission_index": emission_index,
                            "privacy_index": privacy_index,
                            "bias_index": bias_index,
                            "interpretability_index": interpret_index})

        return value
        
    def rai_index(self):
    
        index = 0.0
        weights = 0.25

        emission_index = self.calculate_emissions_index()
        privacy_index = self.calculate_privacy_index()
        bias_index = self.calculate_bias_index()
        interpret_index = self.calculate_interpretability_index()

        index = weights * (emission_index + privacy_index + bias_index + interpret_index)

        return index

    def track_emissions(self):
        # Calculate Emissions
        self.__tracker__.start()
        
    def stop_tracking(self):
        self.__emissions__ =  self.__tracker__.stop()
        
    def calculate_bias(self, df_label: str):
        
        # Get the number of classes & samples
        label_classes = df_label.value_counts(ascending=True)
        totalvalues = label_classes.sum()
        min_class_count = label_classes.values[1]
        
        #calcualte the bias
        self.__classbalance__ = min_class_count / totalvalues
        
    def privatize(self, model, optimizer, dataloader, noise_multiplier, max_grad_norm):
        
        model, optimizer, dataloader = self.__privacy_engine__.make_private(module=model,
                                                                            optimizer=optimizer,
                                                                            data_loader=dataloader,
                                                                            noise_multiplier = noise_multiplier,
                                                                            max_grad_norm= max_grad_norm)

        return model, optimizer, dataloader
        
    def calculate_privacy_score(self, delta):
        self.__epsilon__ = self.__privacy_engine__.get_epsilon(delta)
    
    def interpret(self, input_tensor, model,target_class):
        
        ig = IntegratedGradients(model)
        input_tensor.requires_grad_()
        attr, delta = ig.attribute(input_tensor,target=target_class, return_convergence_delta=True)
        attr = attr.detach().numpy()
        importance = np.mean(attr, axis=0)
        
        importance = np.abs(importance)        
        importance[::-1].sort()
        
        total_weightage = np.sum(importance)
        key_features_weightage = importance[0] + importance[1] + importance[2]
        
        __interpretable_degree__ = key_features_weightage / total_weightage
            
class models:
    model_list = []
    
    def __init__(self):
        self.model_list = []
    
    def add_model(self, modelname, framework, intrepretability, emissions, bias, epsilon):
        model = responsibleModel(modelname, framework, intrepretability, emissions, bias, epsilon)
        self.model_list.append(model)
        
    def add_model(self, model):
        self.model_list.append(model)
        
    def remove_model(self, modelname):
        self.model_list.remove(modelname)
        
    def list_models(self):
        model_json = ""
        for model in self.model_list:
            model_json += model.describe() 
            if model != self.model_list[-1]:
                model_json += ","
                                
            model_json += "\n"
            
        model_json = "[" + model_json + "]"
        
        return model_json
    
    def get_model(self, modelname):
        for model in self.model_list:
            if model.__modelname__ == modelname:
                return model
        return None
    
    def rank_models(self, rank_by = "rai_index"):
        sorted_json = ""
        
        if rank_by == "rai_index":
            sorted_models = sorted(self.model_list, key=lambda x: x.rai_index(), reverse=True)
        elif rank_by == "emissions":
            sorted_models = sorted(self.model_list, key=lambda x: x.calculate_emissions_index(), reverse=True)
        elif rank_by == "privacy":
            sorted_models = sorted(self.model_list, key=lambda x: x.calculate_privay_index(), reverse=True)
        elif rank_by == "bias":
            sorted_models = sorted(self.model_list, key=lambda x: x.calculate_bias_index(), reverse=True)
        elif rank_by == "interpretability":
            sorted_models = sorted(self.model_list, key=lambda x: x.calculate_interpretability_index(), reverse=True)
            
        for model in sorted_models:
            sorted_json += model.model_rai_components()
            if(model != sorted_models[-1]):
                sorted_json += ","
            sorted_json += "\n"
            
        sorted_json = "[" + sorted_json + "]"
        return sorted_json

In [126]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [127]:
model2 = responsibleModel(modelname = 'test', framework = 'pytorch')
model_list = models()

[codecarbon INFO @ 14:39:49] [setup] RAM Tracking...
[codecarbon INFO @ 14:39:49] [setup] GPU Tracking...
[codecarbon INFO @ 14:39:49] No GPU found.
[codecarbon INFO @ 14:39:49] [setup] CPU Tracking...
[codecarbon INFO @ 14:39:51] CPU Model on constant consumption mode: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 14:39:51] >>> Tracker's metadata:
[codecarbon INFO @ 14:39:51]   Platform system: Linux-4.14.262-200.489.amzn2.x86_64-x86_64-with-debian-10.6
[codecarbon INFO @ 14:39:51]   Python version: 3.7.10
[codecarbon INFO @ 14:39:51]   Available RAM : 3.793 GB
[codecarbon INFO @ 14:39:51]   CPU count: 2
[codecarbon INFO @ 14:39:51]   CPU model: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 14:39:51]   GPU count: None
[codecarbon INFO @ 14:39:51]   GPU model: None
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "


In [128]:
# Load data from csv file
df = pd.read_csv('../data/titanic.csv')
df.drop(['PassengerId', 'Name'], axis=1, inplace=True)

categorical_columns = ['Sex', 'Embarked']
df_cleaned = pd.get_dummies(df, prefix=categorical_columns)
df_cleaned['Age'].fillna(inplace= True, method='bfill' )

df_cleaned.head()

Unnamed: 0,target,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.25,0,1,0,0,1
1,1,1,38.0,1,0,71.2833,1,0,1,0,0
2,1,3,26.0,0,0,7.925,1,0,0,0,1
3,1,1,35.0,1,0,53.1,1,0,0,0,1
4,0,3,35.0,0,0,8.05,0,1,0,0,1


In [129]:
df_cleaned.isna().sum()

target        0
Pclass        0
Age           0
SibSp         0
Parch         0
Fare          0
Sex_female    0
Sex_male      0
Embarked_C    0
Embarked_Q    0
Embarked_S    0
dtype: int64

In [130]:
# converting dataframe to numpy array
labels = df_cleaned["target"].to_numpy()
label_df = df_cleaned["target"]

df_cleaned = df_cleaned.drop(['target'], axis=1)
feature_names = list(df_cleaned.columns)
features = df_cleaned.to_numpy()

# loading data into torch tensor
feature_tensor = torch.from_numpy(features).type(torch.FloatTensor)
label_tensor = torch.from_numpy(labels)

# loading data into torch dataset
train_dataset = torch.utils.data.TensorDataset(feature_tensor, label_tensor)

# loading data into torch dataloader
batch_size = 32
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [131]:
model2.calculate_bias(label_df)
print(model2.calculate_bias_index())

3


In [132]:
feature_tensor.shape

torch.Size([891, 10])

In [133]:
class TitanicModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(10, 10)
        self.sigmoid1 = nn.Sigmoid()
        self.linear2 = nn.Linear(10, 8)
        self.sigmoid2 = nn.Sigmoid()
        self.linear3 = nn.Linear(8, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        lin1_out = self.linear1(x)
        sigmoid_out1 = self.sigmoid1(lin1_out)
        sigmoid_out2 = self.sigmoid2(self.linear2(sigmoid_out1))
        return self.softmax(self.linear3(sigmoid_out2))

In [134]:
def train(trainloader, model, optimizer):
    
    loss_fn = nn.CrossEntropyLoss()
    epochs = 100
    
    for epoch in range(epochs):
        for i, (features, label) in enumerate(train_dataloader):
            # Forward pass
            y_pred = model(features)
            loss = loss_fn(y_pred, label)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 10 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch+1, epochs, i+1, len(train_dataloader), loss.item()))
    
    return model
    

In [135]:
# start tracking Emissions
model2.track_emissions()

In [136]:
vanilla_model = TitanicModel()

optimizer = torch.optim.Adam(vanilla_model.parameters(), lr=0.001)

trained_vanilla_model = train(train_dataloader, vanilla_model, optimizer)

Epoch [1/100], Step [10/28], Loss: 0.6763
Epoch [1/100], Step [20/28], Loss: 0.6801
Epoch [2/100], Step [10/28], Loss: 0.6729
Epoch [2/100], Step [20/28], Loss: 0.6224
Epoch [3/100], Step [10/28], Loss: 0.6716
Epoch [3/100], Step [20/28], Loss: 0.6313
Epoch [4/100], Step [10/28], Loss: 0.6718
Epoch [4/100], Step [20/28], Loss: 0.6658
Epoch [5/100], Step [10/28], Loss: 0.6985
Epoch [5/100], Step [20/28], Loss: 0.6712
Epoch [6/100], Step [10/28], Loss: 0.6517
Epoch [6/100], Step [20/28], Loss: 0.6320
Epoch [7/100], Step [10/28], Loss: 0.6746
Epoch [7/100], Step [20/28], Loss: 0.6665
Epoch [8/100], Step [10/28], Loss: 0.6010
Epoch [8/100], Step [20/28], Loss: 0.6007
Epoch [9/100], Step [10/28], Loss: 0.6452
Epoch [9/100], Step [20/28], Loss: 0.6016
Epoch [10/100], Step [10/28], Loss: 0.5923
Epoch [10/100], Step [20/28], Loss: 0.6465
Epoch [11/100], Step [10/28], Loss: 0.6384
Epoch [11/100], Step [20/28], Loss: 0.6458
Epoch [12/100], Step [10/28], Loss: 0.6823
Epoch [12/100], Step [20/28],

In [137]:
model2.stop_tracking()


[codecarbon INFO @ 14:40:16] Energy consumed for RAM : 0.000004 kWh. RAM Power : 1.4222803115844729 W
[codecarbon INFO @ 14:40:16] Energy consumed for all CPUs : 0.000259 kWh. All CPUs Power : 105.0 W
[codecarbon INFO @ 14:40:16] 0.000263 kWh of electricity used since the begining.


In [138]:
print(model2.calculate_emissions_index())

3


In [139]:
model2.interpret(feature_tensor, trained_vanilla_model, target_class=1)

In [140]:
print(model2.calculate_interpretability_index())

1


In [119]:
noise_multiplier = 5
max_per_sample_grad_norm = 1.5
sample_rate = batch_size/len(train_dataset)

dp_model = TitanicModel()

optimizer = optim.Adam(dp_model.parameters(), weight_decay=0.0001, lr=0.003)

priv_model, priv_opt, priv_datasetloader = model2.privatize(dp_model, optimizer, train_dataloader, noise_multiplier, max_per_sample_grad_norm)

trained_dp_model = train(priv_datasetloader, priv_model, priv_opt)

04/29/2022 19:20:54:INFO:Despite set_to_none is set to False, opacus will set p.grad_sample and p.summed_grad to None due to non-trivial gradient accumulation behaviour


ValueError: Poisson sampling is not compatible with grad accumulation. You need to call optimizer.step() after every forward/backward pass or consider using BatchMemoryManager

In [120]:
# Calculate Differential Privacy
model2.calculate_privacy_score(delta=1e-5)

  f"Optimal order is the {extreme} alpha. Please consider expanding the range of alphas to get a tighter privacy bound."


In [49]:
model2.describe()

'{"model name": "test", "framework": "pytorch", "emissions": 7.962479560578028e-05, "explained": false, "privacy": 0.10466829124801816, "bias": 0.6161616161616161}'

In [52]:
model_list.add_model(model = model2)

In [54]:
models_json = model_list.rank_models()

jdata = json.loads(models_json)
df = pd.DataFrame(jdata)

df.head()

Unnamed: 0,model name,framework,rai index,emission_index,privacy_index,bias_index,explainability_index
0,test,pytorch,2.75,3,3,3,2
