In [None]:
# Environment setup: clone the MICO competition repository, install its
# requirements, install the package itself in editable mode, and finally
# install the extra requirements of the starting kit.
!git clone https://github.com/microsoft/MICO.git
!pip install -r MICO/requirements.txt
!pip install -e MICO/
!pip install -r MICO/starting-kit/requirements-starting-kit.txt

In [None]:
!python -m pip uninstall matplotlib
!pip install matplotlib==3.1.3
!pip install catboost

In [None]:
import os
# `import urllib` alone does not load the `error` submodule; import it explicitly
# so the `except` clause below does not depend on another library having done so.
import urllib.error

from torchvision.datasets.utils import download_and_extract_archive

# NOTE(review): this URL appears to carry a signed access token (`sig=...`) obtained
# at registration; consider reading it from an environment variable instead of
# committing it with the notebook.
url = "https://membershipinference.blob.core.windows.net/mico/cifar10.zip?si=cifar10&spr=https&sv=2021-06-08&sr=b&sig=d7lmXZ7SFF4ZWusbueK%2Bnssm%2BsskRXsovy2%2F5RBzylg%3D"
filename = "cifar10.zip"
md5 = "c615b172eb42aac01f3a0737540944b1"

# WARNING: this will download and extract a 2.1GiB file, if not already present. Please save the file and avoid re-downloading it.
try:
    # The archive is stored in the current directory, verified against `md5`,
    # extracted next to it, and kept afterwards (remove_finished=False).
    download_and_extract_archive(url=url, download_root=os.curdir, extract_root=None, filename=filename, md5=md5, remove_finished=False)
except urllib.error.HTTPError as e:
    # An HTTP error here most likely means the signed URL is missing or no longer valid.
    print(e)
    print("Have you replaced the URL above with the one you got after registering?")

Downloading https://membershipinference.blob.core.windows.net/mico/cifar10.zip?si=cifar10&spr=https&sv=2021-06-08&sr=b&sig=d7lmXZ7SFF4ZWusbueK%2Bnssm%2BsskRXsovy2%2F5RBzylg%3D to ./cifar10.zip


  0%|          | 0/2163267879 [00:00<?, ?it/s]

Extracting ./cifar10.zip to .


In [None]:
import os
import sys

import numpy as np
import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import csv

from tqdm.notebook import tqdm

import lightgbm as lgb
from sklearn.model_selection import train_test_split

# Put the cloned MICO sources on the import path (once) so that the project
# modules below can be imported.
module_path = os.path.abspath('/content/MICO/src/mico-competition/')
already_registered = any(entry == module_path for entry in sys.path)
if not already_registered:
    sys.path.append(module_path)

from mico import ChallengeDataset, load_model
from challenge_datasets import load_cifar10

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
class MLPNet(nn.Module):
    """Fully connected binary classifier over 10-dimensional inputs.

    Architecture: Linear(10, 128) -> ReLU -> Dropout(0.3) -> Linear(128, 64)
    -> ReLU -> Dropout(0.3) -> Linear(64, 64) -> ReLU -> Linear(64, 1) -> Sigmoid.
    The output is one value in [0, 1] per input row.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order define the state_dict keys and the
        # seeded initialisation, so they are kept exactly as they were.
        self.fc1 = nn.Linear(10, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 1)

    def forward(self, x):
        """Return the sigmoid score, shape (N, 1), for inputs of shape (N, 10)."""
        hidden = x
        # The first two stages share the same Linear -> ReLU -> Dropout pattern.
        for linear, dropout in ((self.fc1, self.dropout1), (self.fc2, self.dropout2)):
            hidden = dropout(torch.relu(linear(hidden)))
        hidden = torch.relu(self.fc3(hidden))
        return torch.sigmoid(self.fc4(hidden))

#defining dataset class
from torch.utils.data import Dataset, DataLoader
class attackDataset(Dataset):
    """In-memory dataset pairing feature rows with their labels.

    Both `x` and `y` are copied into float32 tensors at construction time;
    the number of samples is the size of the first dimension of `x`.
    """

    def __init__(self, x, y):
        self.x, self.y = (torch.tensor(values, dtype=torch.float32) for values in (x, y))
        self.length = self.x.shape[0]

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Return the `(features, label)` pair stored at position `idx`."""
        return self.x[idx], self.y[idx]

def _mlp_accuracy(model, features, labels):
    """Fraction of rows whose rounded model output equals the corresponding label."""
    scores = model(features).reshape(-1).cpu().numpy()
    return (scores.round() == np.asarray(labels)).mean()


def trainMLP(X, Y, evalData, evalLabel, epochs=60, learning_rate=0.001, batch_size=128):
    """Train an `MLPNet` attack classifier and plot its ROC and accuracy curves.

    `X`/`Y` are split 80/20 (fixed `random_state=42`) into a training and a
    held-out test part. After every epoch the accuracy on the training part, the
    test part and the separate `evalData`/`evalLabel` set is printed and recorded.

    Args:
        X: feature rows; each row must have the 10 values `MLPNet` expects.
        Y: binary labels (0/1), one per row of `X`.
        evalData: feature rows of an additional evaluation set.
        evalLabel: labels of the additional evaluation set.
        epochs: number of passes over the training split.
        learning_rate: Adam learning rate.
        batch_size: mini-batch size of the training loader.

    Returns:
        The trained `MLPNet`, left in eval mode on `device`.
    """
    # Imported here so the function does not rely on a later notebook cell
    # having been executed before it is called.
    from matplotlib import pyplot as plt
    from sklearn.metrics import auc, roc_curve

    trainData, testData, trainLabel, testLabel = train_test_split(X, Y, test_size=0.2, random_state=42)
    trainloader = DataLoader(attackDataset(trainData, trainLabel), batch_size=batch_size, shuffle=True)

    MLPModel = MLPNet().to(device)
    optimizer = torch.optim.Adam(MLPModel.parameters(), lr=learning_rate, weight_decay=1e-5)
    loss_fn = nn.BCELoss()

    def _to_features(data):
        return torch.tensor(np.asarray(data), dtype=torch.float32).to(device)

    # The evaluation inputs never change, so build their tensors once instead of every epoch.
    trainFeatures = _to_features(trainData)
    testFeatures = _to_features(testData)
    evalFeatures = _to_features(evalData)

    losses = []
    accur = []
    accurTest = []
    accurEval = []
    for epoch in range(epochs):
        MLPModel.train()
        for x_train, y_train in trainloader:
            x_train = x_train.to(device)
            y_train = y_train.to(device)
            optimizer.zero_grad()
            # The model emits shape (N, 1); reshape the labels to match for BCELoss.
            loss = loss_fn(MLPModel(x_train), y_train.reshape(-1, 1))
            loss.backward()
            optimizer.step()

        MLPModel.eval()
        with torch.no_grad():
            acc = _mlp_accuracy(MLPModel, trainFeatures, trainLabel)
            accTest = _mlp_accuracy(MLPModel, testFeatures, testLabel)
            accEval = _mlp_accuracy(MLPModel, evalFeatures, evalLabel)

        # Store a plain float (loss of the epoch's last mini-batch) rather than the
        # tensor, which would keep its autograd graph alive.
        lastLoss = loss.item()
        losses.append(lastLoss)
        accur.append(acc)
        accurTest.append(accTest)
        accurEval.append(accEval)
        print("epoch {}\t accuracy : {}".format(epoch, acc))
        print("loss: ", lastLoss)
        print("test accuracy : ", accTest)
        print("eval accuracy : ", accEval)

    # ROC on the held-out split. The raw sigmoid scores are used: rounding them
    # first would collapse the curve to a single operating point.
    MLPModel.eval()
    with torch.no_grad():
        testScores = MLPModel(testFeatures).reshape(-1).cpu().numpy()
    fpr, tpr, thresholds = roc_curve(testLabel, testScores)
    plt.figure(figsize=(8, 8))
    plt.plot(fpr, tpr, label="MIA ROC curve (area = %0.2f)" % auc(fpr, tpr))
    plt.plot([0, 1], [0, 1], "k--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC curve")
    plt.legend(loc="lower right")
    plt.show()

    # Accuracy per epoch; the x axis must have one entry per recorded epoch.
    epochAxis = range(len(accur))
    plt.plot(epochAxis, accur, label="Train")
    plt.plot(epochAxis, accurTest, label="Test")
    plt.plot(epochAxis, accurEval, label="Eval")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()
    return MLPModel

In [None]:
# Accumulators for the attack-classifier training set: feature rows and their labels.
attackData, attackLabel = [], []

In [None]:
!mkdir attackDir
!unzip attackData42k.zip -d attackDir/

In [None]:
!mkdir /content/attackDir/attackData42k/rohit/data/
!unzip /content/attackDir/attackData42k/rohit/attackData.zip -d /content/attackDir/attackData42k/rohit/data/

In [None]:
# Directory whose files are parsed as attack-data CSVs by the loading loop.
# NOTE(review): the preceding cell extracts into .../rohit/data/ while this points
# at .../navya/data/ — confirm which directory is the intended source.
directory = '/content/attackDir/attackData42k/navya/data/'

In [None]:
# Append every CSV row found directly under `directory` to the module-level lists:
# columns 0-9 go to `attackData`, column 10 goes to `attackLabel`.
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# the row order (and therefore any seeded train/test split) reproducible.
for filename in sorted(os.listdir(directory)):
    csvFile = os.path.join(directory, filename)
    # Only regular files are parsed; sub-directories are skipped.
    if not os.path.isfile(csvFile):
        continue
    # newline="" is the mode the csv module documents for files it reads.
    with open(csvFile, "r", newline="") as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # Blank lines yield an empty row, which has no label column.
                continue
            attackData.append(row[0:10])
            attackLabel.append(row[10])
print(len(attackLabel))

149600


In [None]:
# Turn the accumulated rows (strings read from CSV) into float NumPy arrays.
attackData = np.asarray(attackData).astype(float)
attackLabel = np.asarray(attackLabel).astype(float)

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# get metric and train, test support
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, roc_curve, auc

# get classifier models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

def _plot_roc(labels, scores, title):
    """Print the mean FPR/TPR of the ROC for `scores` vs `labels` and show the curve."""
    fpr, tpr, _ = roc_curve(labels, scores)
    print("mean fpr:", np.mean(fpr))
    print("mean tpr:", np.mean(tpr))

    # https://developers.google.com/machine-learning/crash-course/classification/roc-and-auc#:~:text=An%20ROC%20curve%20(receiver%20operating,False%20Positive%20Rate
    # The figure is displayed only; nothing is written to disk.
    plt.figure(figsize=(8, 8))
    plt.plot(fpr, tpr, label="MIA ROC curve (area = %0.2f)" % auc(fpr, tpr))
    plt.plot([0, 1], [0, 1], "k--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()


def trainDecisionTree(attackData, attackLabel, evalData, evalLabel):
    """Train a CatBoost binary classifier on the attack data and report its quality.

    Despite the name, the model is a `CatBoostClassifier` (500 iterations,
    depth 2, learning rate 0.3, Logloss). The attack data is split 80/20 with a
    fixed `random_state=42`; accuracy, precision, recall and F1 are printed for
    the held-out part, and ROC curves are shown for both the held-out part and
    the separate evaluation set.

    Args:
        attackData: feature rows used for training and the held-out test split.
        attackLabel: binary labels, one per row of `attackData`.
        evalData: feature rows of an additional evaluation set.
        evalLabel: labels of the additional evaluation set.

    Returns:
        The fitted `CatBoostClassifier`.
    """
    print("attackdata len: ", len(attackData))
    print("attackLabel len: ", len(attackLabel))

    X_train, X_test, y_train, y_test = train_test_split(
        attackData, attackLabel, test_size=0.2, random_state=42)

    # Class balance of the training split.
    zeroCount = int(np.count_nonzero(np.asarray(y_train) == 0))
    print("Zero:", zeroCount)
    print("One:", len(y_train) - zeroCount)

    catModel = CatBoostClassifier(
        iterations=500,
        depth=2,
        learning_rate=0.3,
        loss_function="Logloss",
        verbose=True,
    )  # https://catboost.ai/en/docs/concepts/loss-functions-classification

    catModel.fit(X_train, y_train)
    accuracy = catModel.score(X_test, y_test)
    precision, recall, f1_score, _ = precision_recall_fscore_support(
        y_test, catModel.predict(X_test), average="binary"
    )
    print("accuracy:", accuracy)
    print("precision:", precision)
    print("recall:", recall)
    print("f1_score:", f1_score)

    # ROC curves use the predicted probability of class 1 as the score.
    _plot_roc(y_test, catModel.predict_proba(X_test)[:, 1], "ROC curve Test")
    _plot_roc(evalLabel, catModel.predict_proba(evalData)[:, 1], "ROC curve Eval")

    return catModel

In [None]:
from numpy import dot
from numpy.linalg import norm
# Architecture of shadow model
class ShadowNet(nn.Module):
    """Convolutional shadow model producing 10 output values per image.

    Three tanh-activated convolutions with a max-pool after the first and an
    average-pool after the last, followed by flattening and a single linear
    layer. The linear layer expects 6400 flattened features, which is what a
    3x32x32 input yields.
    """

    def __init__(self):
        super().__init__()
        # The position of every layer inside the Sequential determines the
        # state_dict keys, so the order must stay exactly as listed.
        layers = [
            nn.Conv2d(3, 128, kernel_size=8, stride=2, padding=3),
            nn.Tanh(),
            nn.MaxPool2d(kernel_size=3, stride=1),
            nn.Conv2d(128, 256, kernel_size=3),
            nn.Tanh(),
            nn.Conv2d(256, 256, kernel_size=3),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(in_features=6400, out_features=10),
        ]
        self.shadowCnn = nn.Sequential(*layers)

    def forward(self, x):
        """Run the batch `x` through the network and return its 10 outputs per sample."""
        return self.shadowCnn(x)

In [None]:
# Walk the challenge directory tree (scenario / phase / model folder). For the
# "train" phase the target-model outputs and membership solutions are collected
# into challengeData / challengeLabel; for the other phases membership
# predictions are written to <results>/<scenario>/<phase>/<model>/prediction.csv.
CHALLENGE = "cifar10"
LEN_TRAINING = 50000
LEN_CHALLENGE = 100
RESULTS_ROOT = "/content/results/"
SHADOW_MODEL_PATH = "shadowModel106.pt"

scenarios = os.listdir(CHALLENGE)
phases = ['train','dev', 'final']
print(scenarios)

dataset = load_cifar10(dataset_dir="/data")

criterion = torch.nn.CrossEntropyLoss()
# "binary": use the tree classifier(s); "MLP": use the MLP attack model.
predictionType = "binary"
# "single": one classifier (`catModel`); "multi": one classifier per predicted
# class, looked up in `treeModelDict`.
modelType = "single"
# NOTE(review): `catModel`, `treeModelDict` and `MLPModel` are not defined in this
# cell; whichever one the selected mode needs must already exist in the kernel.

dirPath = RESULTS_ROOT
# exist_ok makes the cell re-runnable without deleting the results tree first.
os.makedirs(dirPath, exist_ok=True)

# The shadow model is identical for every challenge model, so load it once.
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
shadowModel = ShadowNet()
shadowModel.load_state_dict(torch.load(SHADOW_MODEL_PATH, map_location=device))
shadowModel = shadowModel.to(device)
shadowModel.eval()

for scenario in tqdm(scenarios, desc="scenario"):
    scenarioDir = os.path.join(RESULTS_ROOT, scenario, "")
    os.makedirs(scenarioDir, exist_ok=True)
    challengeData = []
    challengeLabel = []
    for phase in tqdm(phases, desc="phase"):
        phaseDir = os.path.join(scenarioDir, phase, "")
        os.makedirs(phaseDir, exist_ok=True)
        root = os.path.join(CHALLENGE, scenario, phase)
        print(root)

        # Model folders are visited in numeric order of the index after the underscore.
        for model_folder in tqdm(sorted(os.listdir(root), key=lambda d: int(d.split('_')[1])), desc="model"):
            path = os.path.join(root, model_folder)
            dirPath = os.path.join(phaseDir, model_folder, "")
            os.makedirs(dirPath, exist_ok=True)
            challenge_dataset = ChallengeDataset.from_path(path, dataset=dataset, len_training=LEN_TRAINING)
            challenge_points = challenge_dataset.get_challenges()

            if phase == "train":
                # Solutions are only requested for the train phase.
                challenge_sol = challenge_dataset.get_solutions()
                train_dataset = challenge_dataset.get_train_dataset()
                eval_dataset = challenge_dataset.get_eval_dataset()

            print(path)
            model = load_model('cifar10', path)
            model = model.to(device)

            # Only the first batch (at most 200 challenge points) is scored.
            challenge_dataloader = torch.utils.data.DataLoader(challenge_points, batch_size=200)
            features, labels = next(iter(challenge_dataloader))
            features = features.to(device)
            labels = labels.to(device)

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                output = model(features)
                Soutput = shadowModel(features)
            outNumpy = output.cpu().numpy()
            SOutNumpy = Soutput.cpu().numpy()

            cos_sim = .0
            if phase == "train":
                for itCount, outpT in enumerate(outNumpy):
                    shadowFeature = SOutNumpy[itCount]
                    # Cosine similarity between target-model and shadow-model outputs.
                    cos_sim += dot(outpT, shadowFeature) / (norm(outpT) * norm(shadowFeature))
                    challengeData.append(outpT)
                    challengeLabel.append(float(challenge_sol[itCount]))
                # Average over the points actually scored rather than a fixed 200.
                print("Average cosine: ", cos_sim / max(len(outNumpy), 1))

            predictions = np.argmax(outNumpy, axis=1)

            if phase != "train":
                if predictionType == "binary":
                    if modelType == "multi":
                        Predict = []
                        for itrCount, outp in enumerate(outNumpy):
                            # One classifier per class predicted by the target model.
                            binaryModel = treeModelDict[predictions[itrCount]]
                            Predict.append(binaryModel.predict(outp.reshape(1, -1))[0])
                    elif modelType == "single":
                        Predict = catModel.predict(outNumpy)
                    else:
                        raise ValueError("unknown modelType: {!r}".format(modelType))
                elif predictionType == "MLP":
                    print("predicting using MLP", type(output))
                    MLPModel.eval()
                    with torch.no_grad():
                        MLPModel = MLPModel.to(device)
                        Predict = MLPModel(output)
                        Predict = Predict.reshape(-1).cpu().detach().numpy().round()
                else:
                    raise ValueError("unknown predictionType: {!r}".format(predictionType))

                Predict = np.asarray(Predict)
                assert np.all((0 <= Predict) & (Predict <= 1))
                print("saving:", dirPath)
                # newline="" is the mode the csv module documents for files it writes.
                with open(os.path.join(dirPath, "prediction.csv"), "w", newline="") as f:
                    csv.writer(f).writerow(Predict)

        dirPath = phaseDir

        # Stop after the first phase ("train"): at this point only the training
        # features for the attack classifiers are collected. Remove this `break`
        # (and the one below) to score the remaining phases and scenarios.
        break
    break

In [None]:
# Delete the `results` directory (presumably the /content/results tree created by
# the scoring cell — assumes the working directory is /content).
# NOTE(review): if the notebook is run top to bottom, this removes the results
# before the final `!mv results ...` cell can move them.
!rm -rf results

In [None]:
# Delete the extracted attack-data directory. Rows already read into
# attackData / attackLabel stay in memory, but the CSV-loading cell cannot be
# re-run afterwards without extracting the archives again.
!rm -rf attackDir/

In [None]:
# Train both attack classifiers on the attack data, using the collected challenge
# outputs as the extra evaluation set. The MLP is kept as well (its return value
# used to be discarded) so the scoring loop's "MLP" mode has a model to use.
MLPModel = trainMLP(attackData, attackLabel, challengeData, challengeLabel)
catModel = trainDecisionTree(attackData, attackLabel, challengeData, challengeLabel)

In [None]:
# Move the `results` directory into drive/MyDrive/MICO/42k/.
# Assumes Google Drive is mounted at ./drive — TODO confirm; no mount call is
# visible in this notebook.
!mv results drive/MyDrive/MICO/42k/