In [None]:
import torch
from torch.utils.data import Dataset
from typing import Tuple
import numpy as np
import requests
import pandas as pd


import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np

import matplotlib.pyplot as plt
import matplotlib

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [None]:
import gdown

# Google Drive file ids of the three artifacts used by this notebook.
pub_id = '1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0'
priv_id = '1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN'
model_id = '1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo'

# Every download goes through the same Drive endpoint; only the id differs.
DRIVE_URL_TEMPLATE = 'https://drive.google.com/uc?id={}'

# Files are written to the current working directory under fixed names.
gdown.download(DRIVE_URL_TEMPLATE.format(pub_id), 'pub.pt', quiet=False)
gdown.download(DRIVE_URL_TEMPLATE.format(priv_id), 'priv.pt', quiet=False)
gdown.download(DRIVE_URL_TEMPLATE.format(model_id), 'model.pt', quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0
From (redirected): https://drive.google.com/uc?id=1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0&confirm=t&uuid=d7dfc07b-6c7a-440f-a68b-0488f3bccfa7
To: /content/pub.pt
100%|██████████| 250M/250M [00:03<00:00, 64.3MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN
From (redirected): https://drive.google.com/uc?id=1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN&confirm=t&uuid=5cac18a2-8421-40a9-99d5-42f7f2a9172e
To: /content/priv.pt
100%|██████████| 251M/251M [00:02<00:00, 105MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo
From (redirected): https://drive.google.com/uc?id=1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo&confirm=t&uuid=61326249-95c4-41a1-a34a-9f63c6bf9803
To: /content/model.pt
100%|██████████| 44.9M/44.9M [00:01<00:00, 37.2MB/s]


'model.pt'

In [None]:

#### LOADING THE MODEL

from torchvision.models import resnet18

# Build an untrained ResNet-18. Calling it without arguments gives random
# weights on every torchvision version and avoids the deprecated `pretrained`
# keyword.
model = resnet18()
# Replace the classification head with a 44-way layer; the input width is read
# from the existing head instead of being hard-coded.
model.fc = torch.nn.Linear(model.fc.in_features, 44)

# The checkpoint was downloaded to the working directory as 'model.pt', so it
# is loaded through the same relative path instead of a Colab-only absolute one.
ckpt = torch.load("model.pt", map_location="cpu")

model.load_state_dict(ckpt)



<All keys matched successfully>

In [None]:

#### DATASETS

class TaskDataset(Dataset):
    """In-memory dataset of (id, image, label) samples.

    The three parallel lists ``ids``, ``imgs`` and ``labels`` start empty and
    are expected to be filled by whoever builds (or unpickles) the dataset.

    Args:
        transform: optional callable applied to an image each time it is
            fetched; ``None`` returns the stored image unchanged.
    """

    def __init__(self, transform=None):
        self.transform = transform

        self.ids = []
        self.imgs = []
        self.labels = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        """Return ``(id, image, label)`` for ``index``, applying ``transform`` if set."""
        sample_id = self.ids[index]
        image = self.imgs[index]
        if self.transform is not None:
            image = self.transform(image)
        return sample_id, image, self.labels[index]

    def __len__(self):
        """Number of samples, taken from the ``ids`` list."""
        return len(self.ids)


class MembershipDataset(TaskDataset):
    """TaskDataset that additionally stores one membership entry per sample.

    ``membership`` is a list parallel to ``ids``/``imgs``/``labels``; it starts
    empty and is filled externally.
    """

    def __init__(self, transform=None):
        super().__init__(transform)
        self.membership = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int, int]:
        """Return ``(id, image, label, membership)`` for ``index``."""
        base_item = super().__getitem__(index)
        return (*base_item, self.membership[index])


# Load the two dataset objects downloaded into the working directory
# ('priv.pt' / 'pub.pt'); relative paths match where the download step wrote
# them and keep the notebook runnable outside Colab.
# NOTE(review): these files are unpickled into MembershipDataset instances, so
# the class definitions above must already be in scope and the files must come
# from a trusted source. Newer PyTorch releases may require an explicit
# `weights_only=False` to load full objects - confirm against the installed version.
privData: MembershipDataset = torch.load("priv.pt")
pubData: MembershipDataset = torch.load("pub.pt")

In [None]:
# Ignore the membership feature
def custom_collate(batch):
    """Collate a batch using only the first three fields of every sample.

    Each sample is reduced to ``(sample[0], sample[1], sample[2])`` before the
    batch is handed to PyTorch's default collate function, so any further
    fields are never collated.
    """
    trimmed = []
    for sample in batch:
        sample_id, image, label = sample[0], sample[1], sample[2]
        trimmed.append((sample_id, image, label))
    return torch.utils.data.dataloader.default_collate(trimmed)

In [None]:
import torch
from torchvision import transforms

class NormalizedDataset(torch.utils.data.Dataset):
    """Wrap a membership dataset and serve channel-normalized copies of its images.

    All images of the wrapped dataset are normalized once, at construction
    time, and cached in ``self.imgs``. ``ids``, ``labels`` and ``membership``
    are the wrapped dataset's own lists (shared, not copied).

    Args:
        dataset: object exposing ``ids``, ``labels`` and ``membership`` and
            yielding ``(id, image, label, membership)`` items.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.normalize = transforms.Normalize(mean=[0.298, 0.2962, 0.2987], std=[0.2886, 0.2875, 0.2889])

        self.ids = dataset.ids
        self.imgs = []
        self.labels = dataset.labels
        self.membership = dataset.membership

        self.normalize_images()

    def normalize_images(self):
        """(Re)build the cache of normalized images from the wrapped dataset.

        The cache is rebuilt from scratch, so calling this more than once does
        not duplicate entries.
        """
        self.imgs = [self.normalize(img) for _, img, _, _ in self.dataset]

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(id, normalized image, label, membership)`` for ``idx``.

        Reads the cached/shared lists directly instead of fetching the wrapped
        item again just to discard its image.
        """
        return self.ids[idx], self.imgs[idx], self.labels[idx], self.membership[idx]


# Wrap each split exactly once: re-running this cell must not normalize images
# that are already normalized.
if not isinstance(privData, NormalizedDataset):
    privData = NormalizedDataset(privData)
if not isinstance(pubData, NormalizedDataset):
    pubData = NormalizedDataset(pubData)

In [None]:
def _as_membership_dataset(samples):
    """Pack ``(id, image, label, membership)`` tuples into a MembershipDataset."""
    packed = MembershipDataset()
    packed.ids, packed.imgs, packed.labels, packed.membership = zip(*samples)
    return packed


# Partition the public split by its membership value (1 vs. 0); samples with
# any other value are left out of both groups.
member_images = []
non_member_images = []

for sample_id, img, lbl, is_member in pubData:
    if is_member == 1:
        member_images.append((sample_id, img, lbl, 1))
    elif is_member == 0:
        non_member_images.append((sample_id, img, lbl, 0))

memberData = _as_membership_dataset(member_images)
nonMemberData = _as_membership_dataset(non_member_images)

In [None]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def evaluateModel(dataset, model, isPriv, target_device=None):
    """Print and return the top-1 accuracy of ``model`` on ``dataset``.

    Args:
        dataset: map-style dataset whose items carry the image at position 1
            and the integer label at position 2.
        model: classifier returning one row of class scores per image.
        isPriv: when True, batches are built with ``custom_collate`` (only the
            first three fields of each sample are collated); otherwise the
            default collate function is used on the full sample.
        target_device: device to run on; defaults to the notebook-level
            ``device``.

    Returns:
        Accuracy as a float in [0, 1], or ``nan`` when the dataset yields no
        samples.

    Side effects:
        The model is switched to eval mode and moved to the chosen device, and
        is left that way when the function returns.
    """
    run_device = device if target_device is None else target_device

    model.eval()
    model.to(run_device)

    data_loader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        collate_fn=custom_collate if isPriv else None,
    )

    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            # Position 1 is the image batch, position 2 the labels, for both
            # collate variants.
            images, labels = batch[1].to(run_device), batch[2].to(run_device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print('Accuracy on privData: n/a (empty dataset)')
        return float('nan')

    accuracy = correct / total

    # The label text is kept as-is for continuity with earlier logs, even
    # though the function is also used on non-private datasets.
    print(f'Accuracy on privData: {accuracy:.2%}')
    return accuracy


In [None]:
def initialize_shadow_model(num_classes=44):
    """Create an untrained ResNet-18 with a ``num_classes``-way head on ``device``.

    Args:
        num_classes: size of the replacement fully connected output layer.

    Returns:
        The model, already moved to the notebook-level ``device``.
    """
    # No arguments = random initialisation on every torchvision version
    # (avoids the deprecated `pretrained` keyword).
    shadow_model = resnet18()
    # Read the head's input width from the model instead of hard-coding it.
    shadow_model.fc = nn.Linear(shadow_model.fc.in_features, num_classes)
    return shadow_model.to(device)

In [None]:
def train_model(model, data_loader, epochs=20):
    """Train ``model`` with Adam and cross-entropy, reporting after every epoch.

    After each epoch the loss of the last batch is printed and the model's
    accuracy on the loader's own dataset is reported through ``evaluateModel``.

    Args:
        model: classifier to optimise in place; expected to already live on
            the notebook-level ``device``.
        data_loader: yields ``(id, images, labels, membership)`` batches.
        epochs: number of passes over ``data_loader``.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()
    for epoch in range(epochs):
        # evaluateModel() leaves the model in eval mode, so training mode has
        # to be re-enabled at the start of every epoch, not just once.
        model.train()
        loss = None
        for _, images, labels, _ in data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        if loss is None:
            raise ValueError("data_loader produced no batches; nothing to train on")
        print(f"Epoch {epoch+1}, Loss: {loss.item()}")
        # Evaluate the model being trained on the data it is trained on,
        # rather than whatever notebook globals happen to be bound.
        evaluateModel(data_loader.dataset, model, False)

In [None]:
# Train `numModels` independently initialised shadow models on the member split.
numModels = 5
models = []
for i in range(numModels):
    # A fresh shuffling loader and a fresh network for every shadow model.
    member_loader = DataLoader(memberData, batch_size=64, shuffle=True)
    member_shadow_model = initialize_shadow_model()

    train_model(member_shadow_model, member_loader)
    models.append(member_shadow_model)


Epoch 1, Loss: 2.811387062072754
Accuracy on privData: 44.78%
Epoch 2, Loss: 1.0640230178833008
Accuracy on privData: 47.95%
Epoch 3, Loss: 1.2688685655593872
Accuracy on privData: 51.74%
Epoch 4, Loss: 0.9406952857971191
Accuracy on privData: 55.87%
Epoch 5, Loss: 1.0398545265197754
Accuracy on privData: 60.16%
Epoch 6, Loss: 1.484991192817688
Accuracy on privData: 62.35%
Epoch 7, Loss: 1.322201132774353
Accuracy on privData: 63.93%
Epoch 8, Loss: 1.442322015762329
Accuracy on privData: 67.40%
Epoch 9, Loss: 0.5353475213050842
Accuracy on privData: 66.01%
Epoch 10, Loss: 1.3168702125549316
Accuracy on privData: 68.72%
Epoch 11, Loss: 0.9807019829750061
Accuracy on privData: 70.75%
Epoch 12, Loss: 0.8567749261856079
Accuracy on privData: 70.81%
Epoch 13, Loss: 0.8695420026779175
Accuracy on privData: 71.63%
Epoch 14, Loss: 0.8971313238143921
Accuracy on privData: 75.67%
Epoch 15, Loss: 0.37715262174606323
Accuracy on privData: 78.34%
Epoch 16, Loss: 0.8136234879493713
Accuracy on privD

In [None]:
# member_shadow_model = initialize_shadow_model()

# member_loader = DataLoader(memberData, batch_size=64, shuffle=True)

# train_model(member_shadow_model, member_loader)

In [None]:
# evaluateModel(memberData, model, False)

In [None]:
#Save shadow model weights
# torch.save(models[0].state_dict(), "shadow1.pt")
# torch.save(models[1].state_dict(), "shadow2.pt")
# torch.save(models[2].state_dict(), "shadow3.pt")
# torch.save(models[3].state_dict(), "shadow4.pt")
# torch.save(models[4].state_dict(), "shadow5.pt")

In [None]:
# Accuracy of the first shadow model on the public non-member split.
# (The printed label says "privData" because evaluateModel hard-codes that text.)
evaluateModel(nonMemberData, models[0], False)

Accuracy on privData: 60.68%


In [None]:
def extract_logits(dataset, model, isPriv):
    """Run ``model`` over ``dataset`` and return all of its outputs as one array.

    Args:
        dataset: map-style dataset whose items carry the image at position 1.
        model: network to evaluate; it is switched to eval mode and moved to
            CUDA when available (CPU otherwise).
        isPriv: when True, batches are built with ``custom_collate`` (only the
            first three fields of each sample are collated); otherwise the
            default collate function is used on the full sample.

    Returns:
        NumPy array with the per-batch model outputs concatenated along axis 0,
        in dataset order (the loader does not shuffle).

    Raises:
        ValueError: from ``np.concatenate`` when the dataset yields no batches.
    """
    model.eval()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    data_loader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        collate_fn=custom_collate if isPriv else None,
    )

    logits_list = []
    with torch.no_grad():
        for batch in data_loader:
            # The image batch sits at position 1 for both collate variants.
            images = batch[1].to(device)
            logits_list.append(model(images).cpu().numpy())

    return np.concatenate(logits_list)

In [None]:
# Logits of every shadow model on the full public split, one array per model.
allPubLogits = []
for mo in models:
  pubLogits = extract_logits(pubData, mo,False)
  allPubLogits.append(pubLogits)

# Stack all per-model arrays row-wise; works for any number of shadow models
# instead of assuming exactly five.
logits = np.concatenate(allPubLogits, axis=0)

In [None]:
# Sanity check: expect one row per (shadow model, public sample) pair.
logits.shape

(100000, 44)

In [None]:
# One copy of the public membership labels per block of shadow-model logits,
# in the same order the logits were stacked.
membershipList = np.concatenate([pubData.membership] * len(allPubLogits), axis=0)
assert len(membershipList) == len(logits), "labels and shadow logits are misaligned"
print(membershipList.shape)
print(len(pubData.membership))

(100000,)
20000


In [None]:
# pub_logits = extract_logits(pubData, member_shadow_model,False)

In [None]:
# pub_logits

In [None]:
# pub_logits.shape

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

# Membership labels of the public split as an array (only referenced by the
# commented-out single-model variant below).
membership_array = np.array(pubData.membership)

# probabilities = F.softmax(torch.tensor(pub_logits), dim=1).numpy()

# X_train, X_test, y_train, y_test = train_test_split(pub_logits, membership_array, test_size=0.2, random_state=42)
# Attack features are the stacked shadow-model logits; targets are the
# replicated membership labels. 80/20 random split with a fixed seed.
# NOTE(review): the same public sample appears once per shadow model, so the
# random split can place copies of one sample in both train and test - the
# held-out metrics below may be optimistic; consider splitting by sample id.
X_train, X_test, y_train, y_test = train_test_split(logits, membershipList, test_size=0.2, random_state=42)

# Baseline attack model: logistic regression on the raw logits.
log_reg_model = LogisticRegression(max_iter=1000)
log_reg_model.fit(X_train, y_train)

y_pred_proba = log_reg_model.predict_proba(X_test)[:, 1]  # Probability of class 1 (membership)

# Hard 0/1 predictions, used only for the accuracy figure.
y_pred = log_reg_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")

print("Predicted probabilities:", y_pred_proba)

Accuracy: 0.5329
Predicted probabilities: [0.4512156  0.53838541 0.48327602 ... 0.4522738  0.50330504 0.48295818]


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve

# Threshold-free quality of the logistic-regression attack on the held-out split.
auc_score = roc_auc_score(y_test, y_pred_proba)
print("AUC Score:", auc_score)

# Full ROC curve of the same scores.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Report the TPR at the ROC point whose FPR lies nearest to the 5% target
# (nearest point, so its FPR may be slightly above or below the target).
target_fpr = 0.05
fpr_gap = np.abs(fpr - target_fpr)
closest_index = fpr_gap.argmin()
tpr_at_target_fpr = tpr[closest_index]

print(f"TPR at FPR = {target_fpr}: {tpr_at_target_fpr}")

AUC Score: 0.5418909506263228
TPR at FPR = 0.05: 0.06328600405679513


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import cross_val_score

def _report_attack(name, clf, fpr_target):
    """Score a fitted attack classifier on the held-out split and print its metrics.

    Prints accuracy, ROC AUC and the TPR at the ROC point whose FPR is nearest
    to ``fpr_target``, each prefixed with ``name``.

    Returns:
        ``(proba, accuracy, auc, fpr, tpr, thresholds, closest_index, tpr_at_target)``.
    """
    proba = clf.predict_proba(X_test)[:, 1]

    acc = clf.score(X_test, y_test)
    print(f"{name} Accuracy:", acc)

    auc = roc_auc_score(y_test, proba)
    print(f"{name} AUC Score:", auc)

    fpr_pts, tpr_pts, thr = roc_curve(y_test, proba)

    idx = np.argmin(np.abs(fpr_pts - fpr_target))
    tpr_at = tpr_pts[idx]
    print(f"{name} TPR at FPR = {fpr_target}: {tpr_at}")
    return proba, acc, auc, fpr_pts, tpr_pts, thr, idx, tpr_at


target_fpr = 0.05

# Initialize and train KNN model (with a 5-fold CV accuracy on the training split first)
knn_model = KNeighborsClassifier()
knn_cv_accuracy = cross_val_score(knn_model, X_train, y_train, cv=5, scoring='accuracy').mean()
print("KNN Cross-Validation Accuracy:", knn_cv_accuracy)

knn_model.fit(X_train, y_train)

(y_pred_proba_knn, accuracy_knn, auc_score_knn, fpr_knn, tpr_knn, thresholds_knn,
 closest_index_knn, tpr_at_target_fpr_knn) = _report_attack("KNN", knn_model, target_fpr)

# Initialize and train Random Forest model; seeded so the reported numbers and
# the scores derived from this model are reproducible across runs.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

(y_pred_proba_rf, accuracy_rf, auc_score_rf, fpr_rf, tpr_rf, thresholds_rf,
 closest_index_rf, tpr_at_target_fpr_rf) = _report_attack("Random Forest", rf_model, target_fpr)

# Initialize and train LDA model
lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train, y_train)

(y_pred_proba_lda, accuracy_lda, auc_score_lda, fpr_lda, tpr_lda, thresholds_lda,
 closest_index_lda, tpr_at_target_fpr_lda) = _report_attack("LDA", lda_model, target_fpr)


KNN Cross-Validation Accuracy: 0.5385624999999999
KNN Accuracy: 0.5437
KNN AUC Score: 0.5599606722917692
KNN TPR at FPR = 0.05: 0.05354969574036511
Random Forest Accuracy: 0.55605
Random Forest AUC Score: 0.578261169189161
Random Forest TPR at FPR = 0.05: 0.08924949290060852
LDA Accuracy: 0.5331
LDA AUC Score: 0.541954708122792
LDA TPR at FPR = 0.05: 0.06308316430020285


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split


class AttackNN(nn.Module):
    """Small fully connected binary classifier: input_dim -> 64 -> 32 -> 1.

    ReLU follows the first two linear layers and a sigmoid follows the last,
    so the output is one value in (0, 1) per input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(..., input_dim)`` tensor to ``(..., 1)`` sigmoid outputs."""
        hidden = self.layer1(x).relu()
        hidden = self.layer2(hidden).relu()
        return self.sigmoid(self.layer3(hidden))

# Convert the attack train/test split to float32 tensors; targets get a
# trailing dimension so they match the network's (N, 1) output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# y_test_tensor is built for symmetry; the metrics below use the NumPy y_test.
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# One input feature per logit column.
input_dim = X_train.shape[1]
NNmodel = AttackNN(input_dim)
# BCELoss expects probabilities, which AttackNN's final sigmoid provides.
criterion = nn.BCELoss()
optimizer = optim.Adam(NNmodel.parameters(), lr=0.001)

# Full-batch training: every epoch is a single optimizer step on the whole
# training tensor.
num_epochs = 200
for epoch in range(num_epochs):
    NNmodel.train()
    optimizer.zero_grad()
    outputs = NNmodel(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Log every 10th epoch only.
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Held-out membership probabilities as a flat NumPy vector.
NNmodel.eval()
with torch.no_grad():
    y_pred_proba_nn = NNmodel(X_test_tensor).numpy().flatten()

# Threshold at 0.5 for the accuracy figure.
y_pred_nn = (y_pred_proba_nn > 0.5).astype(int)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Neural Network Accuracy: {accuracy_nn:.4f}")

auc_score_nn = roc_auc_score(y_test, y_pred_proba_nn)
print(f"Neural Network AUC Score: {auc_score_nn:.4f}")

fpr_nn, tpr_nn, thresholds_nn = roc_curve(y_test, y_pred_proba_nn)

# TPR at the ROC point whose FPR is nearest to the target.
target_fpr = 0.05
closest_index_nn = np.argmin(np.abs(fpr_nn - target_fpr))
tpr_at_target_fpr_nn = tpr_nn[closest_index_nn]
print(f"Neural Network TPR at FPR = {target_fpr}: {tpr_at_target_fpr_nn:.4f}")


Epoch [10/200], Loss: 0.6946
Epoch [20/200], Loss: 0.6931
Epoch [30/200], Loss: 0.6921
Epoch [40/200], Loss: 0.6912
Epoch [50/200], Loss: 0.6904
Epoch [60/200], Loss: 0.6895
Epoch [70/200], Loss: 0.6886
Epoch [80/200], Loss: 0.6876
Epoch [90/200], Loss: 0.6866
Epoch [100/200], Loss: 0.6855
Epoch [110/200], Loss: 0.6846
Epoch [120/200], Loss: 0.6836
Epoch [130/200], Loss: 0.6826
Epoch [140/200], Loss: 0.6817
Epoch [150/200], Loss: 0.6806
Epoch [160/200], Loss: 0.6797
Epoch [170/200], Loss: 0.6788
Epoch [180/200], Loss: 0.6778
Epoch [190/200], Loss: 0.6772
Epoch [200/200], Loss: 0.6762
Neural Network Accuracy: 0.5551
Neural Network AUC Score: 0.5837
Neural Network TPR at FPR = 0.05: 0.0904


In [None]:
# Logits of the downloaded target model (`model`) on the private split;
# isPriv=True makes the loader collate only (id, image, label).
priv_logits = extract_logits(privData, model,True)

In [None]:
# Inspect the target-model logits computed for the private split.
priv_logits

array([[-20.503178 ,  -9.192432 ,  -2.731459 , ..., -17.683952 ,
        -18.533136 , -23.640701 ],
       [-19.986586 ,  -7.2347794,  -4.976416 , ..., -16.598358 ,
        -17.221487 , -18.416847 ],
       [-11.773463 , -15.171167 , -11.9648485, ..., -12.894601 ,
        -15.651463 , -11.394891 ],
       ...,
       [ -6.6044464,  -8.0064   , -18.3869   , ...,   8.628861 ,
        -17.67     , -16.866112 ],
       [-13.685062 , -18.500702 , -25.551348 , ...,   4.6387224,
        -17.442503 , -18.056301 ],
       [ -9.8048525, -12.730086 , -22.050993 , ...,   2.183865 ,
        -18.27964  , -16.122305 ]], dtype=float32)

In [None]:
# Predict membership probabilities for the private split from the target
# model's logits, once per attack model.
# Extracting for random forest
priv_scores1 = rf_model.predict_proba(priv_logits)[:, 1]

#Extracting for Logistic Regression
priv_scores2 = log_reg_model.predict_proba(priv_logits)[:, 1]

#Extracting for LDA
priv_scores3 = lda_model.predict_proba(priv_logits)[:, 1]


#Extracting for NN
# The network must be fed the private logits (not the held-out attack test
# tensor), otherwise the scores do not correspond to the private ids.
NNmodel.eval()
with torch.no_grad():
    priv_logits_tensor = torch.tensor(priv_logits, dtype=torch.float32)
    priv_scores4 = NNmodel(priv_logits_tensor).numpy().flatten()

In [None]:
# Inspect the random-forest membership scores for the private split.
priv_scores1

array([0.5 , 0.53, 0.63, ..., 0.58, 0.57, 0.54])

In [None]:

#### EXAMPLE SUBMISSION

# Submission table: one row per private sample id with its membership score
# (here the logistic-regression scores), written without the index column.
submission_columns = {"ids": privData.ids, "score": priv_scores2}
df = pd.DataFrame(submission_columns)
df.to_csv("check14.csv", index=False)

In [None]:
import os
import requests

# FIXME(security): the token should not live in the notebook. It is read from
# the MIA_TOKEN environment variable when set; the literal is kept only as a
# fallback so the cell still runs unchanged.
submission_token = os.environ.get("MIA_TOKEN", "76282151")

# Upload inside a `with` block so the CSV handle is closed even if the request fails.
with open("check14.csv", "rb") as submission_file:
    response = requests.post(
        "http://34.71.138.79:9090/mia",
        files={"file": submission_file},
        headers={"token": submission_token},
    )
print(response.json())

{'detail': 'Exceeded submissions. Only 1/h allowed.'}
