In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from taskdataset import TaskDataset
import matplotlib.pyplot as plt
import os
import time

from t2_functions import partition_ids

In [None]:
import sys

# Remember the directory the notebook was started in; a later cell passes
# `path` to os.chdir() to come back here.
path = os.getcwd()
print(path)
# Step up one directory so the relative paths below resolve from the parent.
# NOTE(review): this is a relative chdir, so re-running this cell without
# first running the cell that restores `path` climbs one more level each time.
os.chdir('..')
print(os.getcwd())
# NOTE(review): torch.load unpickles arbitrary Python objects; only load files
# from a trusted source. `dataset_t1` is not read by any other cell visible in
# this notebook; confirm whether it is still needed.
dataset_t1 = torch.load("task_1_modelstealing/data/ModelStealingPub.pt")

# Put <parent>/task_2_sybilattack/ on the import path before the import below
# (presumably where the `endpoints` package lives; verify).
sys.path.append(os.path.join(os.getcwd(), "task_2_sybilattack/"))

from endpoints.requests import sybil, sybil_reset


In [None]:
# Return to the directory saved in `path` so "data/..." resolves relative to it.
os.chdir(path)
print(os.getcwd())
# NOTE(review): torch.load unpickles arbitrary Python objects; only load files
# from a trusted source. The loaded object is expected to expose an `ids`
# attribute (read in the next cell).
dataset = torch.load("data/SybilAttack.pt")

In [None]:
# Copy the dataset ids into a NumPy array and show how many there are plus a sample.
ids = np.array(dataset.ids)
print(len(ids))
print(ids[:10])
# partition_ids comes from t2_functions and is not visible here. A later cell
# unpacks binned_ids[0] as (ids_train, ids_test), so it presumably returns one
# (train, test) pair per bin; TODO confirm.
# NOTE(review): train=0.1 / test=0.9 look like split fractions; confirm that the
# small training share is intentional.
binned_ids = partition_ids(ids, main_bin_num=10, train=0.1, test=0.9)

In [None]:
# Which transformation variant the endpoints are queried for.
task = 'binary'

# Reset both endpoints for the selected task, 'home' first and then 'defense'.
# (What a reset does server-side is defined in endpoints.requests; confirm there.)
for endpoint in ('home', 'defense'):
    sybil_reset(home_or_defense=endpoint, binary_or_affine=task)


In [None]:
# Work with the first bin only: one (train ids, test ids) pair.
ids_train, ids_test = binned_ids[0]
print(len(ids_train))
print(len(ids_test))


def _query_reps(ids, endpoint):
    """Query the sybil endpoint `endpoint` ('home' or 'defense') for `ids` under the current `task`."""
    return sybil(ids=ids, home_or_defense=endpoint, binary_or_affine=task)


# Requests are issued in a fixed order: home then defense, train then test.
A_train_reps = _query_reps(ids_train, 'home')
B_train_reps = _query_reps(ids_train, 'defense')
print(f"A train reps: {len(A_train_reps)}")

A_test_reps = _query_reps(ids_test, 'home')
B_test_reps = _query_reps(ids_test, 'defense')
print(f"A test reps: {len(A_test_reps)}")


In [None]:
class RepresentationsDataset(Dataset):
    """Paired dataset yielding ``(x[i], y[i])`` as tensors.

    Args:
        x: Indexable collection of inputs (lists, NumPy arrays or tensors).
        y: Indexable collection of targets, aligned with ``x`` by position.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of items.
    """

    def __init__(self, x, y):
        # A length mismatch would otherwise surface later as an IndexError in
        # the middle of an epoch, or silently drop the surplus targets.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x = x
        self.y = y

    def __len__(self):
        """Number of (input, target) pairs."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``idx``-th pair as a tuple of tensors."""
        # as_tensor converts lists/arrays like torch.tensor does, but passes
        # existing tensors through without the copy-construct UserWarning.
        return torch.as_tensor(self.x[idx]), torch.as_tensor(self.y[idx])


In [None]:
class MLP(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear.

    Args:
        input_size: Number of input features.
        hidden_size: Width of the hidden layer.
        output_size: Number of output features.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # fc1 is created before fc2, which fixes the order in which their
        # random initial weights are drawn.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply fc1, a ReLU, then fc2 (no activation on the output)."""
        hidden = nn.functional.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
class Linear(nn.Module):
    """Single affine layer mapping ``input_size`` features to ``output_size``.

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Return the affine projection of ``x``."""
        return self.fc1(x)

In [None]:
def validate(criterion, loader, net):
    """Compute ``criterion`` over every sample yielded by ``loader``.

    Predictions and targets from all batches are concatenated along dim 0 and
    the criterion is applied once to the full set, so the result does not
    depend on how the data happens to be batched.

    Args:
        criterion: Callable ``criterion(pred, true)`` returning a loss tensor.
        loader: Iterable of ``(x, y)`` tensor batches.
        net: Model to evaluate. It is switched to eval mode and left that way.

    Returns:
        The loss tensor produced by ``criterion`` (computed without gradients).

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    net.eval()

    targets = []
    predictions = []
    with torch.no_grad():
        for x, y in loader:
            predictions.append(net(x))
            targets.append(y)

        if not predictions:
            raise ValueError("validate() received an empty loader; cannot compute a loss")

        # Concatenate as tensors: a NumPy round-trip would fail for tensors
        # that do not live on the CPU and buys nothing here.
        loss = criterion(torch.cat(predictions, dim=0), torch.cat(targets, dim=0))

    return loss


In [None]:
def l1_reg(model, reg_lambda):
    """Return ``reg_lambda`` times the L1 norm of all parameters of ``model``.

    Every tensor from ``model.parameters()`` contributes (weights and biases
    alike). The result stays attached to the autograd graph so it can be added
    to a training loss.

    Args:
        model: ``nn.Module`` whose parameters are penalised.
        reg_lambda: Scalar multiplier for the penalty.

    Returns:
        A 0-dim tensor holding the scaled penalty; zero for a model that has
        no parameters.
    """
    params = list(model.parameters())
    if not params:
        # Nothing to penalise; still hand back a tensor so callers can add it to a loss.
        return reg_lambda * torch.tensor(0.)

    total = torch.tensor(0., device=params[0].device)
    for param in params:
        # abs().sum() is the L1 norm; torch.norm is deprecated. Accumulate
        # out of place so autograd never sees an in-place update.
        total = total + param.abs().sum()
    return reg_lambda * total


In [None]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

def train(epochs, optim, criterion, regularise, trainloader, valloader, net, empty_net, reg_lambda=None):
    """Train ``net`` and keep a snapshot of the weights with the lowest validation loss.

    Args:
        epochs: Number of passes over ``trainloader``.
        optim: Optimizer already bound to ``net``'s parameters. (Inside this
            function the name shadows the ``torch.optim`` module alias.)
        criterion: Loss callable ``criterion(pred, true)``.
        regularise: Callable ``regularise(net, reg_lambda)`` returning a penalty
            tensor, or ``None``.
        trainloader: Iterable of ``(x, y)`` training batches.
        valloader: Iterable of ``(x, y)`` batches scored after every epoch.
        net: Model to train (updated in place).
        empty_net: Model with the same architecture as ``net``; it receives a
            copy of the best-scoring weights.
        reg_lambda: Regularisation strength. The penalty is applied only when
            both ``regularise`` and ``reg_lambda`` are not ``None``.

    Returns:
        Tuple ``(net, best_net)``: the model after the final epoch and the
        snapshot with the lowest validation loss. ``best_net`` keeps its
        initial weights if the validation loss never beats infinity (e.g. NaN).
    """
    best_val_loss = float("inf")
    best_net = empty_net
    writer = SummaryWriter()
    # Running batch counter: gives every training point its own x position in
    # TensorBoard instead of stacking all batches of an epoch on one step.
    global_step = 0

    try:
        for epoch in range(epochs):  # loop over the dataset multiple times
            running_loss = 0.0
            num_batches = 0

            # Evaluation at the end of the previous epoch left the net in eval mode.
            net.train()
            progress_bar = tqdm(trainloader)

            for batch_idx, (x, y) in enumerate(progress_bar):
                optim.zero_grad()

                y_hat = net(x)
                loss = criterion(y_hat, y)
                if regularise is not None and reg_lambda is not None:
                    loss = loss + regularise(net, reg_lambda)
                loss.backward()
                optim.step()

                batch_loss = loss.item()
                running_loss += batch_loss
                num_batches += 1
                if batch_idx % 20 == 0:
                    progress_bar.set_description(f"train | loss: {batch_loss:.4f}")

                writer.add_scalar('Training Loss', batch_loss, global_step)
                global_step += 1

            # NaN rather than a ZeroDivisionError when the loader yielded nothing.
            train_loss = running_loss / num_batches if num_batches else float("nan")
            print(f"Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}")

            net.eval()

            # Reduce to a plain float so comparison, logging and formatting do
            # not depend on the tensor type validate() returns.
            val_loss = float(validate(criterion, valloader, net))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_net.load_state_dict(net.state_dict())

            writer.add_scalar('Validation Loss', val_loss, epoch)

            print(f"Epoch [{epoch + 1}/{epochs}], Val Loss: {val_loss:.4f}")
    finally:
        # Flush pending events and release the log file even if training is interrupted.
        writer.close()

    return net, best_net



In [None]:
# Hyperparameters shared by both experiments.
lr = 0.001
epochs = 100
batch_size = 16
criterion = nn.MSELoss()

# Every layer below is 384 wide (presumably the dimensionality of the
# representations returned by the endpoints; TODO confirm).
rep_dim = 384

# Each trainable model is paired with a second instance of the same
# architecture that train() fills with the best-scoring weights.
lin_net = Linear(rep_dim, rep_dim)
lin_empty_net = Linear(rep_dim, rep_dim)

mlp = MLP(rep_dim, rep_dim, rep_dim)
empty_mpl = MLP(rep_dim, rep_dim, rep_dim)


In [None]:
# Pair each 'home' representation (input) with the 'defense' representation
# of the same id (target).
train_dataset = RepresentationsDataset(A_train_reps, B_train_reps)
test_dataset = RepresentationsDataset(A_test_reps, B_test_reps)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)


In [None]:
# Use a dedicated name for the optimizer: binding it to `optim` would replace
# the `torch.optim` module alias created in the import cell.
lin_optimizer = torch.optim.Adam(lin_net.parameters(), lr=lr)

# NOTE(review): test_loader doubles as the validation set that selects
# lin_best_net, so a test score for that snapshot would be optimistic.
lin_last_net, lin_best_net = train(
    epochs, lin_optimizer, criterion, l1_reg, train_loader, test_loader,
    lin_net, lin_empty_net, reg_lambda=0.0001,
)

In [None]:
# Use a dedicated name for the optimizer: binding it to `optim` would replace
# the `torch.optim` module alias created in the import cell.
mlp_optimizer = torch.optim.Adam(mlp.parameters(), lr=lr)

# NOTE(review): test_loader doubles as the validation set that selects
# mlp_best_net, so a test score for that snapshot would be optimistic.
mlp_last_net, mlp_best_net = train(
    epochs, mlp_optimizer, criterion, l1_reg, train_loader, test_loader,
    mlp, empty_mpl, reg_lambda=0.001,
)

In [None]:
# Score the final-epoch weights of both models on the held-out loader.
# NOTE(review): the best-validation snapshots (lin_best_net / mlp_best_net)
# are returned by train() but not evaluated here; confirm that is intended.
test_loss_lin = validate(criterion, test_loader, lin_last_net)
print(f"lin test: {test_loss_lin}")

test_loss_mlp = validate(criterion, test_loader, mlp_last_net)
# Label fixed: this line reports the MLP, not the linear model.
print(f"mlp test: {test_loss_mlp}")
