In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from taskdataset import TaskDataset
import matplotlib.pyplot as plt
import os
import time
import pickle as pkl
from tqdm import tqdm
import argparse

from t2_functions import train, validate, l1_reg, lin_augment_affine

class RepresentationsDataset(Dataset):
    """Map-style dataset that pairs input representations with their targets.

    Args:
        x: Sized, indexable collection of inputs (tensor, ndarray or nested
            list). ``len(x)`` defines the dataset length.
        y: Sized, indexable collection of targets aligned item-for-item
            with ``x``.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same length.
    """

    def __init__(self, x, y):
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch, or silently ignore the surplus targets.
        if len(x) != len(y):
            raise ValueError(
                f"x and y must have the same length, got {len(x)} and {len(y)}"
            )
        self.x = x
        self.y = y

    def __len__(self):
        """Return the number of (input, target) pairs."""
        return len(self.x)

    @staticmethod
    def _to_tensor(value):
        """Return ``value`` as a new tensor that owns its own memory."""
        if isinstance(value, torch.Tensor):
            # torch.tensor(tensor) emits a UserWarning; detach().clone() is
            # the copy form that produces the same detached copy without it.
            return value.detach().clone()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return the ``idx``-th pair as a tuple ``(x_tensor, y_tensor)``."""
        return self._to_tensor(self.x[idx]), self._to_tensor(self.y[idx])

class MLP(nn.Module):
    """Two-layer perceptron: ``fc1`` -> ReLU -> ``fc2``.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width of the hidden layer.
        output_size: Number of features in each output vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer."""
        hidden = nn.functional.relu(self.fc1(x))
        return self.fc2(hidden)
    
class Linear(nn.Module):
    """Single affine layer wrapped as a module; the layer is stored as ``fc1``.

    Args:
        input_size: Number of features in each input vector.
        output_size: Number of features in each output vector.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Return ``fc1`` applied to ``x``."""
        return self.fc1(x)


# --- Configuration -----------------------------------------------------------
criterion = nn.MSELoss()
batch_size = 32
epochs = 1
lr = 0.001

N_PARTITIONS = 10  # number of ./data/submit/affine/1/partition_<i> folders to process
REP_DIM = 384      # input and output width of the linear map
SEED = 0

# Seed torch and NumPy so DataLoader shuffling and layer initialisation are
# repeatable from one Restart & Run All to the next.
torch.manual_seed(SEED)
np.random.seed(SEED)


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as f:
        return pkl.load(f)


# --- Fit one linear map per partition and collect its predictions ------------
preds = []
ids = []

for i in range(N_PARTITIONS):
    print(f"big loop: {i}")
    folder = os.path.join('./data', 'submit', 'affine', str(1), f'partition_{i}')
    A_train_reps = _load_pickle(os.path.join(folder, 'A_train'))
    B_train_reps = _load_pickle(os.path.join(folder, 'B_train'))
    B_test_reps = _load_pickle(os.path.join(folder, 'B_test'))
    ids_train = _load_pickle(os.path.join(folder, 'ids_train'))
    ids_test = _load_pickle(os.path.join(folder, 'ids_test'))

    A_train_aug = torch.tensor(A_train_reps, dtype=torch.float32)
    B_train_aug = torch.tensor(B_train_reps, dtype=torch.float32)
    A_train_aug, B_train_aug = lin_augment_affine(A_train_aug, B_train_aug, new_for_each_pair=1)

    train_dataset = RepresentationsDataset(x=A_train_aug, y=B_train_aug)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    lin_net = Linear(REP_DIM, REP_DIM)
    lin_empty_net = Linear(REP_DIM, REP_DIM)

    # Named `optimizer`, not `optim`, so the `torch.optim as optim` module
    # imported at the top of the notebook is not overwritten.
    optimizer = torch.optim.Adam(lr=lr, params=lin_net.parameters())

    # NOTE(review): train_loader is passed twice; presumably the second one is
    # the validation loader, which would make the reported "Val Loss" a
    # training-set metric — TODO confirm against t2_functions.train.
    lin_last_net, lin_best_net = train(epochs, optimizer, criterion, l1_reg, train_loader, train_loader, lin_net, lin_empty_net, reg_lambda=0.0001)

    B_reps = np.concatenate([B_train_reps, B_test_reps], axis=0)
    B_reps = torch.tensor(B_reps, dtype=torch.float32)

    # NOTE(review): the net was fitted with A as input and B as target, yet it
    # is applied to B here and the output is treated as A-space predictions —
    # confirm this direction is intended.
    with torch.no_grad():  # inference only: do not build an autograd graph
        A_preds = lin_last_net(B_reps).numpy()

    # The training rows come first in B_reps, so overwrite their predictions
    # with the known A representations. The row count is taken from the data
    # instead of being hard-coded.
    A_train_known = np.array(A_train_reps)
    A_preds[:len(A_train_known)] = A_train_known

    preds.append(A_preds)

    ids_iter = np.concatenate([ids_train, ids_test], axis=0)
    ids.append(ids_iter)

# --- Assemble the submission in the order of dataset.ids ---------------------
preds = np.concatenate(preds, axis=0)
ids = np.concatenate(ids, axis=0)

id_rep_map = dict(zip(ids, preds))

dataset = torch.load("./data/SybilAttack.pt")

# Ids without a prediction fall back to the first prediction row as a
# placeholder. The comprehension variable is not called `id`, so the builtin
# stays usable in later cells.
representations = [
    id_rep_map[sample_id] if sample_id in id_rep_map else preds[0]
    for sample_id in dataset.ids
]

np.savez("./task2_submission.npz", ids=dataset.ids, representations=representations)
    


big loop: 0


200it [00:04, 49.61it/s]
  from .autonotebook import tqdm as notebook_tqdm
train | loss: 0.9587: 100%|██████████| 1250/1250 [00:08<00:00, 153.19it/s]


Epoch [1/1], Train Loss: 1.0083
Epoch [1/1], Val Loss: 0.7485
big loop: 1


200it [00:03, 55.29it/s]
train | loss: 0.9280: 100%|██████████| 1250/1250 [00:09<00:00, 138.07it/s]


Epoch [1/1], Train Loss: 1.0191
Epoch [1/1], Val Loss: 0.7571
big loop: 2


200it [00:03, 51.66it/s]
train | loss: 0.9496: 100%|██████████| 1250/1250 [00:07<00:00, 178.20it/s]


Epoch [1/1], Train Loss: 1.0871
Epoch [1/1], Val Loss: 0.8109
big loop: 3


200it [00:03, 54.39it/s]
train | loss: 0.8933: 100%|██████████| 1250/1250 [00:08<00:00, 139.34it/s]


Epoch [1/1], Train Loss: 1.0903
Epoch [1/1], Val Loss: 0.8232
big loop: 4


200it [00:03, 52.08it/s]
train | loss: 1.0903: 100%|██████████| 1250/1250 [00:06<00:00, 179.77it/s]


Epoch [1/1], Train Loss: 1.0838
Epoch [1/1], Val Loss: 0.8077
big loop: 5


200it [00:03, 50.52it/s]
train | loss: 0.8738: 100%|██████████| 1250/1250 [00:08<00:00, 156.18it/s]


Epoch [1/1], Train Loss: 1.0946
Epoch [1/1], Val Loss: 0.8140
big loop: 6


200it [00:03, 55.12it/s]
train | loss: 1.0584: 100%|██████████| 1250/1250 [00:08<00:00, 145.37it/s]


Epoch [1/1], Train Loss: 1.0863
Epoch [1/1], Val Loss: 0.7987
big loop: 7


200it [00:02, 70.05it/s]
train | loss: 0.9271: 100%|██████████| 1250/1250 [00:08<00:00, 150.75it/s]


Epoch [1/1], Train Loss: 1.1018
Epoch [1/1], Val Loss: 0.8343
big loop: 8


200it [00:03, 50.46it/s]
train | loss: 0.7240: 100%|██████████| 1250/1250 [00:07<00:00, 168.75it/s]


Epoch [1/1], Train Loss: 0.9175
Epoch [1/1], Val Loss: 0.6545
big loop: 9


200it [00:03, 53.03it/s]
train | loss: 0.6899: 100%|██████████| 1250/1250 [00:07<00:00, 170.97it/s]


Epoch [1/1], Train Loss: 1.0177
Epoch [1/1], Val Loss: 0.7533


KeyError: 101031

In [2]:
# Rebuild the submission in the row order of ``dataset.ids``.
representations = []
missing_count = 0
for i, sample_id in enumerate(dataset.ids):  # not `id`: keeps the builtin usable
    if sample_id in id_rep_map:
        representations.append(id_rep_map[sample_id])
    else:
        # No prediction for this id: use the prediction row at the same
        # position as a placeholder.
        # NOTE(review): preds[i] raises IndexError if dataset.ids has more
        # entries than preds has rows — confirm the sizes match.
        representations.append(preds[i])
        missing_count += 1

# Make the fallback visible instead of silently shipping placeholder rows.
if missing_count:
    print(f"{missing_count} ids had no prediction; placeholder rows were used")

np.savez("./task2_submission.npz", ids=dataset.ids, representations=representations)
    