In [1]:
import numpy as np
import pandas as pd

In [2]:
import consts
import util

In [3]:
# Load the raw table; the first CSV column becomes the row index.
dataset = pd.read_csv('data/arc_red_zero.csv', index_col=0)

# Every candidate target column is used as a target in this notebook
# (consts.TARGET_COLS was the previously used alternative).
possible_target_cols = consts.POSSIBLE_TARGET_COLS
target_cols = possible_target_cols

# All columns after the leading block of target columns are features.
# (An earlier variant capped the slice with consts.ZERO_NUM_FEATURE_COLS_999.)
num_target_cols = possible_target_cols.shape[0]
feature_cols = dataset.columns[num_target_cols:]

# Keep only the rows that have at least one non-zero target value.
has_any_label = dataset[target_cols].ne(0).any(axis=1)
positive_dataset = dataset.loc[has_any_label]

# Normalise each feature column independently; NaNs left behind are set to 0.
normalized_features = positive_dataset[feature_cols].apply(util.normalize_column, axis=0)
features = normalized_features.fillna(0).values
labels = positive_dataset[target_cols].values

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

from hypll import nn as hnn
from hypll.tensors import TangentTensor

# Define your MLP model
class MLP(nn.Module):
    """Multi-layer perceptron assembled from hypll hyperbolic layers.

    Layout: ``fc_in`` -> ReLU -> ``num_hidden_layers`` x (linear -> ReLU) -> ``fc_out``.
    No final activation is applied, so ``forward`` returns whatever ``fc_out``
    produces (the training code feeds its ``.tensor`` to a logits-based loss).

    Args:
        input_size: Width of the input layer.
        output_size: Width of the output layer.
        layer_size: Width of every hidden layer.
        num_hidden_layers: Number of ``layer_size -> layer_size`` layers between
            ``fc_in`` and ``fc_out`` (may be 0).
        manifold: hypll manifold handed to every hyperbolic layer.
    """

    def __init__(self, input_size, output_size, layer_size, num_hidden_layers, manifold):
        super().__init__()
        # Re-seed before creating any layer so parameter initialisation is repeatable.
        # Side effect: this resets the *global* torch RNG for everything that runs later.
        torch.manual_seed(consts.TORCH_MANUAL_SEED)

        self.fc_in = hnn.HLinear(input_size, layer_size, manifold=manifold)
        self.relu = hnn.HReLU(manifold=manifold)
        self.hidden_fcs = nn.ModuleList(
            hnn.HLinear(layer_size, layer_size, manifold=manifold)
            for _ in range(num_hidden_layers)
        )
        self.fc_out = hnn.HLinear(layer_size, output_size, manifold=manifold)

    def forward(self, x):
        """Run ``x`` through the input layer, each hidden layer, and the output layer."""
        hidden = self.relu(self.fc_in(x))
        for layer in self.hidden_fcs:
            hidden = self.relu(layer(hidden))
        return self.fc_out(hidden)

# Define custom PyTorch dataset
class CustomDataset(Dataset):
    """In-memory dataset pairing a feature row with its label row.

    Both inputs are copied into ``float32`` tensors at construction time.

    Args:
        features: Array-like of per-sample features.
        labels: Array-like of per-sample labels, aligned with ``features``.
    """

    def __init__(self, features, labels):
        self.features, self.labels = (
            torch.tensor(data, dtype=torch.float32) for data in (features, labels)
        )

    def __len__(self):
        """Number of samples, taken from the feature tensor's first dimension."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` pair stored at position ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

# Define training function
def train_model(model, train_loader, criterion, optimizer, manifold, device):
    """Train ``model`` for one pass over ``train_loader`` and return the mean loss.

    Each batch is wrapped as a ``TangentTensor`` (manifold dimension = last axis),
    mapped onto ``manifold`` with ``expmap``, and passed through the model; the
    loss is computed on the ``.tensor`` attribute of the model output.

    Args:
        model: Network to optimise; switched to training mode here.
        train_loader: Iterable of ``(inputs, targets)`` batches exposing ``.dataset``.
        criterion: Loss callable invoked as ``criterion(outputs.tensor, targets)``.
        optimizer: Optimiser whose ``zero_grad``/``step`` are called once per batch.
        manifold: hypll manifold used for the exponential map.
        device: Device the batches are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(train_loader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()

        # Interpret the raw features as tangent vectors and project them onto the manifold.
        manifold_inputs = manifold.expmap(
            TangentTensor(data=batch_inputs, man_dim=-1, manifold=manifold)
        )
        predictions = model(manifold_inputs)

        batch_loss = criterion(predictions.tensor, batch_targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the average.
        loss_sum += batch_loss.item() * batch_inputs.size(0)
    return loss_sum / len(train_loader.dataset)

In [29]:
# BAD DATA
# tried_param_grid = {
#               'num_hidden_layers': [0,1,2,3],
#               'layer_size': [64, 128, 256, 384],
#               'weight_decay': [0, 0.0001, 0.001, 0.01],
#               'lr': [0.001, 0.002, 0.003, 0.005, 0.01],
#               'batch_size': [16, 32, 64, 48, 64, 80, 96 ,128],
#               'BCE_weights_power': [0.5, 0.65, 0.8],
#               'BCE_weights_factor': [1.2, 1.6, 2],
#               'epochs': [100, 200, 400],
#               'curvature': [-1, -2]}

# best_params = {'num_hidden_layers': [2],
#               'layer_size': [256],
#               'lr': [0.001],
#               'weight_decay': [0.001],
#               'batch_size': [48, 64, 80],
#               'BCE_weights_power': [0.5],
#               'BCE_weights_factor': [1.2],
#               'epochs': [400],
#               'curvature': [-1]}

# Grid searched by the cells below: each key maps to its list of candidate values.
# Key order matters - the training loop unpacks each combination positionally.
param_grid = dict(
    num_hidden_layers=[1],
    layer_size=[384],
    lr=[0.001],
    weight_decay=[0.005, 0.01],
    batch_size=[64, 80, 96],
    BCE_weights_power=[0.5],
    BCE_weights_factor=[1.2],
    epochs=[100],
    curvature=[-1],
)

# Kept for reference only; nothing in the visible notebook reads this
# (presumably the best values found so far - confirm).
best_params = dict(
    num_hidden_layers=[0, 1],
    layer_size=[128, 256],
    lr=[0.001],
    weight_decay=[0.01],
    batch_size=[64],
    BCE_weights_power=[0.5],
    BCE_weights_factor=[1.2],
    epochs=[80],
    curvature=[-1],
)


# Total number of combinations the grid expands to.
np.prod([len(candidates) for candidates in param_grid.values()])

72

In [30]:
import itertools
# Cartesian product of all candidate lists; each tuple follows param_grid's key order.
param_combinations = [*itertools.product(*param_grid.values())]

In [31]:
# One independent placeholder list per combination; the training loop replaces
# each entry with that combination's collected statistics.
combination_stats = {combo: [] for combo in param_combinations}

In [32]:
from hypll.manifolds.poincare_ball import Curvature, PoincareBall

from hypll.optim import RiemannianAdam
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm

THRESHOLD = 0.6
# Order must match the tuple returned by util.h_evaluate_metrics, which is indexed positionally below.
METRICS = ['acc', 'hamm', 'prec', 'sens', 'f1', 'aps']


def _empty_stats():
    """Return a fresh ``{'loss' | metric: {'train': [], 'val': []}}`` container."""
    return {name: {'train': [], 'val': []} for name in ['loss', *METRICS]}


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: the printed lines below ('----- Combination', the parameter tuples,
# 'Fold N', '<metric>:\t <value>') are parsed by later cells - keep the format stable.
for i, params in enumerate(param_combinations):
    print(f'----- Combination {i} -----')
    print(*zip(param_grid.keys(), params))
    (num_hidden_layers, layer_size, lr, weight_decay, batch_size,
     BCE_weights_power, BCE_weights_factor, epochs, curvature) = params

    manifold = PoincareBall(c=Curvature(curvature))

    # Per-combination results: one list of per-epoch values for every fold.
    eval_stats = _empty_stats()

    # Initialize cross-validation
    kf = KFold(n_splits=consts.KFOLDS, shuffle=True, random_state=consts.KFOLD_SEED)

    # Cross-validation loop
    for fold, (train_idx, val_idx) in enumerate(kf.split(features, labels)):
        print(f'Fold {fold + 1}')

        # Split data into train and validation sets
        train_features, val_features = features[train_idx], features[val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        # Create DataLoader for training and validation
        train_dataset = CustomDataset(train_features, train_labels)
        val_dataset = CustomDataset(val_features, val_labels)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        # Initialize model, criterion, and optimizer
        model = MLP(input_size=len(feature_cols), output_size=len(target_cols),
                    layer_size=layer_size, num_hidden_layers=num_hidden_layers, manifold=manifold).to(device)

        # Per-label positive rate, restricted to the labels actually being trained on.
        # NOTE(review): computed over the whole positive_dataset, i.e. including
        # the rows of the current validation fold.
        possible_label_occurences = torch.tensor(positive_dataset[possible_target_cols].mean())
        indices = np.where(np.isin(possible_target_cols, target_cols))[0]
        label_occurences = possible_label_occurences[indices]

        bce_positive_weights = util.calc_bce_positive_weights(label_occurences, BCE_weights_power, BCE_weights_factor)

        # pos_weight has to live on the same device as the logits; without the
        # move the loss raises a device-mismatch error when CUDA is available.
        criterion = nn.BCEWithLogitsLoss(pos_weight=bce_positive_weights.to(device))
        optimizer = RiemannianAdam(model.parameters(), lr=lr, weight_decay=weight_decay)

        # Training loop: per-epoch history for this fold only.
        eval_stats_fold = _empty_stats()

        for epoch in range(epochs):
            eval_stats_fold['loss']['train'].append(train_model(model, train_loader, criterion, optimizer, manifold, device))
            eval_stats_fold['loss']['val'].append(util.h_evaluate_loss(model, val_loader, criterion, manifold, device))

            # Calculate and store accuracy, hamm, precision, sensitivity, f1, aps
            eval_train = util.h_evaluate_metrics(model, train_loader, manifold, device, THRESHOLD)
            eval_val = util.h_evaluate_metrics(model, val_loader, manifold, device, THRESHOLD)
            # Dedicated index name so the combination counter `i` is not clobbered.
            for metric_idx, metric in enumerate(METRICS):
                eval_stats_fold[metric]['train'].append(eval_train[metric_idx])
                eval_stats_fold[metric]['val'].append(eval_val[metric_idx])

        # Report the final-epoch validation metrics for this fold.
        for metric in METRICS:
            stat = eval_stats_fold[metric]['val'][-1]
            if isinstance(stat, np.ndarray):
                # Print arrays as plain lists so the log stays on one parseable line.
                stat = list(stat)

            print(f'{metric}:\t', stat)

        for name in ['loss', *METRICS]:
            eval_stats[name]['train'].append(eval_stats_fold[name]['train'])
            eval_stats[name]['val'].append(eval_stats_fold[name]['val'])

    combination_stats[params] = eval_stats

----- Combination 0 -----
('num_hidden_layers', 1) ('layer_size', 256) ('lr', 0.001) ('weight_decay', 0.005) ('batch_size', 64) ('BCE_weights_power', 0.5) ('BCE_weights_factor', 1.2) ('epochs', 100) ('curvature', -1)
Fold 1
acc:	 0.16317991631799164
hamm:	 0.14905857740585773
prec:	 0.6497890295358649
sens:	 0.43258426966292135
f1:	 0.5193929173693086
aps:	 [0.7287700548609101, 0.7101141064575602, 0.5212652862764183, 0.5675524809702699, 0.6390953892660285, 0.4020719644282947, 0.3393797116919195, 0.2310377002629741, 0.4406630141373707, 0.39183785839001656, 0.5577166509856268, 0.1983516501990804]
Fold 2
acc:	 0.19665271966527198
hamm:	 0.1448744769874477
prec:	 0.6224350205198358
sens:	 0.4504950495049505
f1:	 0.5226881102814475
aps:	 [0.7035810342105588, 0.7503956825073579, 0.4196825276532822, 0.6311929247958291, 0.49735756738601855, 0.5158639488101987, 0.3558111331204143, 0.2221445170064073, 0.275791122796497, 0.36855120353523324, 0.5897404707807454, 0.11812482450627831]
Fold 3
acc:	 0

In [17]:
import ast

In [33]:
# Parse the grid-search log into a nested list indexed [combination][fold][metric].
# Recognised line types:
#   '----- Combination N -----'  -> start a new combination
#   '(...)'                      -> parameter echo, ignored here
#   'Fold N'                     -> start a new fold inside the current combination
#   '<metric>:\t <value>'        -> a scalar, or a '[...]' list that is reduced to its mean
grid_search_results = []
with open('data/grid_search_arc_all_smells4.txt', 'r') as file:
    for line in file:
        if not line.strip():
            # Blank lines carry no data; previously they crashed on split(...)[1].
            continue

        if line.startswith('----'):
            grid_search_results.append([])
            continue

        if line.startswith('('):
            continue

        if line.startswith('Fold'):
            grid_search_results[-1].append([])
            continue

        result = line.split('\t ')[1]

        if result.startswith('['):
            # Per-label values (e.g. aps) are collapsed to a single mean.
            grid_search_results[-1][-1].append(np.array(ast.literal_eval(result)).mean())
            continue

        grid_search_results[-1][-1].append(float(result))


# Rectangular only if every combination logged the same folds and metrics.
grid_search_results = np.array(grid_search_results)
grid_search_results.shape

(72, 5, 6)

In [34]:
# Average every metric over the fold axis -> one row of metric means per combination.
np.mean(grid_search_results, axis=1)

array([[0.18158996, 0.14459554, 0.63165408, 0.45565672, 0.52909493,
        0.47306595],
       [0.17405858, 0.1460251 , 0.6257321 , 0.45297812, 0.52524659,
        0.4743857 ],
       [0.17322176, 0.14630404, 0.62575032, 0.44884695, 0.5225383 ,
        0.47336012],
       [0.17238494, 0.14473501, 0.63913534, 0.43460714, 0.5172219 ,
        0.4693919 ],
       [0.17112971, 0.1443166 , 0.64076115, 0.43590334, 0.51868559,
        0.46971459],
       [0.17029289, 0.14438633, 0.6425047 , 0.4308907 , 0.51562155,
        0.47074924],
       [0.14016736, 0.14853556, 0.6612227 , 0.34504327, 0.45308793,
        0.42547338],
       [0.14016736, 0.14916318, 0.66138913, 0.33741064, 0.44649599,
        0.4283092 ],
       [0.14100418, 0.14874477, 0.66535798, 0.33600649, 0.44624092,
        0.42997328],
       [0.10962343, 0.15857741, 0.62374895, 0.28490981, 0.39057824,
        0.33123475],
       [0.11129707, 0.15781032, 0.63302435, 0.27987493, 0.38743117,
        0.33833574],
       [0.10920502, 0

In [35]:
# Average over folds once (the original recomputed this six times), then pull
# out one column per metric. Column order follows the log: acc, hamm, prec, sens, f1, aps.
fold_means = grid_search_results.mean(axis=1)
res_acc = fold_means[:, 0]
res_hamm = fold_means[:, 1]
res_prec = fold_means[:, 2]
res_sens = fold_means[:, 3]
res_f1 = fold_means[:, 4]
res_aps = fold_means[:, 5]

In [36]:
# Orderings of combination indices from best to worst for each metric.
# Negating sorts descending; hamm alone is sorted ascending
# (presumably a loss where lower is better - confirm).
rank_acc = np.argsort(-res_acc)
rank_hamm = np.argsort(res_hamm)
rank_prec = np.argsort(-res_prec)
rank_sens = np.argsort(-res_sens)
rank_f1 = np.argsort(-res_f1)
rank_aps = np.argsort(-res_aps)

In [41]:
# Each rank_* array is an ordering (a permutation of combination indices).
# The position of combination i inside an ordering is its rank for that metric,
# and argsort of a permutation is exactly its inverse, so ordering.argsort()[i]
# gives that position without scanning with np.where for every i.
orderings = (rank_acc, rank_hamm, rank_prec, rank_sens, rank_f1, rank_aps)
rank_sums = np.sum([ordering.argsort() for ordering in orderings], axis=0)


# Lower sum = better overall. Printed as two groups of 36 in rows of 12,
# followed by the full vector.
print(rank_sums[:36][:12])
print(rank_sums[:36][12:24])
print(rank_sums[:36][24:])
print()
print(rank_sums[36:][:12])
print(rank_sums[36:][12:24])
print(rank_sums[36:][24:])
print()
print(rank_sums)

[ 35  57  74  99  91 100 204 224 216 333 316 319]
[211 141 171 130  97  69 280 276 254 378 384 342]
[203 208 169 166 175 158 301 270 265 378 392 383]

[ 54  42  77  52  53  52 219 201 208 326 311 313]
[166 193 172 115  97  57 266 236 234 387 354 341]
[194 179 177 158 151 109 313 281 252 405 371 351]

[ 35  57  74  99  91 100 204 224 216 333 316 319 211 141 171 130  97  69
 280 276 254 378 384 342 203 208 169 166 175 158 301 270 265 378 392 383
  54  42  77  52  53  52 219 201 208 326 311 313 166 193 172 115  97  57
 266 236 234 387 354 341 194 179 177 158 151 109 313 281 252 405 371 351]


In [75]:
# Same grid as defined earlier in the notebook, restated in this later cell.
# Key order matters: combinations are unpacked positionally by the training loop.
param_grid = dict(
    num_hidden_layers=[1],
    layer_size=[384],
    lr=[0.001],
    weight_decay=[0.005, 0.01],
    batch_size=[64, 80, 96],
    BCE_weights_power=[0.5],
    BCE_weights_factor=[1.2],
    epochs=[100],
    curvature=[-1],
)

In [None]:
----- Combination 0 -----
('num_hidden_layers,layer_size', (1, 128)), ('lr', 0.002), ('batch_size', 64), ('BCE_weights_power', 0.5), ('BCE_weights_factor', 1.2), ('epochs', 200), ('curvature', -1)
Fold 1
acc:	 0.10743801652892562
hamm:	 0.27508854781582054
sens:	 0.5251396648044693
f1:	 0.4465558194774346
Fold 2

In [30]:
# Scratch check: a parameter line in the older, comma-separated log format parses
# with ast.literal_eval into a tuple of (name, value) pairs, where a comma-joined
# name carries a tuple of values.
ast.literal_eval("(('num_hidden_layers,layer_size', (1, 128)), ('lr', 0.002), ('batch_size', 64), ('BCE_weights_power', 0.5), ('BCE_weights_factor', 1.2), ('epochs', 200), ('curvature', -1))")

(('num_hidden_layers,layer_size', (1, 128)),
 ('lr', 0.002),
 ('batch_size', 64),
 ('BCE_weights_power', 0.5),
 ('BCE_weights_factor', 1.2),
 ('epochs', 200),
 ('curvature', -1))

In [61]:
# Parse the older log format, which echoes the parameters as a literal tuple of
# (name, value) pairs. Result: a list of (params_dict, per-metric mean over folds).
grid_search_results = []
with open('grid_search_all.txt', 'r') as file:
    for line in file:
        if not line.strip():
            # Blank lines carry no data; previously they crashed on split(...)[1].
            continue

        if line.startswith('----'):
            grid_search_results.append([])
            continue

        if line.startswith('('):
            params = ast.literal_eval(line)
            # A key such as 'num_hidden_layers,layer_size' holds a tuple of values;
            # split it so every parameter gets its own dictionary entry.
            flat_params = {}
            for keys, values in params:
                flat_params.update(zip(keys.split(','), np.array(values).flatten()))
            grid_search_results[-1].append(flat_params)
            continue

        if line.startswith('Fold'):
            grid_search_results[-1].append([])
            continue

        result = line.split('\t ')[1]

        if result.startswith('['):
            # Per-label values are collapsed to a single mean.
            grid_search_results[-1][-1].append(np.array(ast.literal_eval(result)).mean())
            continue

        grid_search_results[-1][-1].append(float(result))

# Each entry is [params_dict, fold_1_metrics, fold_2_metrics, ...]; average the
# folds element-wise. Works for any number of folds (the original required exactly 5).
grid_search_results = [
    (params, np.array(folds).mean(axis=0))
    for (params, *folds) in grid_search_results
]
# grid_search_results = np.array(grid_search_results)
# grid_search_results.shape

In [62]:
# Display the parsed (params, per-metric fold means) pairs.
grid_search_results

[({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 200,
   'curvature': -1},
  array([0.07839368, 0.29219214, 0.57370791, 0.46126235])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.6,
   'epochs': 200,
   'curvature': -1},
  array([0.03863903, 0.33448244, 0.66740088, 0.46599431])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 2,
   'epochs': 200,
   'curvature': -1},
  array([0.01766738, 0.36713675, 0.71054298, 0.45833707])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.65,
   'BCE_weights_factor': 1.2,
   'epochs': 200,
   'curvature': -1},
  array([0.08004657, 0.30032571, 0.58783675, 0.46114133])),
 ({'num_hidden_layers': 1

In [72]:
# Rank configurations by the metric at index 3 of the fold-mean array, highest first
# (presumably f1 in this older four-metric log format - confirm).
sorted(grid_search_results, key=lambda entry: -entry[1][3])

[({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.6,
   'epochs': 200,
   'curvature': -1},
  array([0.05633228, 0.3252239 , 0.67656905, 0.47625214])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.14190372, 0.25479267, 0.52407953, 0.47346049])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 128,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.14190372, 0.25479267, 0.52407953, 0.47346049])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.005,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.141903

In [None]:
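# acc: top 5 configurations by mean accuracy (first array entry, highest first). Pasted from an earlier run for reference; this cell is not meant to be executed.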
[({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 400,
   'curvature': -1},
  array([0.16512184, 0.24997054, 0.46716364, 0.44948915])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 48,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 400,
   'curvature': -1},
  array([0.16508379, 0.25290604, 0.45667032, 0.44072786])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0.0001,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 400,
   'curvature': -1},
  array([0.15956958, 0.25321697, 0.48211217, 0.45417767])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 32,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.15348766, 0.25566456, 0.48358674, 0.45150742])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 32,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.15348766, 0.25566456, 0.48358674, 0.45150742])),]

In [None]:
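# hamm: top 5 configurations by mean hamm (second array entry, lowest first). Pasted from an earlier run for reference; this cell is not meant to be executed.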
[({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 400,
   'curvature': -1},
  array([0.16512184, 0.24997054, 0.46716364, 0.44948915])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 32,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 400,
   'curvature': -1},
  array([0.14963548, 0.25218918, 0.45504483, 0.44098669])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0.0001,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.15239639, 0.25290104, 0.51845695, 0.47253441])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0.0001,
   'batch_size': 128,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.15239639, 0.25290104, 0.51845695, 0.47253441])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.005,
   'weight_decay': 0.0001,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.15239639, 0.25290104, 0.51845695, 0.47253441])),]