In [1]:
import numpy as np
import pandas as pd

In [2]:
import consts
import util

In [3]:
# Read the dataset CSV; its first column is used as the row index.
dataset = pd.read_csv('data/arc_mini_comb_red.csv', index_col=0)

# Label columns present in the CSV, and the subset that is actually trained on.
possible_target_cols = np.array(['Floral', 'Fruity', 'Woody', 'Odourless'])
# target_cols = consts.TARGET_COLS
target_cols = possible_target_cols

# Feature columns: the 326 columns that come right after the first
# len(possible_target_cols) columns.
# NOTE(review): this presumes the label columns are the leading columns of the CSV — confirm.
# feature_cols = dataset.columns[possible_target_cols.shape[0]:]
n_possible_targets = possible_target_cols.shape[0]
feature_cols = dataset.columns[n_possible_targets:n_possible_targets + 326]

# Run util.normalize_column over every feature column, replace NaNs with 0,
# and keep plain numpy arrays for the model code below.
normalized_features = dataset[feature_cols].apply(util.normalize_column, axis=0)
features = normalized_features.fillna(0).values
labels = dataset[target_cols].values

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

from hypll import nn as hnn
from hypll.tensors import TangentTensor

# Define your MLP model
class MLP(nn.Module):
    """Multi-layer perceptron assembled from hypll hyperbolic layers.

    Layout: ``fc_in`` -> HReLU -> ``num_hidden_layers`` x (HLinear -> HReLU)
    -> ``fc_out`` -> softmax over dim 1.

    Args:
        input_size: Width of the input layer.
        output_size: Width of the output layer.
        layer_size: Width shared by all hidden layers.
        num_hidden_layers: Number of extra HLinear layers between ``fc_in``
            and ``fc_out`` (0 gives ``fc_in`` -> ``fc_out`` directly).
        manifold: Manifold object handed to every hypll layer.
    """

    def __init__(self, input_size, output_size, layer_size, num_hidden_layers, manifold):
        super().__init__()
        # Seed torch's global RNG before any layer is created.
        torch.manual_seed(consts.TORCH_MANUAL_SEED)
        self.fc_in = hnn.HLinear(input_size, layer_size, manifold=manifold)
        self.relu = hnn.HReLU(manifold=manifold)
        hidden_layers = []
        for _ in range(num_hidden_layers):
            hidden_layers.append(hnn.HLinear(layer_size, layer_size, manifold=manifold))
        self.hidden_fcs = nn.ModuleList(hidden_layers)
        self.fc_out = hnn.HLinear(layer_size, output_size, manifold=manifold)

    def forward(self, x):
        """Return the softmax (dim 1) of the output layer's underlying tensor."""
        hidden = self.relu(self.fc_in(x))
        for layer in self.hidden_fcs:
            hidden = self.relu(layer(hidden))
        out = self.fc_out(hidden)
        # NOTE(review): this returns softmax probabilities, yet the notebook trains
        # with nn.BCEWithLogitsLoss, which expects raw logits — confirm this is intended.
        return F.softmax(out.tensor, dim=1)

# Define custom PyTorch dataset
class CustomDataset(Dataset):
    """In-memory dataset that serves (features, labels) pairs as float32 tensors.

    Args:
        features: Array-like of feature rows; converted with ``torch.tensor``.
        labels: Array-like of label rows; converted with ``torch.tensor``.
    """

    def __init__(self, features, labels):
        float32 = dict(dtype=torch.float32)
        self.features = torch.tensor(features, **float32)
        self.labels = torch.tensor(labels, **float32)

    def __len__(self):
        # The sample count is the length of the feature tensor's first dimension.
        return len(self.features)

    def __getitem__(self, idx):
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

# Define training function
def train_model(model, train_loader, criterion, optimizer, manifold, device):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to ``device``, wrapped in a ``TangentTensor`` and sent
    through ``manifold.expmap`` before it is fed to the model.

    Returns:
        The sum of (batch loss * batch size) over all batches, divided by
        ``len(train_loader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()

        # Wrap the raw features as tangent vectors, then map them onto the manifold.
        tangent_inputs = TangentTensor(data=batch_inputs, man_dim=-1, manifold=manifold)
        predictions = model(manifold.expmap(tangent_inputs))

        batch_loss = criterion(predictions, batch_targets)
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the average.
        loss_sum += batch_loss.item() * batch_inputs.size(0)
    return loss_sum / len(train_loader.dataset)

In [21]:
# Search space for the grid search. Key order matters: the training loop
# unpacks each combination positionally in exactly this order.
param_grid = dict(
    num_hidden_layers=[0, 1, 2],
    layer_size=[64, 128],
    lr=[0.001, 0.002, 0.003],
    weight_decay=[0.001, 0.005, 0.01],
    batch_size=[32, 48, 64, 80, 96],
    BCE_weights_power=[0.5],
    BCE_weights_factor=[1.2],
    epochs=[20],
    curvature=[-1],
)

# A single-point grid, kept next to the search space for reference.
best_grid = dict(
    num_hidden_layers=[0],
    layer_size=[128],
    lr=[0.003],
    weight_decay=[0.01],
    batch_size=[96],
    BCE_weights_power=[0.5],
    BCE_weights_factor=[1.2],
    epochs=[20],
    curvature=[-1],
)
# Author's notes — presumably (lr, batch_size) pairs; confirm:
# 0.002, 48
# 0.003, 96+

# Number of combinations the grid expands to.
np.prod([len(candidates) for candidates in param_grid.values()])

270

In [22]:
import itertools
# Every combination of grid values, as tuples ordered like param_grid's keys.
param_combinations = [*itertools.product(*param_grid.values())]

In [23]:
# One independent, initially empty list per hyper-parameter combination.
combination_stats = dict((combo, []) for combo in param_combinations)

In [24]:
from hypll.manifolds.poincare_ball import Curvature, PoincareBall

from hypll.optim import RiemannianAdam
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm

# Metric names; util.h_mini_evaluate_metrics results are indexed positionally
# in this order (accuracy, hamming, precision, sensitivity, f1, aps).
METRICS = ['acc', 'hamm', 'prec', 'sens', 'f1', 'aps']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Mean of each label column over the whole dataset, restricted to the trained
# targets. It does not depend on the combination or the fold, so compute it once.
possible_label_occurences = torch.tensor(dataset[possible_target_cols].mean())
indices = np.where(np.isin(possible_target_cols, target_cols))[0]
label_occurences = possible_label_occurences[indices]


def _new_stats():
    """Return an empty {'loss' | metric: {'train': [], 'val': []}} container."""
    return {name: {'train': [], 'val': []} for name in ['loss', *METRICS]}


for i, params in enumerate(param_combinations):
    # NOTE: the text printed below is parsed again by the result-loading cells;
    # keep the exact format of these print statements.
    print(f'----- Combination {i} -----')
    print(*zip(param_grid.keys(), params))
    num_hidden_layers, layer_size, lr, weight_decay, batch_size, BCE_weights_power, BCE_weights_factor, epochs, curvature = params

    manifold = PoincareBall(c=Curvature(curvature))

    # Per-combination (not per-fold) positive-class weights. They are moved to
    # `device` because BCEWithLogitsLoss needs pos_weight on the same device as
    # the model outputs.
    bce_positive_weights = util.calc_bce_positive_weights(label_occurences, BCE_weights_power, BCE_weights_factor).to(device)

    # Per-combination container: one entry (a per-epoch list) per fold.
    eval_stats = _new_stats()

    # Initialize cross-validation
    kf = KFold(n_splits=consts.KFOLDS, shuffle=True, random_state=consts.KFOLD_SEED)

    # NOTE(review): never populated anywhere in this cell; kept only so the name
    # still exists for any later cell that may reference it.
    all_model_params = []

    # Cross-validation loop
    for fold, (train_idx, val_idx) in enumerate(kf.split(features, labels)):
        print(f'Fold {fold + 1}')

        # Split data into train and validation sets
        train_features, val_features = features[train_idx], features[val_idx]
        train_labels, val_labels = labels[train_idx], labels[val_idx]

        # Create DataLoader for training and validation
        train_dataset = CustomDataset(train_features, train_labels)
        val_dataset = CustomDataset(val_features, val_labels)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        # Initialize model, criterion, and optimizer
        model = MLP(input_size=len(feature_cols), output_size=len(target_cols), layer_size=layer_size, num_hidden_layers=num_hidden_layers, manifold=manifold).to(device)

        # NOTE(review): MLP.forward already applies a softmax, while
        # BCEWithLogitsLoss applies a sigmoid to its input and expects raw
        # logits — confirm that this double activation is intended.
        # criterion = nn.BCELoss()
        criterion = nn.BCEWithLogitsLoss(pos_weight=bce_positive_weights)

        optimizer = RiemannianAdam(model.parameters(), lr=lr, weight_decay=weight_decay)

        # Training loop: per-epoch loss and metrics for this fold.
        eval_stats_fold = _new_stats()

        for epoch in range(epochs):
            eval_stats_fold['loss']['train'].append(train_model(model, train_loader, criterion, optimizer, manifold, device))
            eval_stats_fold['loss']['val'].append(util.h_mini_evaluate_loss(model, val_loader, criterion, manifold, device))

            # Calculate and store the metrics
            # accuracy, hamm, precision, sensitivity, f1, aps
            eval_train = util.h_mini_evaluate_metrics(model, train_loader, manifold, device)
            eval_val = util.h_mini_evaluate_metrics(model, val_loader, manifold, device)
            # A separate index name keeps the combination counter `i` intact.
            for metric_idx, metric in enumerate(METRICS):
                eval_stats_fold[metric]['train'].append(eval_train[metric_idx])
                eval_stats_fold[metric]['val'].append(eval_val[metric_idx])

            # print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {eval_stats_fold['loss']['train'][-1]:.4f}, Val Loss: {eval_stats_fold['loss']['val'][-1]:.4f}")

        # Report the validation metrics of the final epoch.
        for metric in METRICS:
            stat = eval_stats_fold[metric]['val'][-1]
            if isinstance(stat, np.ndarray):
                stat = list(stat)

            print(f'{metric}:\t', stat)

        for name, split_stats in eval_stats_fold.items():
            eval_stats[name]['train'].append(split_stats['train'])
            eval_stats[name]['val'].append(split_stats['val'])

        # Keep the fold's statistics in memory; previously they survived only
        # as printed text once the next combination started.
        combination_stats[params].append(eval_stats_fold)


----- Combination 0 -----
('num_hidden_layers', 0) ('layer_size', 64) ('lr', 0.001) ('weight_decay', 0.001) ('batch_size', 32) ('BCE_weights_power', 0.5) ('BCE_weights_factor', 1.2) ('epochs', 20) ('curvature', -1)
Fold 1
acc:	 0.592391304347826
hamm:	 0.20380434782608695
prec:	 0.592391304347826
sens:	 0.592391304347826
f1:	 0.592391304347826
aps:	 [0.5780179747615695, 0.6842870969698117, 0.6171819795399283, 0.5302662152649852]
Fold 2
acc:	 0.6267029972752044
hamm:	 0.18664850136239783
prec:	 0.6267029972752044
sens:	 0.6267029972752044
f1:	 0.6267029972752044
aps:	 [0.6416219380667229, 0.7788137080490831, 0.6186980852070558, 0.6335301285641048]
Fold 3
acc:	 0.553133514986376
hamm:	 0.22343324250681199
prec:	 0.553133514986376
sens:	 0.553133514986376
f1:	 0.553133514986376
aps:	 [0.6013726664554953, 0.7045580013733101, 0.5370032778816403, 0.44292952146419673]
Fold 4
acc:	 0.5994550408719346
hamm:	 0.20027247956403268
prec:	 0.5994550408719346
sens:	 0.5994550408719346
f1:	 0.59945504

In [25]:
import ast

In [27]:
# Parse the saved console log of the grid search back into numbers.
# Resulting nesting: grid_search_results[combination][fold][metric].
#   '----- Combination N -----'  -> start a new combination
#   '(' ...                      -> parameter line, ignored here
#   'Fold N'                     -> start a new fold
#   '<metric>:\t <value>'        -> one metric; list values are reduced to their mean
grid_search_results = []
with open('data/grid_search_arc_mini.txt', 'r') as file:
    # Iterate lazily instead of materialising the whole file with readlines().
    for line in file:
        if not line.strip():
            # A blank line has no '\t ' separator and would raise IndexError below.
            continue

        if line.startswith('----'):
            grid_search_results.append([])
            continue

        if line.startswith('('):
            continue

        if line.startswith('Fold'):
            grid_search_results[-1].append([])
            continue

        result = line.split('\t ')[1]

        if result.startswith('['):
            # List-valued metric: collapse it to a single number via its mean.
            grid_search_results[-1][-1].append(np.array(ast.literal_eval(result)).mean())
            continue

        grid_search_results[-1][-1].append(float(result))


grid_search_results = np.array(grid_search_results)
grid_search_results.shape

(270, 5, 6)

In [28]:
# Average over axis 1 (the folds): one row of mean metrics per combination.
grid_search_results.mean(axis=1)

array([[0.59313766, 0.20343117, 0.59313766, 0.59313766, 0.59313766,
        0.60317213],
       [0.57461646, 0.21269177, 0.57461646, 0.57461646, 0.57461646,
        0.59815192],
       [0.57625726, 0.21187137, 0.57625726, 0.57625726, 0.57625726,
        0.59500049],
       ...,
       [0.57515105, 0.21242448, 0.57515105, 0.57515105, 0.57515105,
        0.59181116],
       [0.57515105, 0.21242448, 0.57515105, 0.57515105, 0.57515105,
        0.60154586],
       [0.57624097, 0.21187952, 0.57624097, 0.57624097, 0.57624097,
        0.60304872]])

In [29]:
# Fold-averaged metrics, one row per combination; split into one vector per metric.
# Column order follows the parsed log: acc, hamm, prec, sens, f1, aps.
fold_means = grid_search_results.mean(axis=1)
res_acc, res_hamm, res_prec, res_sens, res_f1, res_aps = (
    fold_means[:, column] for column in range(6)
)

In [30]:
# Combination indices ordered by each metric. The value is negated for a
# descending sort; res_hamm is the only metric sorted ascending.
rank_acc = np.argsort(-res_acc)
rank_hamm = np.argsort(res_hamm)
rank_prec = np.argsort(-res_prec)
rank_sens = np.argsort(-res_sens)
rank_f1 = np.argsort(-res_f1)
rank_aps = np.argsort(-res_aps)

In [58]:
# rank_sums[i] = position of combination i in the acc ranking
#              + its position in the hamm ranking
#              + its position in the aps ranking.
# rank_* are permutations of the combination indices, so the position of every
# combination is the inverse permutation, which np.argsort(rank_*) yields in
# one call (instead of one np.where scan per combination).
# prec / sens / f1 are left out on purpose, as in the original:
#   + np.argsort(rank_prec) + np.argsort(rank_sens) + np.argsort(rank_f1)
rank_sums = np.argsort(rank_acc) + np.argsort(rank_hamm) + np.argsort(rank_aps)

# Show combinations 60..89 in rows of five (same output as the former chained
# slices rank_sums[:90][45:][15:30]... and rank_sums[:90][45:][30:]...).
for row_start in range(60, 90, 5):
    print(rank_sums[row_start:row_start + 5])
print()
print()
# print(rank_sums)
# print(rank_sums)

[333  52  56 135 135]
[238  54  90 116 159]
[101  63  80 184 118]
[447 165 275  84  56]
[617 212 235  95  36]
[438 204 237  55   3]




In [72]:
# Fold-averaged acc / hamm / aps for combinations 61 and 89.
for combo_idx in (61, 89):
    print(res_acc[combo_idx], res_hamm[combo_idx], res_aps[combo_idx])

0.5925971448880464 0.2037014275559768 0.6089536124431318
0.596953856178178 0.20152307191091104 0.6109228077296388


In [None]:
# Narrowed search space around two candidate (lr, batch_size) settings.
param_grid = {'num_hidden_layers': [0],
              'layer_size': [128],
              'lr': [0.002, 0.003],
              'weight_decay': [0.01],
              'batch_size': [48, 96],
              'BCE_weights_power': [0.5],
              'BCE_weights_factor': [1.2],
              'epochs': [20],
              'curvature': [-1]}

# Notes (were bare expressions; the trailing '96+' made the cell a SyntaxError):
# 0.002, 48
# 0.003, 96+

# Earlier, full search space, kept for reference:
# param_grid = {'num_hidden_layers': [0,1,2],
#               'layer_size': [64, 128],
#               'lr': [0.001, 0.002, 0.003],
#               'weight_decay': [0.001, 0.005, 0.01],
#               'batch_size': [32, 48, 64, 80, 96],
#               'BCE_weights_power': [0.5],
#               'BCE_weights_factor': [1.2],
#               'epochs': [20],
#               'curvature': [-1]}

In [None]:
# Parse 'grid_search_all.txt' into a list of (params dict, fold-averaged metrics).
# While parsing, each run is a list: [params_dict, fold_1_metrics, fold_2_metrics, ...].
grid_search_results = []
with open('grid_search_all.txt', 'r') as file:
    for line in file:
        if not line.strip():
            # A blank line has no '\t ' separator and would raise IndexError below.
            continue

        if line.startswith('----'):
            grid_search_results.append([])
            continue

        if line.startswith('('):
            # NOTE(review): assumes the parameter line is a valid Python literal of
            # (key, value) pairs, where a key may be a comma-joined group of names
            # whose values are given together — confirm against the log file.
            params = ast.literal_eval(line)
            nested = [list(zip(key.split(','), np.array(val).flatten())) for (key, val) in params]
            unnested = [item for sublist in nested for item in sublist]
            grid_search_results[-1].append({key: val for (key, val) in unnested})
            continue

        if line.startswith('Fold'):
            grid_search_results[-1].append([])
            continue

        # Bug fix: `result` was never assigned in this cell, so the list check
        # below silently relied on a stale value left over from an earlier cell.
        result = line.split('\t ')[1]

        if result.startswith('['):
            # List-valued metric: collapse it to a single number via its mean.
            grid_search_results[-1][-1].append(np.array(ast.literal_eval(result)).mean())
            continue

        grid_search_results[-1][-1].append(float(result))

# Average the metrics over however many folds each run has (the first element
# of a run is its params dict, the rest are the per-fold metric lists).
grid_search_results = [(run[0], np.array(run[1:]).mean(axis=0)) for run in grid_search_results]
# grid_search_results = np.array(grid_search_results)
# grid_search_results.shape
# grid_search_results = np.array(grid_search_results)
# grid_search_results.shape

In [None]:
# Display the parsed runs: a list of (params dict, fold-averaged metric array) tuples.
grid_search_results

[({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 200,
   'curvature': -1},
  array([0.07839368, 0.29219214, 0.57370791, 0.46126235])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.6,
   'epochs': 200,
   'curvature': -1},
  array([0.03863903, 0.33448244, 0.66740088, 0.46599431])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 2,
   'epochs': 200,
   'curvature': -1},
  array([0.01766738, 0.36713675, 0.71054298, 0.45833707])),
 ({'num_hidden_layers': 1,
   'layer_size': 128,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.65,
   'BCE_weights_factor': 1.2,
   'epochs': 200,
   'curvature': -1},
  array([0.08004657, 0.30032571, 0.58783675, 0.46114133])),
 ({'num_hidden_layers': 1

In [None]:
# Order the runs by the metric at index 3 of their averaged metric array, descending.
sorted(grid_search_results, key=lambda run: -run[1][3])

[({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.002,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.6,
   'epochs': 200,
   'curvature': -1},
  array([0.05633228, 0.3252239 , 0.67656905, 0.47625214])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.14190372, 0.25479267, 0.52407953, 0.47346049])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.003,
   'weight_decay': 0,
   'batch_size': 128,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.14190372, 0.25479267, 0.52407953, 0.47346049])),
 ({'num_hidden_layers': 2,
   'layer_size': 256,
   'lr': 0.005,
   'weight_decay': 0,
   'batch_size': 64,
   'BCE_weights_power': 0.5,
   'BCE_weights_factor': 1.2,
   'epochs': 250,
   'curvature': -1},
  array([0.141903