In [None]:
import numpy as np
import pandas as pd
import os
import copy
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

from utils import *
from utils import load_data, prepare_data

In [None]:
# Experiment configuration: every other cell reads these module-level names.
train_data = "CelebA"  # or "CivilComments"
eval_data = "MetaShift" # or "MultiNLI", "OfficeHome", "ColoredMNIST"
model = "resnet" # or "clip", "bert-ft"
classifier = "linear"
learner = "MLP" # one of "MLP", "Linear", "kNN", "DecisionTree" (the strings train_mlc dispatches on); kNN is for the ablation study
seed = 0
input_feats = ['n', 'sc', 'ci', 'ai', 'c_intra', 'a_intra']  # feature names handed to prepare_data

MARGIN = 0.05  # tolerance: predict_regression marks every algorithm within MARGIN of the lowest predicted error as a winner
ALGORITHMS = ['ERM', 'GroupDRO', 'oversample', 'remax-margin', 'undersample']  # candidate algorithms; their count fixes the selector's output width
OUTPUT_DIR = "YOUR_PATH"  # should be the same as OUTPUT_DIR in configs/DATA_PATH.sh
TRAIN_PATH = os.path.join(OUTPUT_DIR, train_data.lower())
EVAL_PATH = os.path.join(OUTPUT_DIR, eval_data.lower())
IDENTIFIER = ['n', 'sc', 'ci', 'ai', 'y_task', 'a_task']  # the statistics that jointly identify a task
METRIC = 'wga_te_err'  # the metric that we want to optimize

# Seed NumPy and torch (CPU and CUDA) so the split below and model initialisation are repeatable.
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [None]:
# Load the task statistics and the measured performance of every algorithm.
train_df = load_data(model, TRAIN_PATH, classifier, ALGORITHMS, IDENTIFIER, METRIC, MARGIN)
eval_df = load_data(model, EVAL_PATH, classifier, ALGORITHMS, IDENTIFIER, METRIC, MARGIN)
# The index arithmetic below assumes each dataset contributes len(ALGORITHMS)
# consecutive rows to train_df (one per algorithm) -- TODO confirm against load_data.
num_datasets = len(train_df)//len(ALGORITHMS)

# 80/20 split at the dataset level, drawn from the globally seeded NumPy RNG.
tr_idx = np.random.choice(num_datasets, int(num_datasets*0.8), replace=False)
# Ascending complement of tr_idx. setdiff1d replaces the former
# `[i for i in range(num_datasets) if i not in tr_idx]`, which rescanned the
# whole tr_idx array for every i (quadratic) and produced the same values.
val_idx = np.setdiff1d(np.arange(num_datasets), tr_idx)

# sanity check
print(model)
print(train_df.shape, eval_df.shape)
# Expand each held-out dataset id into the row positions of its len(ALGORITHMS) rows.
val_df = train_df.iloc[np.concatenate([np.arange(len(ALGORITHMS)*i, len(ALGORITHMS)*i+len(ALGORITHMS)) for i in val_idx])]

# Mean METRIC over all rows vs. over the rows whose "rank" column equals 1.0.
print(val_df[METRIC].mean(), val_df[val_df["rank"]==1.0][METRIC].mean())
print(eval_df[METRIC].mean(), eval_df[eval_df["rank"]==1.0][METRIC].mean())
print()
# Per-algorithm mean METRIC on the validation rows ...
for alg in ALGORITHMS:
    print(alg, val_df[val_df["algorithm"]==alg][METRIC].mean())
print()
# ... and on the evaluation data.
for alg in ALGORITHMS:
    print(alg, eval_df[eval_df["algorithm"]==alg][METRIC].mean())

### Multi-label Classification

In [None]:
def train_mlc(X_train, y_train, num_epochs=800, learner="MLP", verbose=True):
    """Fit a multi-label algorithm selector.

    Args:
        X_train: 2-D array-like of input features, one row per sample.
        y_train: 2-D array-like of targets with one column per entry of
            ``ALGORITHMS``.
        num_epochs: Epoch budget forwarded to ``train_model`` (gradient-trained
            learners only).
        learner: One of ``"MLP"``, ``"Linear"``, ``"kNN"``, ``"DecisionTree"``.
        verbose: Forwarded to ``train_model``.

    Returns:
        The fitted model: whatever ``train_model`` returns for ``"MLP"`` /
        ``"Linear"``, or the fitted ``KNN`` / ``DecisionTreeClassifier`` object.

    Raises:
        ValueError: If ``learner`` is not one of the supported names.
            (Previously this surfaced later as an ``UnboundLocalError``.)
    """
    supported_learners = ("MLP", "Linear", "kNN", "DecisionTree")
    if learner not in supported_learners:
        raise ValueError(
            f"unknown learner {learner!r}; expected one of {supported_learners}"
        )

    # convert to torch tensors
    X_train = torch.tensor(X_train).float()
    y_train = torch.tensor(y_train).float()

    # Learners with their own fit() need none of the torch training machinery.
    if learner == "kNN":
        k = 5
        fitted = KNN(k)
        fitted.fit(X_train, y_train)
        return fitted
    if learner == "DecisionTree":
        from sklearn import tree
        fitted = tree.DecisionTreeClassifier(criterion="gini", max_depth=3)
        fitted.fit(X_train, y_train)
        return fitted

    # Hyperparameters for the gradient-trained learners
    input_size = X_train.shape[1]
    hidden_layer_sizes = (100,)
    output_size = len(ALGORITHMS)
    patience = 2000  # passed to train_model; presumably an early-stopping patience -- TODO confirm
    tol = 1e-4
    alpha = 0.0001  # used as Adam's weight_decay below
    batch_size = len(X_train)  # one batch spans the whole training set

    # Create dataloader
    tr_dataset = TensorDataset(X_train, y_train)
    tr_dataloader = DataLoader(tr_dataset, batch_size=batch_size, shuffle=True)

    # `net` rather than `model`, so the notebook-level `model` string is not shadowed.
    if learner == "MLP":
        net = MLPTorch(input_size, hidden_layer_sizes, output_size)
    else:  # learner == "Linear"
        net = LinearTorch(input_size, output_size)

    # `nn` and `optim` are expected to come from the star-import of utils.
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=alpha)
    return train_model(net, tr_dataloader, criterion, optimizer, num_epochs, patience, tol, verbose=verbose)

def predict_mlc(model, X):
    """Predict the selected algorithm(s) for each row of ``X``.

    The torch path is tried first: the model is called on ``X``, the logits go
    through a sigmoid, and the arg-max column becomes a one-hot row of width
    ``len(ALGORITHMS)``. If that path fails and the model exposes ``predict``
    (sklearn-style), ``model.predict(X)`` is used instead.

    Args:
        model: A callable torch model with ``eval()``, or an object with
            ``predict``.
        X: Feature matrix; converted to a float tensor unless it already is
            a tensor.

    Returns:
        ``(y_pred, y_prob)`` as NumPy arrays on the torch path, or
        ``(y_pred, None)`` on the ``predict`` fallback.

    Raises:
        Exception: The original torch-path error, when the model has no
            ``predict`` method to fall back on.
    """
    if not torch.is_tensor(X):
        X = torch.tensor(X).float()

    # handle torch and sklearn models differently
    try:
        model.eval()
        with torch.no_grad():
            y_logits = model(X)
            y_prob = torch.sigmoid(y_logits)
            top_class = torch.argmax(y_prob, dim=1)
            y_pred = torch.nn.functional.one_hot(top_class, num_classes=len(ALGORITHMS)).float()
            return y_pred.numpy(), y_prob.numpy()
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit propagate. Without a fallback, surface the real failure
        # instead of an unrelated AttributeError on `.predict`.
        if not hasattr(model, "predict"):
            raise
        return np.array(model.predict(X)), None

In [None]:
# Build the multi-label splits and fit the selector.
data = prepare_data("mlc", train_df, eval_df, tr_idx, val_idx, input_feats, ALGORITHMS, METRIC, MARGIN)
# Use the `learner` chosen in the configuration cell; the call used to hard-code
# "Linear", which silently ignored that setting.
trained_model = train_mlc(data["X_train"], data["y_train"], learner=learner, verbose=True, num_epochs=1000)

In [None]:
# Validation: score the multi-label selector on the held-out part of the training data.
y_pred, y_prob = predict_mlc(trained_model, data["X_val"])
# Row positions of the held-out datasets in train_df; the arithmetic assumes
# len(ALGORITHMS) consecutive rows per dataset.
expanded_val_idx = np.concatenate(
    [len(ALGORITHMS) * i + np.arange(len(ALGORITHMS)) for i in val_idx]
)
_ = eval_acc(data["y_val"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    train_df.iloc[expanded_val_idx],
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
    y_prob,
)

In [None]:
# Test: score the same selector on the evaluation data.
y_pred, y_prob = predict_mlc(trained_model, data["X_test"])
_ = eval_acc(data["y_test"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    eval_df,
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
    y_prob,
)

### Regression

In [None]:
def train_regression(X_train, y_train, num_epochs=800, verbose=True):
    """Fit a scikit-learn ``MLPRegressor`` on ``(X_train, y_train)``.

    Args:
        X_train: Training features.
        y_train: Regression targets.
        num_epochs: Passed to the regressor as ``max_iter``.
        verbose: Passed through to the regressor.

    Returns:
        The fitted ``MLPRegressor``.
    """
    from sklearn.neural_network import MLPRegressor

    regressor = MLPRegressor(
        hidden_layer_sizes=(100,),
        alpha=0.1,
        tol=1e-4,
        max_iter=num_epochs,
        random_state=0,  # fixed, independent of the notebook-level seed
        verbose=verbose,
    )
    # fit() returns the estimator itself, so fitting first and returning it is equivalent.
    regressor.fit(X_train, y_train)
    return regressor

def predict_regression(model, X):
    """Turn per-row predictions into one multi-hot winner vector per group.

    ``model.predict(X)`` is cut into consecutive groups of ``len(ALGORITHMS)``
    values. Within each group, every entry no larger than the group minimum
    plus ``MARGIN`` is marked 1 and all others 0.

    Args:
        model: Object exposing ``predict``.
        X: Inputs whose row count is a multiple of ``len(ALGORITHMS)``.

    Returns:
        A list with one integer array per group.
    """
    group_size = len(ALGORITHMS)

    def mark_winners(group):
        # 1 where the prediction is within MARGIN of the group's lowest value
        threshold = group.min() + MARGIN
        return (group <= threshold).astype(int)

    predictions = model.predict(X)
    assert len(predictions) % group_size == 0
    return [
        mark_winners(predictions[start:start + group_size])
        for start in range(0, len(predictions), group_size)
    ]

In [None]:
# Rebuild the splits in the "regression" format and fit the regressor.
# NOTE: `data` and `trained_model` are rebound here, replacing the objects the
# multi-label cells created under the same names.
data = prepare_data(
    "regression",
    train_df,
    eval_df,
    tr_idx,
    val_idx,
    input_feats,
    ALGORITHMS,
    METRIC,
    MARGIN,
)
trained_model = train_regression(
    data["X_train"],
    data["y_train"],
    verbose=True,
    num_epochs=1000,
)

In [None]:
# Validation: score the regression-based selector on the held-out part of the training data.
y_pred = predict_regression(trained_model, data["X_val"])
# Row positions of the held-out datasets in train_df; the arithmetic assumes
# len(ALGORITHMS) consecutive rows per dataset.
expanded_val_idx = np.concatenate(
    [len(ALGORITHMS) * i + np.arange(len(ALGORITHMS)) for i in val_idx]
)
_ = eval_acc(data["y_val"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    train_df.iloc[expanded_val_idx],
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
)

In [None]:
# Test: score the regression-based selector on the evaluation data.
y_pred = predict_regression(trained_model, data["X_test"])
_ = eval_acc(data["y_test"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    eval_df,
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
)

### Other Baselines

In [None]:
baseline = 'global_best'  # or "random"
data = prepare_data(
    "baseline",
    train_df,
    eval_df,
    tr_idx,
    val_idx,
    input_feats,
    ALGORITHMS,
    METRIC,
    MARGIN,
)
# Column-wise sum of the `multi_hot` vectors taken from the rank-1 rows at
# positions tr_idx. Presumably this counts how often each algorithm is a
# winner on the training datasets -- TODO confirm the `multi_hot` semantics.
global_rank = np.array(
    train_df.loc[train_df["rank"] == 1.0, 'multi_hot'].iloc[tr_idx].tolist()
).sum(axis=0)

In [None]:
def predict_baseline(mode, X):
    """Produce multi-hot baseline predictions, one row per element of ``X``.

    For every row a winner count ``k`` is drawn uniformly from
    ``1..len(ALGORITHMS)``. Then:

    * ``"random"``: ``k`` distinct algorithm indices are drawn at random.
    * ``"global_best"``: the ``k`` indices with the largest ``global_rank``
      values are used.

    Only ``len(X)`` is used; the contents of ``X`` are never read. Random draws
    come from the global NumPy RNG, in the same order as before this rewrite.

    Args:
        mode: ``"random"`` or ``"global_best"``.
        X: Any sized container; determines the number of predictions.

    Returns:
        Integer array of shape ``(len(X), len(ALGORITHMS))`` (an empty array
        when ``X`` is empty).

    Raises:
        ValueError: If ``mode`` is unknown. This is now checked up front, so it
            is raised even when ``X`` is empty.
    """
    if mode not in ("random", "global_best"):
        raise ValueError(f"unknown mode {mode}")

    n_algs = len(ALGORITHMS)
    # The global ranking is the same for every row; compute it once, not per row.
    best_first = np.argsort(global_rank)[::-1] if mode == "global_best" else None

    y_preds = []
    for _ in range(len(X)):
        num_winners = np.random.choice(n_algs, 1, replace=False)[0] + 1
        if mode == "random":
            chosen = np.random.choice(n_algs, num_winners, replace=False)
        else:
            chosen = best_first[:num_winners]
        # convert the chosen indices to multi-hot
        chosen = set(chosen.tolist())
        y_preds.append([1 if alg_idx in chosen else 0 for alg_idx in range(n_algs)])
    return np.array(y_preds)

In [None]:
# Validation: score the chosen baseline on the held-out part of the training data.
y_pred = predict_baseline(baseline, data["X_val"])
# Row positions of the held-out datasets in train_df; the arithmetic assumes
# len(ALGORITHMS) consecutive rows per dataset.
expanded_val_idx = np.concatenate(
    [len(ALGORITHMS) * i + np.arange(len(ALGORITHMS)) for i in val_idx]
)
_ = eval_acc(data["y_val"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    train_df.iloc[expanded_val_idx],
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
)

# Test: score the same baseline on the evaluation data.
y_pred = predict_baseline(baseline, data["X_test"])
_ = eval_acc(data["y_test"], y_pred, verbose=True, mode='soft 0-1')
_ = eval_wga_err(
    eval_df,
    y_pred,
    IDENTIFIER,
    ALGORITHMS,
    METRIC,
)