In [1]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import transformers as ppb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
import warnings
import pickle

from sklearn.base import BaseEstimator
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score, confusion_matrix

warnings.filterwarnings('ignore')

In [2]:
def save_pickle(obj, filename):
    """Pickle ``obj`` to the file ``<filename>.pkl``.

    Args:
        obj: Any picklable Python object.
        filename: Target path without the ``.pkl`` suffix; the suffix is
            appended here.
    """
    target_path = f"{filename}.pkl"
    with open(target_path, mode="wb") as handle:
        pickle.dump(obj, handle)

def load_pickle(filename):
    """Read back the object stored in ``<filename>.pkl``.

    Args:
        filename: Source path without the ``.pkl`` suffix; the suffix is
            appended here.

    Returns:
        The unpickled object.
    """
    source_path = f"{filename}.pkl"
    # NOTE(review): unpickling can execute arbitrary code, so only point this
    # at files produced by a trusted source.
    with open(source_path, mode="rb") as handle:
        return pickle.load(handle)

# Smoke test for the pickle helpers: write a small list to "test.pkl", read it
# back, and compare. Any failure (I/O error or mismatch) is printed instead of
# being raised, so the notebook keeps running.
test_obj = [1, 2, 3]
test_filename = "test"
try:
    save_pickle(test_obj, test_filename)
    test_load = load_pickle(test_filename)
    assert test_load == test_obj, "TEST"
except Exception as err:
    print(err)

In [3]:
class ClassifierDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each embedding with its label by position."""

    def __init__(self, embeddings, labels):
        # Both containers are stored as given; no copy or conversion is done.
        self.embeddings, self.labels = embeddings, labels

    def __len__(self):
        """Return the number of samples, taken from the embeddings container."""
        return len(self.embeddings)

    def __getitem__(self, idx):
        """Return the ``(embedding, label)`` pair stored at position ``idx``."""
        embedding = self.embeddings[idx]
        label = self.labels[idx]
        return embedding, label

def create_dataloader(embeddings, labels, batch_size=1, shuffle=False):
    """Wrap embeddings and labels in a ``ClassifierDataset`` and a ``DataLoader``.

    Args:
        embeddings: Indexable container of input embeddings.
        labels: Indexable container of labels, aligned with ``embeddings``.
        batch_size: Number of samples per batch (default 1).
        shuffle: Forwarded to ``DataLoader``; when True the sample order is
            reshuffled on every pass. Defaults to False, which keeps the
            original fixed-order behaviour.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(embeddings, labels)``
        batches.
    """
    dataset = ClassifierDataset(embeddings, labels)
    return torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

In [4]:
# Load the cached train/test splits. Each pickle is expected to hold a mapping
# with "embeddings" and "labels" entries. Every later cell needs these four
# variables, so a failed load is raised here with context rather than printed
# and ignored (which would only surface later as an unrelated NameError).
try:
    train_data = load_pickle("train_data")
    test_data = load_pickle("test_data")
    X_train, y_train = train_data["embeddings"], train_data["labels"]
    X_test, y_test = test_data["embeddings"], test_data["labels"]
except (OSError, KeyError, pickle.UnpicklingError) as err:
    raise RuntimeError(
        "Could not load 'train_data.pkl' / 'test_data.pkl' with "
        "'embeddings' and 'labels' entries"
    ) from err

In [21]:
# Batch both splits with the same batch size; `train` and `test` are the
# loaders read by the Optuna objective below.
BATCH_SIZE = 64
train = create_dataloader(X_train, y_train, batch_size=BATCH_SIZE)
test = create_dataloader(X_test, y_test, batch_size=BATCH_SIZE)

# Model

In [9]:
class Classifier(torch.nn.Module):
    """Single linear layer that maps an embedding vector to per-class logits.

    Args:
        n_classes: Number of output logits (default 2).
        embedding_dim: Size of each input embedding (default 768, the value
            that used to be hard-coded).
    """

    def __init__(self, n_classes=2, embedding_dim=768):
        super().__init__()
        self.l1 = torch.nn.Linear(embedding_dim, n_classes)
        # NOTE(review): `silu` is created but never applied in `forward`; it is
        # kept only so the attribute still exists for any external code.
        self.silu = torch.nn.SiLU()

    def forward(self, embeddings):
        """Return the raw (un-normalised) logits for ``embeddings``.

        No softmax is applied here; callers apply it or use a loss that
        expects logits.
        """
        return self.l1(embeddings)
    
#     def train_step(self, train):
#         for batch in train:
#             X, y = batch
#             X = X.to(self.device)
#             y = y.to(self.device)
#             logits = self(X)
#             loss=self.criteria(logits, y)
#             self.opt.zero_grad()
#             loss.backward()
#             self.opt.step()
    
#     def eval_step(self, test):
#         for batch in test:
#             X, y = batch
#             X = X.to(self.device)
#             y = y.to(self.device)
#             logits = self(X)
#             loss=self.criteria(logits, y)
#             self.opt.zero_grad()
#             loss.backward()
#             self.opt.step()
        
    
#     def fit(self, train):
#         self.train()
#         for epoch in range(self.epochs):
#             self.train_step(train)
    
#     def predict(self, test):
#         self.eval()
#         with torch.no_grad():
#             probas, labels = [], []
#             softmax = torch.nn.Softmax(dim=-1)
#             for X in test:
#                 X = torch.FloatTensor(X).to(self.device)
#                 score = self(X)
#                 proba = softmax(score)[1].item()
#                 probas.append(proba)
#                 label = torch.argmax(score).item()
#                 labels.append(label)
#         return probas

In [33]:
"""
Optuna study that tunes a linear classifier on precomputed embeddings using PyTorch.

In this notebook, we optimize the ROC AUC of the single-layer `Classifier`, evaluated on
the `test` data loader after every epoch. The network architecture is fixed; the search
covers the optimizer configuration (learning rate and weight decay) and the number of
training epochs. Unpromising trials are pruned based on the per-epoch ROC AUC.

"""

import os

import optuna
from optuna.trial import TrialState


def objective(trial):
    """Optuna objective: train a ``Classifier`` and return its ROC AUC.

    The trial samples the Adam learning rate (log scale), the weight decay
    and the number of epochs. After each epoch the model is scored on the
    module-level ``test`` loader, using the softmax probability of class
    index 1, and that score is reported to the trial so it can be pruned.

    Args:
        trial: The ``optuna`` trial supplying hyper-parameter suggestions.

    Returns:
        The ROC AUC measured after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: When the trial asks to be pruned.
    """
    device = "cpu"

    # Search space.
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-4, 1e-1)
    n_epochs = trial.suggest_int("n_epochs", 5, 100)

    # Model, loss and optimizer for this trial.
    model = Classifier()
    model.to(device)
    criteria = torch.nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(n_epochs):
        # One optimisation pass over the `train` loader.
        model.train()
        for features, targets in train:
            features = features.to(device)
            targets = targets.to(device)
            loss = criteria(model(features), targets)
            opt.zero_grad()
            loss.backward()
            opt.step()

        # Score the current weights on the `test` loader.
        model.eval()
        labels, probas = [], []
        with torch.no_grad():
            for features, targets in test:
                class_probs = torch.softmax(model(features.to(device)), dim=1)
                probas.extend(class_probs[:, 1].tolist())
                labels.extend(targets.tolist())

        roc_score = roc_auc_score(labels, probas)
        trial.report(roc_score, epoch)

        # Let the study stop this trial early based on the reported value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return roc_score

In [34]:
if __name__ == "__main__":
    # Maximise the value returned by `objective`, limited by n_trials=100 and
    # timeout=600.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    # Split the recorded trials by their final state.
    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    for caption, count in (
        ("  Number of finished trials: ", len(study.trials)),
        ("  Number of pruned trials: ", len(pruned_trials)),
        ("  Number of complete trials: ", len(complete_trials)),
    ):
        print(caption, count)

    # Report the best trial's value and the hyper-parameters that produced it.
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[I 2023-07-13 01:10:10,155] A new study created in memory with name: no-name-b408585b-f66d-4ac6-bd71-833d1bf03a82
[I 2023-07-13 01:10:13,992] Trial 0 finished with value: 0.8991090652013238 and parameters: {'lr': 0.005517622283844181, 'weight_decay': 0.0421665236511578, 'n_epochs': 68}. Best is trial 0 with value: 0.8991090652013238.
[I 2023-07-13 01:10:17,638] Trial 1 finished with value: 0.8948164036976336 and parameters: {'lr': 0.00032389092301081585, 'weight_decay': 0.05879814088541134, 'n_epochs': 65}. Best is trial 0 with value: 0.8991090652013238.
[I 2023-07-13 01:10:19,589] Trial 2 finished with value: 0.8889062951894341 and parameters: {'lr': 0.0003197993007051008, 'weight_decay': 0.09536832899788185, 'n_epochs': 33}. Best is trial 0 with value: 0.8991090652013238.
[I 2023-07-13 01:10:21,140] Trial 3 finished with value: 0.8890709855715158 and parameters: {'lr': 0.0008946960760299925, 'weight_decay': 0.09835426350244215, 'n_epochs': 28}. Best is trial 0 with value: 0.899109065

[I 2023-07-13 01:11:43,751] Trial 59 pruned. 
[I 2023-07-13 01:11:43,828] Trial 60 pruned. 
[I 2023-07-13 01:11:48,008] Trial 61 finished with value: 0.9107324571269428 and parameters: {'lr': 0.023175484388767268, 'weight_decay': 0.002205446703355569, 'n_epochs': 75}. Best is trial 55 with value: 0.9138281007316003.
[I 2023-07-13 01:11:51,243] Trial 62 finished with value: 0.9128024679456281 and parameters: {'lr': 0.019264712315203197, 'weight_decay': 0.002665336969567456, 'n_epochs': 58}. Best is trial 55 with value: 0.9138281007316003.
[I 2023-07-13 01:11:51,313] Trial 63 pruned. 
[I 2023-07-13 01:11:51,390] Trial 64 pruned. 
[I 2023-07-13 01:11:51,460] Trial 65 pruned. 
[I 2023-07-13 01:11:54,898] Trial 66 finished with value: 0.9131599665799028 and parameters: {'lr': 0.019261345426658225, 'weight_decay': 0.0024075967327274687, 'n_epochs': 61}. Best is trial 55 with value: 0.9138281007316003.
[I 2023-07-13 01:11:56,813] Trial 67 pruned. 
[I 2023-07-13 01:11:56,884] Trial 68 pruned. 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  59
  Number of complete trials:  41
Best trial:
  Value:  0.9142820034919718
  Params: 
    lr: 0.012585282493342691
    weight_decay: 0.0018828840561176574
    n_epochs: 65
