# With Wandb pipeline

### setup environment

In [None]:
%pip install transformers datasets
%pip install wandb
%pip install torch
%pip install torchmetrics
%pip install tqdm

### import

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb
import random

from tqdm.auto import tqdm


# Log in to Weights & Biases (previously done via: %wandb login).
wandb.login()

# Set random seeds for Python's `random` module and for PyTorch.
random.seed(42)
torch.manual_seed(42)


## Training

### Configs

In [None]:
# Global variables
# NOTE: "PROGJECT" is a misspelling of "PROJECT"; the name is kept because
# model_pipeline() passes it to wandb.init(project=...).
PROGJECT_NAME = 'WIDM-MuIME-LGClassification'
# Directory and file name are concatenated to locate the training file.
DATASET_ROOT_PATH = ".//Dataset//Train_Datasets//"
# Directory the model checkpoint (model.pth) is written to.
MODEL_SAVE_PATH = "./"
TRAIN_FILENAME = "bopomofo-0.txt"

# Module-level switch read by train(), train_log(), val_log() and
# log_model_info() before they call into wandb.
WANDB_LOG = False
INPUT_SEQUENCE_LENGTH = 20
# Length every token-id sequence is truncated/padded to when loading data.
SEQUENCE_SIZE = INPUT_SEQUENCE_LENGTH


# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Hyperparameters handed to model_pipeline(); read there as attributes
# (config.epochs, config.batch_size, config.learning_rate, ...).
config = dict(
    epochs = 10,
    learning_rate = 3e-4,
    batch_size = 16,
    optimizer = "Adam",
    loss_function = "MSELoss",

    dataset=TRAIN_FILENAME,
    dataset_error_rate=0.0,
    # sequence_size = 128,
    # embedding_size = 768,
    # example_size = 500,
    # classes=10,
    # kernels=[16, 32],
    # dataset="MNIST",
    device=device,
    architecture = "Linear Feed Forward",
)

## Pipeline

In [None]:
def tensorlog(name:str, x:torch.Tensor) -> None:
    """Debug helper: print a tensor, then its shape, each labelled with ``name``."""
    print(f"{name}: {x}")
    print(f"{name} shape: {x.shape}")

### Model

Moved to MyModel directory

### Data Preprocessing

In [None]:
# Moved to data_preprocessing.py

### Dataset

In [None]:
from torch.utils.data import Dataset

class MyDataSet(Dataset):
    """Map-style dataset that pairs each sample in ``X`` with its label in ``Y``.

    Args:
        X: Indexable, sized collection of input samples.
        Y: Indexable collection of labels, aligned with ``X`` by position.
        transform: Optional callable applied to a sample each time it is
            fetched. ``None`` (the default) returns samples unchanged.
    """

    def __init__(self, X, Y, transform=None):
        self.data = X
        self.labels = Y
        # The argument used to be accepted and then dropped; store it so a
        # caller-supplied transform is actually applied in __getitem__.
        self.transform = transform

    def __len__(self):
        """Return the number of samples (the length of ``X``)."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(sample, label)`` at ``index``, transforming the sample if requested."""
        sample = self.data[index]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, self.labels[index]

### train

In [None]:
import sys

def train(model, train_loader, val_loader, criterion, optimizer, config):
    """Train ``model`` for ``config.epochs`` epochs, validating after each one.

    After every epoch the model is evaluated on ``val_loader``. Whenever the
    validation accuracy beats the best value seen so far, the model's
    ``state_dict`` is saved as ``model.pth`` under ``MODEL_SAVE_PATH``.

    Args:
        model: Module to optimise; it is moved to ``config.device`` first.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss callable forwarded to ``train_batch``/``val_batch``.
        optimizer: Optimizer forwarded to ``train_batch``.
        config: Object exposing ``epochs`` and ``device`` as attributes.
    """
    import os  # local import: only needed to build the checkpoint path

    print("start training...")
    if WANDB_LOG:
        wandb.watch(model, criterion, log="all", log_freq=10)

    # Attribute access (rather than config["device"]) matches how
    # config.epochs is read below and also works for plain attribute-only
    # config wrappers.
    model.to(config.device)

    total_batch = len(train_loader)
    example_ct = 0  # number of examples seen

    best_val_acc = 0
    for epoch in tqdm(range(config.epochs)):

        # ==================== Train =====================
        model.train()
        train_loss = 0.0
        train_correct_token, train_total_token = 0, 0
        for t_batch, (inputs, labels) in enumerate(train_loader):
            batch_loss, correct_batch_token, total_batch_token = train_batch(inputs, labels, model, optimizer, criterion)
            # train_batch may hand back a 0-dim tensor; int() accepts both
            # tensors and plain ints and keeps the running totals as ints.
            correct_batch_token = int(correct_batch_token)
            batch_acc = correct_batch_token / total_batch_token if total_batch_token else 0.0

            example_ct += len(inputs)
            train_loss += batch_loss
            train_correct_token += correct_batch_token
            train_total_token += total_batch_token

            train_log(epoch, t_batch, total_batch, batch_loss, batch_acc, example_ct)

        # Epoch-level training accuracy (was previously never computed and
        # always printed as 0). Shown as a percentage, like val_log does.
        train_acc = train_correct_token / train_total_token if train_total_token else 0.0
        print('\nTrain | Loss: {:.5f} Acc: {:.3f}%'.format(train_loss, train_acc * 100))

        # ==================== Valid =====================
        model.eval()
        val_loss = 0
        total_correct_token, total_token = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                val_batch_loss, correct_batch_token, total_batch_token = val_batch(inputs, labels, model, criterion)

                val_loss += val_batch_loss
                total_correct_token += int(correct_batch_token)
                total_token += total_batch_token

        # An empty validation loader (e.g. fewer samples than one batch with
        # drop_last=True) would otherwise divide by zero here.
        val_acc = total_correct_token / total_token if total_token else 0.0
        val_log(epoch, val_loss, val_acc, example_ct)

        # Checkpoint only on improvement; the best score must be updated,
        # otherwise every epoch with non-zero accuracy overwrites the file.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            print("Saving model with acc {}".format(val_acc))
            torch.save(model.state_dict(), os.path.join(MODEL_SAVE_PATH, 'model.pth'))


def train_batch(inputs, labels, model, optimizer, criterion):
    """Run one optimisation step on a single batch.

    Args:
        inputs: Batch of model inputs; moved to the module-level ``device``.
        labels: Batch of targets; moved to the module-level ``device``.
        model: Module called as ``model(inputs, labels)``.
        optimizer: Optimizer whose gradients are reset and which is stepped.
        criterion: Loss callable, invoked as
            ``criterion(outputs.permute(0, 2, 1), labels)``.

    Returns:
        A ``(loss, correct, total)`` tuple: the loss as a Python float, the
        number of positions where the argmax over the last output dimension
        equals the label (a 0-dim tensor), and ``labels.numel()``.
    """
    inputs, labels = inputs.to(device), labels.to(device)

    # Forward pass. The original author annotated outputs as (N, S, C); the
    # permute below turns that into (N, C, S) for the criterion.
    outputs = model(inputs, labels)
    loss = criterion(outputs.permute(0, 2, 1), labels)

    # Count correct predictions once. The per-batch debug prints of the full
    # prediction/label tensors were removed: they flooded stdout and broke
    # the single-line "\r" progress display written by train_log.
    predictions = torch.argmax(outputs, dim=-1)
    correct = (predictions.flatten(0) == labels.flatten(0)).sum()

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)  # avoid exploding gradients

    # Step with optimizer
    optimizer.step()

    return loss.item(), correct, labels.numel()


def val_batch(inputs, labels, model, criterion):
    """Evaluate a single validation batch without tracking gradients.

    Args:
        inputs: Batch of model inputs; moved to the module-level ``device``.
        labels: Batch of targets; moved to the module-level ``device``.
        model: Module called as ``model(inputs, labels)``.
        criterion: Loss callable, invoked as
            ``criterion(outputs.permute(0, 2, 1), labels)``.

    Returns:
        A ``(loss, correct, total)`` tuple: the loss as a Python float, the
        number of positions where the argmax over the last output dimension
        equals the label (a 0-dim tensor), and ``labels.numel()``.
    """
    inputs, labels = inputs.to(device), labels.to(device)

    # Guard here as well, so the function is safe to call on its own and not
    # only from inside a caller's torch.no_grad() block.
    with torch.no_grad():
        outputs = model(inputs, labels)
        loss = criterion(outputs.permute(0, 2, 1), labels)

        # Reduce once; the previous unused `acc` local repeated this work.
        predictions = torch.argmax(outputs, dim=-1)
        correct = (predictions.flatten(0) == labels.flatten(0)).sum()

    return loss.item(), correct, labels.numel()

def train_log(epoch:int, t_batch:int, total_batch:int, batch_loss:float, batch_acc:float, example_ct:int):
    """Report one training batch to wandb (when ``WANDB_LOG`` is set) and to stdout.

    The stdout line starts with a carriage return and has no trailing newline,
    so successive calls overwrite the same terminal line. ``epoch`` and
    ``t_batch`` are zero-based and displayed one-based; ``batch_acc`` is a
    fraction and is displayed multiplied by 100.
    """
    epoch_number = epoch + 1
    if WANDB_LOG:
        # The "Epoach" key spelling is kept as-is so the logged metric name
        # does not change.
        metrics = {"train_loss": batch_loss, "train_acc": batch_acc, "Epoach": epoch_number}
        wandb.log(metrics, step=example_ct)

    progress_line = "\r[ Epoch {}: {}/{} ] Batch loss: {:.3f} Batch acc: {:.3f}%".format(
        epoch_number, t_batch + 1, total_batch, batch_loss, batch_acc * 100
    )
    print(progress_line, end="")


def val_log(epoch:int, val_loss:float, val_acc:float, example_ct:int):
    """Report one epoch's validation metrics to wandb (when enabled) and stdout.

    Args:
        epoch: Epoch index; currently unused by this function.
        val_loss: Validation loss to report (a float, not an int as the
            previous annotation claimed).
        val_acc: Validation accuracy as a fraction; printed multiplied by 100.
        example_ct: Passed to ``wandb.log`` as the step.
    """
    if WANDB_LOG:
        wandb.log({"val_loss": val_loss, "val_acc": val_acc}, step=example_ct)
    print("Valid | Loss: {:.5f} Acc: {:.3f}% ".format(val_loss, val_acc*100))

#### Get Dataset

In [None]:
import torch
from MyTrainLib.KeystrokeTokenizer import KeystrokeTokenizer

# Maximum number of samples load_training_dataset() reads from the file.
example_size = 100

def load_training_dataset(dataset_path):
    """Load at most ``example_size`` samples from a tab-separated text file.

    Every non-blank line must have exactly two tab-separated fields: the
    keystroke string and an integer label. The keystroke string is tokenized
    with ``KeystrokeTokenizer``, converted to ids, and truncated or
    right-padded with ``0`` to ``SEQUENCE_SIZE`` entries.

    Args:
        dataset_path: Path of the UTF-8 encoded dataset file.

    Returns:
        A ``MyDataSet`` whose samples are id tensors and whose labels are
        0-dim integer tensors.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields, or its label is not an integer.
    """
    print("loading datasets: {}...".format(dataset_path))

    def pad(token_id_list:list[int]) -> list[int]:
        """Return ``token_id_list`` cut or zero-padded to ``SEQUENCE_SIZE`` ids."""
        clipped = token_id_list[:SEQUENCE_SIZE]
        return clipped + [0] * (SEQUENCE_SIZE - len(clipped))

    X, Y = [], []
    # Stream the file instead of readlines(): only a prefix is needed, so the
    # rest of a large dataset never has to be held in memory.
    with open(dataset_path, "r", encoding="utf-8") as dataset:
        for raw_line in dataset:
            # Check the cap before appending; the old post-append
            # `i > example_size` test let example_size + 2 samples through.
            if len(X) >= example_size:
                break

            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            x_str, y_str = line.split("\t")

            x_tokens = KeystrokeTokenizer.tokenize(x_str)
            x_ids = pad(KeystrokeTokenizer.token_to_ids(x_tokens))

            X.append(torch.tensor(x_ids))
            Y.append(torch.tensor(int(y_str)))

    print("Full Datasets:")
    print("X:", len(X), "Y:", len(Y))
    return MyDataSet(X, Y)

# load_training_dataset(DATASET_ROOT_PATH + TRAIN_FILENAME)

In [None]:
import platform

def get_dataset(slice_rate=0.8):
    """Load the training file and split it into train and validation subsets.

    Args:
        slice_rate: Fraction of the samples (taken from the start of the
            dataset) that goes into the training subset; the remainder
            becomes the validation subset.

    Returns:
        A ``(train_dataset, val_dataset)`` tuple of ``Subset`` objects over
        the same underlying dataset, with no sample shared between them.
    """
    print("preparing datasets...")
    full_dataset = load_training_dataset(DATASET_ROOT_PATH + TRAIN_FILENAME)

    total = len(full_dataset)
    at = int(total * slice_rate)
    train_dataset = torch.utils.data.Subset(full_dataset, indices=range(0, at))
    # The previous `range(at,)` is just `range(at)`, i.e. [0, at): it made the
    # validation set a copy of the training set. The held-out part is the
    # tail [at, total).
    val_dataset = torch.utils.data.Subset(full_dataset, indices=range(at, total))

    return train_dataset, val_dataset


def make_loader(dataset, batch_size, *, shuffle=True, drop_last=True):
    """Wrap ``dataset`` in a ``DataLoader``.

    Args:
        dataset: Map-style dataset to draw batches from.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on each pass. Defaults to
            ``True``, the previously hard-coded value.
        drop_last: Whether to discard a final incomplete batch. Defaults to
            ``True``, the previously hard-coded value. Pass ``False`` for
            small evaluation sets that would otherwise yield no batch at all.

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    NUM_OF_WORKERS = 1 if platform.system() == "Windows" else 2
    print("running on system: {}".format(platform.system()))
    print("Setting num of workers: {}".format(NUM_OF_WORKERS))

    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         # Pinned host memory only helps
                                         # host-to-GPU copies; requesting it
                                         # without CUDA just emits a warning.
                                         pin_memory=torch.cuda.is_available(),
                                         num_workers=NUM_OF_WORKERS,
                                         drop_last=drop_last)
    return loader

#### Make

Define model, criterion, and optimizer.

In [None]:
from MyTrainLib.LCModel import LCModel

def make(config):
    """Assemble the data loaders, model, loss and optimizer for one run.

    Args:
        config: Object exposing ``batch_size`` and ``learning_rate`` as
            attributes.

    Returns:
        ``(model, train_loader, val_loader, criterion, optimizer)``.
    """
    # Data: split once, then wrap each split in its own loader.
    train_set, val_set = get_dataset()
    print("Training set size: {}, Validation set size: {}".format(len(train_set), len(val_set)))

    print("batch size:", config.batch_size)
    train_loader = make_loader(train_set, batch_size=config.batch_size)
    val_loader = make_loader(val_set, batch_size=config.batch_size)

    # Model: layer sizes are hard-coded here.
    # NOTE(review): the leading 20 presumably mirrors INPUT_SEQUENCE_LENGTH - confirm.
    model = LCModel(layers=[20, 40, 1])

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # TODO: learning-rate scheduler

    return model, train_loader, val_loader, criterion, optimizer

In [None]:
def log_model_info(model) -> None:
    """Print the model and its parameter counts; mirror the counts to wandb if enabled.

    Both the total number of parameter elements and the number belonging to
    parameters with ``requires_grad`` set are reported.
    """
    print(model)

    total_parameters = 0
    trainable_parameters = 0
    for parameter in model.parameters():
        count = parameter.numel()
        total_parameters += count
        if parameter.requires_grad:
            trainable_parameters += count

    print('\nstart training, parameter total: {}, trainable: {}\n'.format(total_parameters, trainable_parameters))
    if WANDB_LOG:
        wandb.config.update({"Total parameter": total_parameters, "Trainable parameters:": trainable_parameters})

#### Pipeline

In [None]:
class _ConfigWrapper:
    """Expose a hyperparameter dict through both ``cfg.key`` and ``cfg["key"]``."""

    def __init__(self, config_dict):
        # Copy the entries instead of aliasing the caller's dict, so later
        # attribute writes cannot leak back into it.
        self.__dict__.update(config_dict)

    def __getitem__(self, key):
        return self.__dict__[key]


def _run_pipeline(config):
    """Build model/data/optimizer from ``config``, train, and return the model."""
    # make the model, data, and optimization
    model, train_loader, val_loader, criterion, optimizer = make(config)
    log_model_info(model)

    # Training
    train(model, train_loader, val_loader, criterion, optimizer, config)

    # Testing
    # test(model, test_loader)

    return model


def model_pipeline(hyperparameters):
    """Run the full build-and-train pipeline and return the trained model.

    Whether Weights & Biases is used is decided by the module-level
    ``WANDB_LOG`` flag, the same flag the training and logging helpers check.
    Previously a local ``WANDB_LOG = True`` shadowed that flag here, so a run
    was always opened while the helpers still saw the module value and logged
    nothing to it.

    Args:
        hyperparameters: Mapping of hyperparameter names to values.

    Returns:
        The trained model.
    """
    if WANDB_LOG:
        # The context manager finishes the run even when training raises.
        with wandb.init(project=PROGJECT_NAME, config=hyperparameters):
            return _run_pipeline(wandb.config)

    return _run_pipeline(_ConfigWrapper(hyperparameters))

# Run

In [None]:
import gc


# Run a garbage-collection pass before starting the training run.
gc.collect()

import os
# NOTE(review): presumably silences the HuggingFace tokenizers parallelism
# warning when DataLoader workers fork - confirm it is still needed.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Release cached CUDA memory held by PyTorch, then run the whole pipeline
# with the module-level `config` dict.
torch.cuda.empty_cache()
model = model_pipeline(config)

#### TEST AREA

In [None]:
# from torchmetrics.classification import MultilabelAccuracy, MulticlassAccuracy
# from torch import tensor
# import numpy as np

# target = tensor([[[1, 0], [0, 1], [0, 0]], [[1, 0], [1, 0], [1, 0]], [[1, 0], [1, 0], [1, 0]], [[1, 0], [1, 0], [1, 0]]])
# preds = tensor(
#      [
#          [[1, 3], [1, 0], [1, 1]],
#          [[0, 0.04], [0.86, 0.780], [0.45, 0.37]],
#          [[0, 0.04], [0.86, 0.780], [0.45, 0.37]],
#          [[0, 1], [1, 1], [1, 1]],
#      ])

# target = tensor([[1, 1], [2, 1]])

# preds = tensor([[[0., 0.], [10., 0.], [0., 0.]],
#                 [[0., 10.], [0., 200.], [10., 0.]],
#                 ])

# print("target:", target.numel())
# print("preds:", preds.shape)

# reduced_prediction_tensor = np.argmax(preds, axis=1)
# print(reduced_prediction_tensor)
# accuracy = len()/(reduced_prediction_tensor.flatten(0) == target).sum()


# print(accuracy)

