# Initialization

## Library installation (only first time)

In [None]:
!pip install transformers

In [None]:
!pip install datasets

In [None]:
!pip install librosa

In [None]:
!pip install soundfile

In [None]:
!pip install --upgrade ipywidgets

## Import libraries

In [None]:
from transformers import AutoFeatureExtractor, ASTForAudioClassification, ASTModel
from datasets import load_dataset
import torch
from functools import reduce
from operator import mul
import math
import torch.nn as nn
import os
import librosa
from torch.utils.data import Dataset
import random
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np

# Import AST Pretrained and test

## Import dataset huggingface

In [None]:
# Load the "clean" validation split of the small LibriSpeech demo dataset used for smoke tests
dataset_huggingface = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
# Sort by id so that a given index refers to the same clip on every run
dataset_huggingface = dataset_huggingface.sort("id")
# Sampling rate declared by the dataset's "audio" feature; reused by the feature-extractor calls below
sampling_rate = dataset_huggingface.features["audio"].sampling_rate

## Import AST huggingface

In [None]:
# AST feature extractor matching the pretrained checkpoint; kept as a global
# because the batch preprocessing helper further down calls it directly
feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")

In [None]:
# Pretrained AST with its original classification head (used only for the sanity check below)
ast_huggingface = ASTForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
# last expression of the cell: displays the model architecture in the notebook
ast_huggingface

## Test pretrained model

In [None]:
# Sanity check of the pretrained classifier on the first clip of the demo dataset.
# audio file is decoded on the fly
inputs = feature_extractor(dataset_huggingface[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")

with torch.no_grad():
    logits = ast_huggingface(**inputs).logits

predicted_class_ids = torch.argmax(logits, dim=-1).item()
predicted_label = ast_huggingface.config.id2label[predicted_class_ids]
print(predicted_label)

# compute loss against an arbitrary target: the label registered under class id 0
target_label = ast_huggingface.config.id2label[0]
inputs["labels"] = torch.tensor([ast_huggingface.config.label2id[target_label]])
# inference only: no autograd graph is needed to read the loss value
with torch.no_grad():
    loss = ast_huggingface(**inputs).loss
round(loss.item(), 2)

# Prompt Tuning

## Retrieve Output size

In [None]:
# Bare AST encoder (ASTModel rather than ASTForAudioClassification), loaded to inspect its output size
ast_model = ASTModel.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
# last expression of the cell: displays the model architecture in the notebook
ast_model

In [None]:
# Run one clip through the bare encoder to read the shape of its output
inputs = feature_extractor(dataset_huggingface[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
with torch.no_grad():
    outputs = ast_model(**inputs)

last_hidden_states = outputs.last_hidden_state
# displayed in the notebook as a plain list of dimension sizes
list(last_hidden_states.shape)

## Model and testing

In [None]:
class AST_PromptTuning(nn.Module):
    """Frozen pretrained AST encoder with a trainable head and optional prompt tokens.

    ``prompt_type`` selects the tuning strategy:

    * ``None``      -- only the classification head is trainable;
    * ``'shallow'`` -- learnable prompt tokens are inserted once, after the embeddings;
    * ``'deep'``    -- in addition, a fresh set of prompt tokens replaces the previous
      ones before every encoder layer after the first.

    Args:
        prompt_tokens: number of learnable prompt tokens.
        prompt_dropout: dropout probability applied to the prompt tokens.
        prompt_type: ``'deep'``, ``'shallow'`` or ``None`` (any other value
            follows the shallow path).
        num_classes: size of the classifier output (defaults to the 15 classes
            that were previously hard-coded).
    """

    def __init__(self, prompt_tokens: int = 5, prompt_dropout: float = 0.0,
                 prompt_type: str = 'deep', num_classes: int = 15):
        super().__init__()

        # load AST model
        self.encoder = ASTModel.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")

        # classification head: hidden_size -> 384 -> num_classes
        self.classifier = nn.Sequential(
            nn.Linear(self.encoder.config.hidden_size, 384),
            nn.Linear(384, num_classes)
        )

        # freeze the whole backbone: only the head and the prompts are trained
        for parameter in self.encoder.parameters():
            parameter.requires_grad = False

        self.prompt_type = prompt_type  # "shallow", "deep" or None

        if prompt_type is not None:
            self.prompt_tokens = prompt_tokens  # number of prompt tokens
            self.prompt_dropout = nn.Dropout(prompt_dropout)
            self.prompt_dim = self.encoder.config.hidden_size

            # bound of the uniform initialisation (Xavier-style, computed from
            # the patch area and the hidden size)
            patch_size = self.encoder.config.patch_size
            val = math.sqrt(6. / float(3 * patch_size * patch_size + self.prompt_dim))

            # learnable prompts: (1, prompt_tokens, prompt_dim), expanded over the batch at run time
            self.prompt_embeddings = nn.Parameter(torch.zeros(1, self.prompt_tokens, self.prompt_dim))
            nn.init.uniform_(self.prompt_embeddings.data, -val, val)

            if self.prompt_type == 'deep':
                self.total_d_layer = self.encoder.config.num_hidden_layers
                self.deep_prompt_embeddings = nn.Parameter(
                    # - 1 because the first layer consumes the shallow prompts
                    torch.zeros(self.total_d_layer - 1, self.prompt_tokens, self.prompt_dim)
                )
                nn.init.uniform_(self.deep_prompt_embeddings.data, -val, val)

    def train(self, mode: bool = True):
        """Switch between training and evaluation mode, keeping the frozen encoder in eval mode.

        Follows the ``nn.Module.train`` contract: ``self.training`` is updated on
        this module and its children, and ``self`` is returned so that
        ``model.train()`` / ``model.eval()`` can be chained.
        """
        super().train(mode)
        if mode:
            # the backbone is frozen: its dropout layers must stay disabled while training
            self.encoder.eval()
        return self

    def incorporate_prompt(self, x, prompt_embeddings, n_prompt: int = 0):
        """Insert prompt tokens right after the first token of ``x``.

        Args:
            x: tensor of shape (batch size, n_tokens, hidden_dim).
            prompt_embeddings: prompts to insert; expanded over the batch dimension.
            n_prompt: number of prompt tokens already present after the first
                token; they are dropped and replaced by ``prompt_embeddings``
                (0 for the initial insertion).

        Returns:
            Tensor of shape (batch size, 1 + inserted prompts + remaining tokens, hidden_dim).
        """
        B = x.shape[0]
        # keep the first (class) token, add the prompts, then the rest of the sequence
        x = torch.cat((
            x[:, :1, :],
            self.prompt_dropout(prompt_embeddings.expand(B, -1, -1)),
            x[:, (1 + n_prompt):, :]
        ), dim=1)
        return x

    def forward_features(self, x):
        """Run the prompted encoder and return the layer-normalised token sequence."""
        # go through the encoder embeddings
        x = self.encoder.embeddings(x)
        # add prompts
        x = self.incorporate_prompt(x, self.prompt_embeddings)
        if self.prompt_type == 'deep':
            # deep mode: the shallow prompts feed layer 0, then every following
            # layer receives its own set of prompts
            x = self.encoder.encoder.layer[0](x)[0]
            for i in range(1, self.total_d_layer):
                x = self.incorporate_prompt(x, self.deep_prompt_embeddings[i - 1], self.prompt_tokens)
                # use this module's own encoder (the previous code referenced a global `model`)
                x = self.encoder.encoder.layer[i](x)[0]
        else:
            # shallow mode
            x = self.encoder.encoder(x)["last_hidden_state"]
        x = self.encoder.layernorm(x)
        return x

    def forward(self, x):
        """Return the classifier output computed from the first (class) token.

        ``x`` is the ``input_values`` tensor produced by the AST feature extractor.
        """
        if self.prompt_type is not None:
            x = self.forward_features(x)[:, 0, :]
        else:
            # plain encoder pass, take the classification token
            x = self.encoder(x)["last_hidden_state"][:, 0, :]
        x = self.classifier(x)
        return x

In [None]:
# Instantiate the three tuning variants and report their parameter counts.
ast_head_tuning = AST_PromptTuning(prompt_type=None)
# count number of parameters
print("AST params:", sum(p.numel() for p in ast_head_tuning.parameters()))
# count number of trainable parameters (head only)
print("Head fine-tuning:", sum(p.numel() for p in ast_head_tuning.parameters() if p.requires_grad))
ast_shallow_tuning = AST_PromptTuning(prompt_type='shallow')
# count number of trainable parameters
print("Shallow prompt-tuning:", sum(p.numel() for p in ast_shallow_tuning.parameters() if p.requires_grad))
ast_deep_tuning = AST_PromptTuning(prompt_type='deep')
# count number of trainable parameters
print("Deep prompt-tuning:", sum(p.numel() for p in ast_deep_tuning.parameters() if p.requires_grad))

In [None]:
# Forward one clip through an (untrained) prompt-tuning model to check the output format.
inputs = feature_extractor(dataset_huggingface[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")

# NOTE(review): the previous code called an undefined `ast_prompt`; the shallow
# variant instantiated in the cell above is used here — switch to another
# variant if a different one was intended.
with torch.no_grad():
    outputs = ast_shallow_tuning(inputs['input_values'])

predicted_class_ids = torch.argmax(outputs, dim=-1).item()
predicted_class_ids

In [None]:
# Turn the model outputs of the previous cell into probabilities over dim 1
softmax = F.softmax(outputs, dim=1)
# last expression of the cell: displays the probabilities in the notebook
softmax

# Implementation

## Utilities

In [None]:
def load_audio(audio_path):
    """Load the audio file at ``audio_path`` with librosa, requesting a 16 kHz sampling rate.

    Returns:
        The ``(audio, sample_rate)`` pair produced by ``librosa.load``.
    """
    return librosa.load(audio_path, sr=16000)

In [None]:
def data_augmentation(audio, sample_rate, max_shift_fraction: float = 0.2, gain: float = 2.0):
    """Apply a random circular time shift followed by a constant gain.

    Args:
        audio: 1-D sequence of samples.
        sample_rate: sampling rate of ``audio``; returned unchanged.
        max_shift_fraction: the shift is drawn uniformly from
            ``[0, int(len(audio) * max_shift_fraction))`` samples.
        gain: multiplicative amplification applied after the shift.

    Returns:
        ``(augmented_audio, sample_rate)`` with ``augmented_audio`` a NumPy array.
    """
    audio = np.asarray(audio)
    max_shift = int(len(audio) * max_shift_fraction)
    # np.random.randint rejects an upper bound of 0, so clips too short to be
    # shifted (including empty ones) skip the time shift instead of raising
    if max_shift > 0:
        shift = np.random.randint(max_shift)
        audio = np.roll(audio, shift)
    # amplification
    audio = audio * gain
    return audio, sample_rate

In [None]:
def feature_extractor_batch_data(batch):
    """Run the AST feature extractor on every clip of a collated batch.

    Args:
        batch: mapping with the keys ``"audio"``, ``"sample_rate"`` and
            ``"label"``, as produced by the DataLoader from the dataset items.
            It is left unmodified.

    Returns:
        ``(features, labels)``: the per-clip ``input_values`` stacked along a
        new leading batch axis, and the labels as a tensor.
        NOTE(review): each ``input_values`` presumably keeps its own leading
        axis of size 1, giving (batch, 1, frames, mel bins) — callers squeeze it.
    """
    features = [
        # int(): the collated sampling rate may be a 0-d tensor
        feature_extractor(clip, sampling_rate=int(rate), return_tensors="pt")['input_values']
        for clip, rate in zip(batch["audio"], batch["sample_rate"])
    ]
    # as_tensor avoids the copy-construct warning when the labels are already a tensor
    return torch.stack(features), torch.as_tensor(batch["label"])

In [None]:
def count_class_presence_from_file(path):
    """Count how many rows of a fold file carry each label.

    Used to check that a split is balanced. Every non-blank line must hold
    exactly two whitespace-separated fields, ``<audio> <label>``; blank lines
    (for example a trailing empty line) are skipped.

    Returns:
        dict mapping each label to its number of rows, in order of first appearance.

    Raises:
        ValueError: if a non-blank line does not contain exactly two fields.
    """
    class_count = {}
    with open(path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue
            _, label = fields
            class_count[label] = class_count.get(label, 0) + 1
    return class_count

In [None]:
def count_class_presence_from_list(list_audio):
    """Count how many entries of ``list_audio`` carry each label.

    Each entry is a ``"<audio> <label>"`` string (two whitespace-separated
    fields). Returns a dict mapping each label to its number of occurrences.
    """
    occurrences = {}
    for entry in list_audio:
        _, label = entry.strip().split()
        occurrences[label] = occurrences.get(label, 0) + 1
    return occurrences

In [None]:
def create_dict_label(path):
    """Build the label -> integer index mapping required by the model.

    Reads ``path`` line by line; every line holds ``<audio> <label>``. Labels
    are numbered from 0 in order of first appearance.
    """
    label_to_index = {}
    with open(path, 'r') as handle:
        for row in handle:
            _, class_name = row.strip().split()
            # an already-known label keeps its index; a new one gets the next free index
            label_to_index.setdefault(class_name, len(label_to_index))
    return label_to_index

In [None]:
def from_label_to_index(label, dict_label):
    """Return the index stored for ``label`` in ``dict_label`` (plain item lookup)."""
    index = dict_label[label]
    return index

In [None]:
def _strip_audio_prefix(audio_name):
    """Drop the leading ``audio/`` folder that fold files put in front of file names."""
    prefix = "audio/"
    return audio_name[len(prefix):] if audio_name.startswith(prefix) else audio_name


def _read_fold_rows(file_path):
    """Read a labelled fold file into a list of ``(audio_name, label)`` pairs, skipping blank lines."""
    rows = []
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue
            if len(fields) != 2:
                raise ValueError(f"Expected '<audio> <label>' in {file_path}, got: {line!r}")
            rows.append((_strip_audio_prefix(fields[0]), fields[1]))
    return rows


def create_file_list_split_fold(path, val_frac, task_train_percentage, num_fold = 4):
    """Build the train / validation / test file lists of every cross-validation fold.

    The folder ``path`` is expected to contain, for each fold, a ``*train.txt``
    file, a ``*test.txt`` file (ignored) and one more file with the labelled
    evaluation rows. Files are paired fold by fold after sorting their names,
    so fold ``i`` uses the ``i``-th train file and the ``i``-th evaluation file.

    For every fold, ``floor(count * task_train_percentage)`` rows are sampled
    at random for each label of the train file, shuffled, and the last
    ``int(n * val_frac)`` of them become the validation split. The evaluation
    file is used in full as the test split.

    Args:
        path: folder holding the fold description files.
        val_frac: fraction of the sampled training rows moved to validation.
        task_train_percentage: fraction (0..1) of each class kept for training.
        num_fold: number of folds to build.

    Returns:
        ``(folds_indication, dict_label)``: a list with one dict per fold (keys
        ``train_audio_list``, ``train_label_list``, ``val_audio_list``,
        ``val_label_list``, ``test_audio_list``, ``test_label_list``) and the
        label -> index mapping, numbered in order of first appearance.

    Raises:
        ValueError: if fewer than ``num_fold`` train or evaluation files are
            found, or if a row is malformed.
    """
    fold_files = os.listdir(path)
    # os.listdir order is arbitrary: sort so that train/evaluation files pair up per fold
    train_files = sorted(name for name in fold_files if name.endswith("train.txt"))
    # the "test" files duplicate the evaluation ones, so they are discarded
    eval_files = sorted(
        name for name in fold_files
        if not name.endswith("train.txt") and not name.endswith("test.txt")
    )
    if len(train_files) < num_fold or len(eval_files) < num_fold:
        raise ValueError(
            f"Expected at least {num_fold} train and evaluation files in {path}, "
            f"found {len(train_files)} and {len(eval_files)}"
        )

    dict_label = {}
    folds_indication = []
    for fold_index in range(num_fold):
        train_rows = _read_fold_rows(os.path.join(path, train_files[fold_index]))
        test_rows = _read_fold_rows(os.path.join(path, eval_files[fold_index]))

        # label -> index, in order of first appearance
        for _, label in train_rows + test_rows:
            dict_label.setdefault(label, len(dict_label))

        # group the rows by label so that sampling does not depend on the
        # file being sorted into equal-sized class blocks
        rows_by_label = {}
        for row in train_rows:
            rows_by_label.setdefault(row[1], []).append(row)

        # keep the requested share of every class
        selected = []
        for label_rows in rows_by_label.values():
            keep = math.floor(len(label_rows) * task_train_percentage)
            selected.extend(random.sample(label_rows, keep))
        random.shuffle(selected)

        # split train in train and validation
        num_val = int(len(selected) * val_frac)
        split_at = len(selected) - num_val
        train_part, val_part = selected[:split_at], selected[split_at:]

        folds_indication.append({
            'train_audio_list': [audio for audio, _ in train_part],
            'train_label_list': [label for _, label in train_part],
            'val_audio_list': [audio for audio, _ in val_part],
            'val_label_list': [label for _, label in val_part],
            'test_audio_list': [audio for audio, _ in test_rows],
            'test_label_list': [label for _, label in test_rows]
        })
    return folds_indication, dict_label

## TUT17 Dataset

In [None]:
class TUT17(Dataset):
    """PyTorch dataset over one split of a TUT17 cross-validation fold.

    Args:
        root_dir: TUT17 folder.
        audio_folder: sub-folder of ``root_dir`` that contains all audio files (no division).
        fold_specification: dict with the ``<split>_audio_list`` and
            ``<split>_label_list`` entries of the fold.
        class_dict: label -> integer index mapping.
        split: which part of the fold to expose: ``'train'``, ``'val'`` or ``'test'``.
        augment: whether ``data_augmentation`` is applied to every clip. The
            default (``True``) keeps the previous behaviour, which augments all
            splits. NOTE(review): the augmentation includes a constant gain, so
            disabling it for evaluation only changes the input scale relative
            to training.

    Raises:
        ValueError: if ``split`` is not one of the three supported values.
    """

    def __init__(self, root_dir, audio_folder, fold_specification, class_dict, split = 'train',
                 augment: bool = True):
        super().__init__()
        if split not in ('train', 'val', 'test'):
            raise ValueError('Invalid split value.')
        # store path audio
        self.audio_path = os.path.join(root_dir, audio_folder)
        # store dict label
        self.class_dict = class_dict
        self.augment = augment
        # audio names and labels of the selected split
        self.data = fold_specification[f'{split}_audio_list']
        self.label = fold_specification[f'{split}_label_list']

    def __len__(self):
        """Number of clips in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load clip ``idx`` and return a dict with its audio, sampling rate and label index."""
        audio_path = os.path.join(self.audio_path, self.data[idx])
        audio, sample_rate = load_audio(audio_path)
        if self.augment:
            audio, sample_rate = data_augmentation(audio, sample_rate)
        return {'audio': audio, 'sample_rate': sample_rate, 'label': from_label_to_index(self.label[idx], self.class_dict)}

# Train

In [None]:
def train(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one epoch and return the mean loss per batch.

    Args:
        model: module to optimise; moved to ``device`` and put in training mode.
        train_loader: iterable of batches accepted by ``feature_extractor_batch_data``.
        criterion: loss module called as ``criterion(output, labels)``.
        optimizer: optimizer stepped once per batch.
        device: device on which inputs, labels, model and criterion are placed.

    Returns:
        Sum of the batch losses divided by the number of batches, or ``nan``
        when the loader yields no batch.
    """
    model.train()
    model.to(device)
    criterion.to(device)
    running_loss = 0.0
    for batch in tqdm(train_loader):
        # reset
        optimizer.zero_grad()
        # preprocess input AST feature extractor
        audio_list, labels = feature_extractor_batch_data(batch)
        # send input and labels to the target device
        audio_list = audio_list.to(device)
        labels = labels.to(device)
        # drop only the singleton axis at position 1: a bare squeeze() would
        # also remove the batch axis when a batch holds a single clip
        output = model(audio_list.squeeze(1))
        # compute loss and update parameters
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    num_batches = len(train_loader)
    if num_batches == 0:
        # nothing was processed: report it instead of dividing by zero
        return float("nan")
    return running_loss / num_batches

In [None]:
def validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` and return the mean loss per batch.

    Runs in evaluation mode and without gradient tracking.

    Returns:
        Sum of the batch losses divided by the number of batches, or ``nan``
        when the loader yields no batch (for example a tiny split combined
        with ``drop_last=True``).
    """
    model.eval()
    model.to(device)
    criterion.to(device)
    running_loss = 0.0
    # no parameter update happens here, so no autograd graph is needed
    with torch.no_grad():
        for batch in tqdm(val_loader):
            # preprocess input AST feature extractor
            audio_list, labels = feature_extractor_batch_data(batch)
            # send input and labels to the target device
            audio_list = audio_list.to(device)
            labels = labels.to(device)
            # squeeze(1) keeps the batch axis even for a batch of one clip
            output = model(audio_list.squeeze(1))
            # compute loss
            loss = criterion(output, labels)
            running_loss += loss.item()
    num_batches = len(val_loader)
    if num_batches == 0:
        return float("nan")
    return running_loss / num_batches

In [None]:
def test(model, test_loader, criterion, device):
    """Compute the classification accuracy of ``model`` on ``test_loader``.

    Runs in evaluation mode and without gradient tracking. ``criterion`` is
    only moved to ``device``; it is kept for signature compatibility.

    Returns:
        Fraction of clips whose arg-max prediction equals the label, or
        ``nan`` when the loader yields no clip.
    """
    model.eval()
    model.to(device)
    criterion.to(device)
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in tqdm(test_loader):
            # preprocess input AST feature extractor
            audio_list, labels = feature_extractor_batch_data(batch)
            # send input and labels to the target device
            audio_list = audio_list.to(device)
            labels = labels.to(device)
            # squeeze(1) keeps the batch axis even for a batch of one clip
            output = model(audio_list.squeeze(1))
            predictions = torch.argmax(output, dim=1)
            # accumulate accuracy counts
            correct += (predictions == labels).sum().item()
            total += labels.numel()
    if total == 0:
        return float("nan")
    return correct / total

In [None]:
def cross_validation(path, val_frac, task_train_percentage, model_type, folder_checkpoints, n_epochs: int = 10,
                     dataset_root: str = "c:/Users/cerru/Desktop/TUT17", audio_folder: str = "Audio",
                     batch_size: int = 4, learning_rate: float = 0.0001):
    """Train and evaluate one model per fold and collect the statistics.

    Args:
        path: folder with the fold description files.
        val_frac: fraction of the training rows used for validation.
        task_train_percentage: share of the training data to use (see the
            review note on its rescaling below).
        model_type: 0 = head only, 1 = shallow prompts, 2 = deep prompts.
        folder_checkpoints: folder where one checkpoint per fold is written;
            created when missing.
        n_epochs: number of training epochs per fold.
        dataset_root: TUT17 folder (defaults to the previously hard-coded path).
        audio_folder: sub-folder of ``dataset_root`` holding the audio files.
        batch_size: batch size of the three data loaders.
        learning_rate: Adam learning rate.

    Returns:
        ``(folds_accuracy, epoch_statistics)``: the test accuracy of every
        fold, and ``epoch_statistics[epoch][fold]`` dicts holding
        ``'train_loss'`` and ``'val_loss'``.

    Raises:
        ValueError: if ``model_type`` is not 0, 1 or 2.
    """
    prompt_types = {0: None, 1: 'shallow', 2: 'deep'}
    if model_type not in prompt_types:
        raise ValueError('Invalid Model type')
    # select device
    dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # instantiate criterion
    criterion = nn.CrossEntropyLoss()
    # retrieve fold
    # NOTE(review): the fraction is divided by 10 here, so a caller passing 1
    # trains on 10% of each class — confirm this down-scaling is intended.
    folds_indication, class_dict = create_file_list_split_fold(path, val_frac, task_train_percentage * 10 / 100)
    # per-epoch, per-fold loss statistics
    epoch_statistics = [
        [{'train_loss': 0.0, 'val_loss': 0.0} for _ in folds_indication]
        for _ in range(n_epochs)
    ]
    # torch.save does not create missing folders
    os.makedirs(folder_checkpoints, exist_ok=True)
    # test accuracy of every fold
    folds_accuracy = []
    # for each fold train the model for n_epochs
    for fold in tqdm(range(len(folds_indication))):
        # instantiate model
        model = AST_PromptTuning(prompt_type=prompt_types[model_type])
        # instantiate optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        # instantiate dataset
        train_dataset = TUT17(dataset_root, audio_folder, folds_indication[fold], class_dict, split = 'train')
        val_dataset = TUT17(dataset_root, audio_folder, folds_indication[fold], class_dict, split = 'val')
        test_dataset = TUT17(dataset_root, audio_folder, folds_indication[fold], class_dict, split = 'test')
        # instantiate data loaders; evaluation loaders are not shuffled so
        # that the clips dropped by drop_last are the same on every pass
        # NOTE(review): drop_last=True discards up to batch_size - 1 evaluation
        # clips and leaves a split smaller than batch_size with no batch at all.
        train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=0, shuffle=False, drop_last=True)
        # train model
        for epoch in tqdm(range(n_epochs)):
            # execute train
            train_loss = train(model, train_loader, criterion, optimizer, dev)
            epoch_statistics[epoch][fold]['train_loss'] = train_loss
            # execute validation
            val_loss = validation(model, val_loader, criterion, dev)
            epoch_statistics[epoch][fold]['val_loss'] = val_loss

            # print single stats
            print(f'Epoch {epoch + 1}/{n_epochs} Fold {fold + 1} : Train Loss {train_loss:.4f} : Val Loss {val_loss:.4f}')
        # test model
        accuracy = test(model, test_loader, criterion, dev)
        folds_accuracy.append(accuracy)
        print(f'Fold {fold + 1} : Accuracy {accuracy:.4f}')
        # store parameters: one file per fold (a shared name would be
        # overwritten by every fold); ":g" avoids float noise such as
        # 10.000000000000002 in the file name
        checkpoint_name = f"model_type_{model_type}_train_frac_{task_train_percentage * 100:g}_fold_{fold + 1}.pth"
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, os.path.join(folder_checkpoints, checkpoint_name))
    return folds_accuracy, epoch_statistics

In [None]:
def compute_statistics_cross_validation(epoch_statistics, folds_accuracy):
    """Average the cross-validation statistics over the folds.

    Args:
        epoch_statistics: ``epoch_statistics[epoch][fold]`` dicts with the
            keys ``'train_loss'`` and ``'val_loss'``.
        folds_accuracy: test accuracy of every fold.

    Returns:
        ``(epoch_train_loss_mean, epoch_val_loss_mean, mean_accuracy)``: the
        two per-epoch lists of fold-averaged losses and the mean accuracy.
    """
    train_means = []
    val_means = []
    for per_fold in epoch_statistics:
        fold_count = len(per_fold)
        train_total = 0.0
        val_total = 0.0
        for fold_stats in per_fold:
            train_total += fold_stats['train_loss']
            val_total += fold_stats['val_loss']
        train_means.append(train_total / fold_count)
        val_means.append(val_total / fold_count)
    return train_means, val_means, np.mean(folds_accuracy)

## Task execution

In [None]:
# One empty result record per experiment: every tuning strategy is combined
# with every training-set percentage (10, 50, 100).
task_set = {
    f'dataset_{percentage}_{strategy}': {
        'epoch_train_loss_mean': [],
        'epoch_val_loss_mean': [],
        'mean_accuracy': 0.0
    }
    for strategy in ('only_head', 'shallow_prompt', 'deep_prompt')
    for percentage in (10, 50, 100)
}

In [None]:
# Head-only fine-tuning, task_train_percentage=0.1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_10_only_head_processed_data.npy")):
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.1, 0, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_10_only_head_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_10_only_head_processed_data.npy'), processed_data)

In [None]:
# Head-only fine-tuning, task_train_percentage=0.5; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_50_only_head_processed_data.npy")):
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.5, 0, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_50_only_head_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_50_only_head_processed_data.npy'), processed_data)

In [None]:
# Head-only fine-tuning, task_train_percentage=1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_100_only_head_processed_data.npy")):
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 1, 0, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_100_only_head_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_100_only_head_processed_data.npy'), processed_data)

In [None]:
# Shallow prompt-tuning, task_train_percentage=0.1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_10_shallow_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.1, 1, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_10_shallow_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_10_shallow_prompt_processed_data.npy'), processed_data)

In [None]:
# Shallow prompt-tuning, task_train_percentage=0.5; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_50_shallow_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.5, 1, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_50_shallow_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_50_shallow_prompt_processed_data.npy'), processed_data)

In [None]:
# Shallow prompt-tuning, task_train_percentage=1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_100_shallow_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 1, 1, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_100_shallow_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_100_shallow_prompt_processed_data.npy'), processed_data)

In [None]:
# Deep prompt-tuning, task_train_percentage=0.1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_10_deep_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.1, 2, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_10_deep_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_10_deep_prompt_processed_data.npy'), processed_data)

In [None]:
# Deep prompt-tuning, task_train_percentage=0.5; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_50_deep_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 0.5, 2, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_50_deep_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_50_deep_prompt_processed_data.npy'), processed_data)

In [None]:
# Deep prompt-tuning, task_train_percentage=1; skipped when the processed metrics already exist.
if not os.path.isfile(os.path.join("./models_metrics", "dataset_100_deep_prompt_processed_data.npy")):
    # TODO: run still to be done
    # np.save does not create missing folders: make sure the output folder exists before the long run
    os.makedirs("./models_metrics", exist_ok=True)
    folds_accuracy, epoch_statistics = cross_validation("c:/Users/cerru/Desktop/TUT17/Fold", 0.1, 1, 2, "c:/Users/cerru/Desktop/models_checkpoints")
    # Save raw data
    metrics = {
        'folds_accuracy': folds_accuracy,
        'epoch_statistics': epoch_statistics
    }
    np.save(os.path.join("./models_metrics", 'dataset_100_deep_prompt_raw_data.npy'), metrics)
    # process data
    etlm, evlm, ma = compute_statistics_cross_validation(metrics['epoch_statistics'], metrics['folds_accuracy'])
    processed_data = {
        'epoch_train_loss_mean': etlm,
        # validation curve (the train-loss list used to be stored here by mistake)
        'epoch_val_loss_mean': evlm,
        'mean_accuracy': ma
    }
    # save processed data
    np.save(os.path.join("./models_metrics", 'dataset_100_deep_prompt_processed_data.npy'), processed_data)