In [None]:
import librosa
import zipfile
import json
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from transformers import Wav2Vec2ForSequenceClassification, TrainingArguments, Trainer, Wav2Vec2Processor
!pip install evaluate
import evaluate
from datasets import Dataset, load_metric
from google.colab import drive, files
# Mount Google Drive at /content/drive; the dataset and output paths used below live under it.
drive.mount('/content/drive')
# Configure the PyTorch CUDA caching allocator through its environment variable.
# NOTE(review): presumably this has to be set before the first CUDA allocation to take effect — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Index-aligned pairing of feature items with their labels.

    Item ``i`` is exposed as a small attribute container that holds
    ``features[i]`` as ``input_ids`` and ``labels[i]`` as ``labels``.
    """

    def __init__(self, features, labels):
        # Both collections are stored as given; nothing is copied or validated.
        self.features, self.labels = features, labels

    def __len__(self):
        """Return the number of feature items."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return an object whose ``input_ids`` / ``labels`` attributes hold item ``idx``."""
        # A throwaway class is used purely as an attribute bag.
        class Sample:
            pass

        item = Sample()
        item.input_ids, item.labels = self.features[idx], self.labels[idx]
        return item

# accuracy_metric = load_metric("accuracy")
# precision_metric = load_metric("precision")
# recall_metric = load_metric("recall")
# f1_metric = load_metric("f1")

# Metric objects are loaded once and reused: evaluate.load() is comparatively
# expensive and compute_metrics is invoked on every evaluation pass.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the cached ``evaluate`` metric called ``name``, loading it on first use."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for a Trainer evaluation.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` may also be a tuple of
            model outputs, in which case its first element is used as the logits.

    Returns:
        dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``.
    """
    logits, labels = eval_pred
    # Some models return extra outputs next to the logits; keep only the logits.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)

    results = {
        "accuracy": _get_metric("accuracy").compute(
            predictions=predictions, references=labels
        )["accuracy"],
    }
    # Macro averaging weights every class equally, whatever its frequency.
    for name in ("precision", "recall", "f1"):
        results[name] = _get_metric(name).compute(
            predictions=predictions, references=labels, average="macro"
        )[name]

    return results

def _to_jsonable(value):
    """``json`` fallback: convert objects exposing ``tolist()`` (e.g. NumPy scalars/arrays)."""
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def log_metrics(metrics, filename="metrics.json"):
    """Append ``metrics`` to ``filename`` as one JSON document per line.

    Args:
        metrics: JSON-serializable mapping of metric values. Values that expose
            ``tolist()`` are converted through it.
        filename: Path of the log file; it is opened in append mode.

    Raises:
        TypeError: If a value cannot be serialized. The file is left untouched.
    """
    # Serialize before opening the file so a failure cannot leave a truncated line behind.
    record = json.dumps(metrics, default=_to_jsonable)
    with open(filename, "a", encoding="utf-8") as file:
        file.write(record + "\n")  # Newline for separating entries

In [None]:
!pip install accelerate -U
# The labels CSV must provide an "Emotion" column; the "filename" column is added in a later cell.
# data = pd.read_csv("C:/MyDocs/DTU/MSc/Thesis/Data/MELD/MELD_preprocess_test/pre_process_test.csv")
# data = pd.read_csv("C:/Users/DANIEL/Desktop/thesis/low-resource-emotion-recognition/MELD_preprocess_test/pre_process_test.csv")
# Label table for the training split.
data = pd.read_csv('/content/drive/My Drive/Thesis_Data/MELD/Run3/labels/train_labels_corrected.csv')

# Archive holding the training audio, and the local directory it is unpacked into.
zip_path = '/content/drive/My Drive/Thesis_Data/MELD/Run3/data/train_audio.zip'
extract_to = '/content/extracted_data'

target_existed = os.path.exists(extract_to)
if not target_existed:
    print("Directory does not exist. Creating...")
    os.makedirs(extract_to, exist_ok=True)

if target_existed and os.listdir(extract_to):
    # Something is already there: leave it alone.
    print("Directory is not empty. Extraction skipped to avoid overwriting.")
else:
    # Freshly created or empty directory: unpack the archive into it.
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_to)
    print("Files extracted successfully!")

Directory is not empty. Extraction skipped to avoid overwriting.


In [None]:
# --- Pair each label row with its audio file ---------------------------------
directory = os.path.join(extract_to, "train_audio")

# os.listdir() returns names in arbitrary order; sort so runs are reproducible.
# (Named wav_files rather than `files` to avoid shadowing google.colab.files.)
wav_files = sorted(name for name in os.listdir(directory) if name.endswith('.wav'))

if len(wav_files) != len(data):
    raise ValueError(
        f"{len(wav_files)} .wav files in {directory!r} but {len(data)} label rows: "
        "audio and labels cannot be paired."
    )

# Prefer a key-based match over positional pairing.
# NOTE(review): assumes clips are named dia<Dialogue_ID>_utt<Utterance_ID>.wav
# (the MELD convention). It is only used when those names match the directory
# contents exactly — confirm against the preprocessing step.
id_columns = ('Dialogue_ID', 'Utterance_ID')
keyed_names = None
if all(column in data.columns for column in id_columns):
    keyed_names = [
        f"dia{dialogue_id}_utt{utterance_id}.wav"
        for dialogue_id, utterance_id in zip(data['Dialogue_ID'], data['Utterance_ID'])
    ]

if keyed_names is not None and set(keyed_names) == set(wav_files):
    data['filename'] = keyed_names
else:
    print("WARNING: pairing audio files with label rows by sorted filename order; "
          "verify that this matches the row order of the labels CSV.")
    data['filename'] = wav_files

# --- Encode the emotion labels -----------------------------------------------
label_encoder = LabelEncoder()

raw_labels = data['Emotion'].values
labels = label_encoder.fit_transform(raw_labels)

# Show the label-encoding pairs (derived from the encoder, not hard-coded).
print({str(name): code for code, name in enumerate(label_encoder.classes_)})

print(labels)

# --- Turn every clip into a fixed-length model input -------------------------
SAMPLE_RATE = 16000
max_length = SAMPLE_RATE * 9  # 9 seconds

# Load the processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53")

features = []
for file_to_load in data['filename']:
    file_to_load_path = os.path.join(directory, file_to_load)

    # Load at the target sampling rate and normalize.
    audio, sr = librosa.load(file_to_load_path, sr=SAMPLE_RATE)
    audio = librosa.util.normalize(audio)

    if len(audio) > max_length:
        # Too long: keep only the first max_length samples.
        audio = audio[:max_length]
    else:
        # Too short: zero-pad on both sides so the clip stays centred.
        padding = max_length - len(audio)
        offset = padding // 2
        audio = np.pad(audio, (offset, padding - offset), 'constant')

    # Process the audio
    inputs = processor(audio, sampling_rate=sr, return_tensors="pt")
    features.append(inputs.input_values[0])

# Convert labels to tensors
labels_tensor = torch.tensor(labels).long()  # Use .long() for integer labels, .float() for one-hot

# Print the dimensions of the labels tensor
print(f"Labels tensor dimensions: {labels_tensor.shape}")

# --- 80/20 train/validation split --------------------------------------------
# A seeded generator makes the split identical on every run.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(features), generator=split_generator)
n_train = int(0.8 * len(features))
train_indices = indices[:n_train]
val_indices = indices[n_train:]

# Print the number of training and validation samples
print(f"Number of training samples: {len(train_indices)}")
print(f"Number of validation samples: {len(val_indices)}")

# Build the Hugging Face Datasets for the two splits
train_dataset = Dataset.from_dict({
    'input_values': [features[i] for i in train_indices.tolist()],
    'labels': labels_tensor[train_indices]
})
val_dataset = Dataset.from_dict({
    'input_values': [features[i] for i in val_indices.tolist()],
    'labels': labels_tensor[val_indices]
})

# Print the dimensions of the first feature in the training and validation dataset.
# Index the row first: Dataset rows come back as plain Python lists (no `.shape`),
# and indexing the column first would materialize every sample.
print(f"First training sample dimensions: {np.shape(train_dataset[0]['input_values'])}")
print(f"First validation sample dimensions: {np.shape(val_dataset[0]['input_values'])}")

# Load the pre-trained model; the classification head size follows the encoder's classes.
model = Wav2Vec2ForSequenceClassification.from_pretrained(
    "facebook/wav2vec2-large-xlsr-53", num_labels=len(label_encoder.classes_)
)

# Define training arguments
# training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

# Initialize the trainer
# metric = evaluate.load("accuracy")

['anger' 'disgust' 'fear' 'joy' 'neutral' 'sadness' 'surprise']
[0,         1,       2,       3,         4,         5,   6]
[4 4 4 ... 6 4 3]
(9134, 144000)
(9134,)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of Wav2Vec2ForSequenceClassification were not initialized from the model checkpoint at facebook/wav2vec2-large-xlsr-53 and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Prepare the trainer

training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    learning_rate=1e-4,              # Learning rate
    num_train_epochs=3,              # Number of training epochs
    per_device_train_batch_size=4,   # Batch size for training
    per_device_eval_batch_size=8,    # Batch size for evaluation
    gradient_accumulation_steps=2,   # Accumulate 2 batches per optimizer step (4 * 2 = 8 samples per device)
    warmup_steps=500,                # Number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # Strength of weight decay
    logging_dir='./logs',            # Directory for storing logs
    logging_steps=10,
    save_strategy='steps',           # Saving model checkpoint strategy
    save_steps=500,                  # Save checkpoint every 500 steps
    save_total_limit=3,
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Save the model first, so a failure during evaluation cannot cost the trained weights.
torch.save(model.state_dict(), 'emotion_recognition_model.pth')

# No evaluation strategy is configured above, so compute_metrics would otherwise
# never run: evaluate once on the validation split and record the result.
eval_metrics = trainer.evaluate()
print(eval_metrics)
log_metrics(eval_metrics)

Step,Training Loss
500,1.7115
1000,1.5408
1500,1.5725
2000,1.5768
2500,1.542
3000,1.5706
3500,1.5436
4000,1.5505
4500,1.5593
5000,1.5212


Step,Training Loss
500,1.7115
1000,1.5408
1500,1.5725
2000,1.5768
2500,1.542
3000,1.5706
3500,1.5436
4000,1.5505
4500,1.5593
5000,1.5212


In [None]:
# Write the fine-tuned weights to the mounted Google Drive.
save_path = '/content/drive/My Drive/Thesis_Data/MELD/Run3/model/emotion_recognition_model.pth'
# torch.save does not create missing parent directories, so make sure the folder exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(model.state_dict(), save_path)

In [None]:
def zip_folder(folder_path, output_path):
    """Zip the contents of an entire folder (with subfolders) into a zip file.

    Entries are stored relative to the parent of ``folder_path``, so the archive
    contains the folder's own name as its top-level directory.

    Args:
        folder_path: Directory whose files are archived recursively.
        output_path: Path of the zip file to create (overwritten if it exists).

    Raises:
        FileNotFoundError: If ``folder_path`` is not an existing directory.
            Raised before the archive is created, so no empty zip is left behind.
    """
    if not os.path.isdir(folder_path):
        raise FileNotFoundError(f"Folder to zip does not exist: {folder_path!r}")

    archive_root = os.path.join(folder_path, os.pardir)
    archive_abspath = os.path.abspath(output_path)

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, filenames in os.walk(folder_path):
            for filename in filenames:
                file_path = os.path.join(root, filename)
                # When the archive lives inside the folder, do not add it to itself.
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, archive_root))

# Example usage: zip_folder('/content/data', '/content/data.zip') archives the 'data' folder.
# Archive /content/logs — presumably the Trainer's logging_dir ('./logs'); assumes the working directory is /content.
zip_folder('/content/logs', '/content/logs.zip')

In [None]:
# Archive /content/results — presumably the Trainer's output_dir ('./results'); assumes the working directory is /content.
zip_folder('/content/results', '/content/results.zip')

In [None]:
# Destination path under the mounted Google Drive for the results archive.
save_zip_path = '/content/drive/My Drive/Thesis_Data/MELD/Run3/results/results.zip'

In [None]:
import shutil
# shutil.copy fails if the destination folder is missing, so create it first.
os.makedirs(os.path.dirname(save_zip_path), exist_ok=True)
# Copy the results archive to Drive; the destination path is the cell's displayed value.
shutil.copy('/content/results.zip', save_zip_path)

'/content/drive/My Drive/Thesis_Data/MELD/Run3/results/results.zip'