# Setting up the environment

In [30]:
from google.colab import drive
import os
import shutil
from google.colab import userdata

# Step 1: Mount Google Drive
drive.mount('/content/drive', force_remount=True)

# Step 2: Navigate to Your GitHub Repository
repo_path = "/content/drive/MyDrive/colab_repos/Wav2Vec2-vs-HUbert"  # Adjust to your repository path
os.chdir(repo_path)

# Step 3: Set Git User Identity
# Configure Git with your username and email for committing
!git config --global user.name "FilipLarsson12"
!git config --global user.email "hockeyfilip12@gmail.com"

# Step 4: Reset the Git Repository
# Hard reset or delete .git to ensure the repository is clean
!rm -rf .git  # Remove all previous history
!git init  # Reinitialize the Git repository

# Step 5: Rename 'master' to 'main'
# This resolves the branch mismatch error
!git branch -m master main  # Rename the initial branch to 'main'

# Step 6: Configure Git Remote
github_token = userdata.get("github_access_token")  # Retrieve the GitHub Personal Access Token
repo_url = f"https://{github_token}@github.com/FilipLarsson12/Wav2Vec2-vs-HUbert.git"

# Set the Git remote with authentication
!git remote add origin {repo_url}

# Step 7: Stage and Commit Changes
!git add "Wav2Vec2forER KEX.ipynb"  # Adjust to your notebook's name
!git commit -m "Fresh start after removing secrets"

# Step 8: Push to GitHub
# Use force push to overwrite previous history
!git push -f origin main  # Push to 'main'


Mounted at /content/drive
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/drive/MyDrive/colab_repos/Wav2Vec2-vs-HUbert/.git/
[master (root-commit) 9dc5f16] Fresh start after removing secrets
 1 file changed, 1 insertion(+)
 create mode 100644 Wav2Vec2forER KEX.ipynb
error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/FilipLarsson12/Wav2Vec2-vs-HUbert.git'
[m

In [None]:
%%capture

!pip install git+https://github.com/huggingface/datasets.git
!pip install git+https://github.com/huggingface/transformers.git
!pip install jiwer
!pip install torchaudio
!pip install librosa


In [None]:
# Session-wide environment variables.
# Locale: use the C.UTF-8 locale for all categories.
%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
# Cache directories for transformers and datasets, both under /content/cache.
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
# NOTE(review): presumably enabled to make CUDA errors easier to trace; confirm it is
# still wanted for normal training runs.
%env CUDA_LAUNCH_BLOCKING=1

In [None]:
# Monitor the training process
!pip install wandb

In [None]:
# Weights & Biases setup. Requires a Colab secret named "WANDB_TOKEN";
# skip this cell if you do not want to log to wandb.
from google.colab import userdata
import os
# Read the API token from Colab's secret store so it is not hardcoded in the notebook.
wandb_token = userdata.get("WANDB_TOKEN")

# wandb settings, passed through environment variables.
%env WANDB_WATCH=all
%env WANDB_LOG_MODEL=1
%env WANDB_PROJECT=Wav2Vec2forER
# NOTE(review): the token is interpolated into the shell command line below.
!wandb login {wandb_token} --relogin  # Use the secret for authentication


# Loading in and preparing the RAVDESS dataset

In [None]:
from datasets import load_dataset

# No cache directory is passed here; HF_DATASETS_CACHE is set with %env earlier in the notebook.
import os

# Load the "narad/ravdess" dataset.
dataset = load_dataset("narad/ravdess")

In [None]:
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm import tqdm

import torchaudio
from sklearn.model_selection import train_test_split

import os
import sys

In [None]:
import torchaudio
import librosa
import IPython.display as ipd
import numpy as np

In [None]:
# Show a summary of the 'train' split.
print(dataset['train'])

Creating the `label2id` and `id2label` dictionaries to get an easier overview of the classes and their labels.

In [None]:
# Inspect the schema of the 'train' split and read the class names of the 'labels' feature.
print(dataset['train'].features)
label_names = dataset['train'].features['labels'].names
print(label_names)

# Forward lookup: class name -> integer id (the position in label_names).
label2id = dict(zip(label_names, range(len(label_names))))

# Reverse lookup: integer id -> class name.
id2label = dict(enumerate(label_names))

# Show both mappings.
print("Label to ID:", label2id)
print("ID to Label:", id2label)

In [None]:
# Convert the 'train' split to a pandas DataFrame for exploration.
df = dataset['train'].to_pandas()

In [None]:
# Preview the first rows of the raw DataFrame.
df.head()

Adding an `emotion` column to the DataFrame so that each row shows its label name as well as its numeric id.

In [None]:
# Translate the integer 'labels' ids into label names using id2label.
df["emotion"] = df["labels"].map(id2label)

In [None]:
# Preview again, now including the 'emotion' column.
df.head()

Listening to a random sample:

In [None]:
# Pick one random row. No seed is set, so a different row is chosen on every run.
idx = np.random.randint(0, len(df))
sample = df.iloc[idx]

# File path of the clip and its label name.
path = sample['audio']["path"]
label = sample["emotion"]


print(f"ID Location: {idx}")
print(f"      Label: {label}")
print()

# Load the file; `sr` is the sampling rate reported for the file.
speech, sr = torchaudio.load(path)
print(path)
print(speech[0])
# Keep only the first row of the loaded tensor (presumably the first channel - TODO confirm)
# and convert it to a NumPy array.
speech = speech[0].numpy().squeeze()
print(speech)
# Resample from the file's rate to 16 kHz, the rate passed to the player below.
speech = librosa.resample(y=speech, orig_sr=sr, target_sr=16000)
# Last expression of the cell: renders an audio player widget.
ipd.Audio(data=np.asarray(speech), autoplay=False, rate=16000)

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
# Horizontal bar chart of the number of rows per emotion, with the top and right
# spines hidden.
emotion_counts = df.groupby('emotion').size()
ax = emotion_counts.plot(kind='barh', color=sns.palettes.mpl_palette('Dark2'))
ax.spines[['top', 'right']].set_visible(False)

In [None]:
# Distinct emotion names, then the number of non-null 'audio' entries per emotion.
print("Labels: ", df["emotion"].unique())
print()
df.groupby("emotion")["audio"].count()

Restructuring the dataframe a bit for clarity:

In [None]:
# Copy the "path" entry of each 'audio' dict into its own column (None when the key is absent).
df["path"] = df["audio"].apply(lambda audio: audio.get("path", None))


def speech_file_to_array_fn(path):
    """Load the audio file at `path` and return it resampled as a NumPy array.

    The target rate is read from the notebook-level `target_sampling_rate`
    variable when the function is called, so that variable must be defined
    before the first call. Size-1 dimensions are squeezed out of the result.
    """
    waveform, source_rate = torchaudio.load(path)
    to_target_rate = torchaudio.transforms.Resample(source_rate, target_sampling_rate)
    return to_target_rate(waveform).squeeze().numpy()

# Sampling rate (Hz) that speech_file_to_array_fn resamples to.
target_sampling_rate = 16000

# Create a new column 'input_values' holding each clip's signal resampled to 16000 Hz.
# Every file listed in 'path' is loaded and resampled here.

df["input_values"] = df["path"].apply(speech_file_to_array_fn)

# Remove the 'audio' column, as it is now redundant.
# NOTE: re-running the cell above that reads df["audio"] will fail after this line.
df = df.drop("audio", axis=1)  # axis=1 specifies columns



In [None]:
# Preview the restructured DataFrame ('path' and 'input_values' added, 'audio' removed).
df.head()

Now we split the dataset into a train set and a test set, and save both as CSV files in `/content/data`:

In [None]:
import os

# Directory that receives the train/test CSV files.
save_path = "/content/data"

os.makedirs(save_path, exist_ok=True)

# 80/20 split, stratified on the integer label so both parts keep the same class
# proportions; the fixed random_state makes the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=101, stratify=df["labels"])

train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Leave the raw waveforms out of the CSVs. Each 'input_values' cell is a NumPy array,
# which to_csv would write as its string representation; NumPy abbreviates long arrays
# with "...", so the samples could not be recovered from the file. The 'path' column is
# kept, so the audio can be reloaded from disk. train_df / test_df in memory are unchanged.
csv_excluded_columns = ["input_values"]
train_df.drop(columns=csv_excluded_columns, errors="ignore").to_csv(
    f"{save_path}/train.csv", sep="\t", encoding="utf-8", index=False
)
test_df.drop(columns=csv_excluded_columns, errors="ignore").to_csv(
    f"{save_path}/test.csv", sep="\t", encoding="utf-8", index=False
)


print(train_df.shape)
print(test_df.shape)

In [None]:
# Distinct emotion names present in the data; their count is used as the number of classes.
unique_labels = df["emotion"].unique()
print(unique_labels)
print(f"A classification problem with {len(unique_labels)} emotions.")

# Loading in the Wav2Vec2 model

In [None]:
from transformers import AutoConfig, Wav2Vec2Processor

In [None]:
# Identifier of the pretrained checkpoint passed to AutoConfig.from_pretrained below.
model_name_or_path = "facebook/Wav2Vec2-Base-960h"
# Stored on the config as `pooling_mode`.
# NOTE(review): presumably selects how frame-level outputs are pooled by the classifier - confirm.
pooling_mode = "mean"

In [None]:
# Show the label mappings that are attached to the config below.
print(label2id)
print(id2label)

# Load the checkpoint's configuration and set it up for classification:
# one output per emotion, plus the name<->id mappings.
config = AutoConfig.from_pretrained(
    model_name_or_path,
    finetuning_task="wav2vec2_clf",
    num_labels=len(unique_labels),
    id2label=id2label,
    label2id=label2id,
)
# Attach the pooling strategy as an extra attribute on the config object.
config.pooling_mode = pooling_mode
print(config)

In [22]:
# NOTE(review): scratch cell, apparently a leftover sanity check; it is not used by the
# analysis and could be removed.
print("Hej jag heter Filip")

Hej jag heter Filip
