<a href="https://colab.research.google.com/github/ArtunKARA/MusicEmotionRecognition/blob/main/hubert_large_ls960_ft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

WAV dosyalarını yükleme ve preprocessing

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab runtime; the dataset path used below lives under /content/drive.
drive.mount('/content/drive')

import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score, roc_curve, auc, matthews_corrcoef
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Dataset location and loading parameters
base_dir = '/content/drive/MyDrive/Music Data/'
# One sub-folder per class; a name's position in this list is used as its integer label.
categories = ['Agresif', 'Hüzünlü', 'Neşeli']
# Sampling rate handed to librosa.load when the audio files are read.
sampling_rate = 16000

# Load the WAV files and preprocess them
def load_data(base_dir, categories, sampling_rate):
    """Load every audio file of every category and zero-pad to a common length.

    Args:
        base_dir: Directory that contains one sub-folder per category.
        categories: Sub-folder names. The position of a name in this sequence
            becomes the integer label of the clips found in that folder.
        sampling_rate: Sampling rate passed to ``librosa.load`` as ``sr``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` holds one entry
        per clip, right-padded with zeros to the length of the longest clip;
        ``labels`` holds the matching category indices in the same order.
    """
    clips = []
    labels = []

    for label, category in enumerate(categories):
        folder_path = os.path.join(base_dir, category)
        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            # Sub-directories cannot be decoded as audio, so skip them
            # instead of letting the loader fail.
            if not os.path.isfile(file_path):
                continue
            audio, _ = librosa.load(file_path, sr=sampling_rate)
            clips.append(audio)
            labels.append(label)

    # Bring every clip to the length of the longest one by appending zeros.
    max_length = max((len(clip) for clip in clips), default=0)
    data = [
        np.pad(clip, (0, max_length - len(clip)), 'constant')
        for clip in clips
    ]

    return np.array(data), np.array(labels)

# Read the whole dataset into memory
data, labels = load_data(base_dir, categories, sampling_rate)

# Split into training and test sets: 20% held out, stratified by label, fixed seed
split_parts = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)
X_train, X_test, y_train, y_test = split_parts

# Report the resulting array shapes
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Test labels shape: {y_test.shape}")




Mounted at /content/drive


HuBERT modelini yükleme ve eğitme

In [None]:
# (Note: how the HuBERT model is trained and used should be detailed here; for example, the transformers library can be used.)
from transformers import Wav2Vec2Processor, HubertForSequenceClassification
import torch

# The processor and the classifier are both loaded from the same pretrained checkpoint.
checkpoint_name = "facebook/hubert-large-ls960-ft"
processor = Wav2Vec2Processor.from_pretrained(checkpoint_name)
# One output label per category.
model = HubertForSequenceClassification.from_pretrained(
    checkpoint_name,
    num_labels=len(categories),
)

# Convert the data into model inputs
def preprocess(data):
    """Run a batch of waveforms through the module-level ``processor``.

    Args:
        data: Batch of waveforms to convert.

    Returns:
        The processor output, requested as PyTorch tensors
        (``return_tensors="pt"``) with padding enabled.
    """
    return processor(
        data,
        sampling_rate=sampling_rate,
        return_tensors="pt",
        padding=True,
    )

# Hand the processor one NumPy row per clip instead of calling .tolist():
# .tolist() boxes every audio sample as a Python float, which multiplies the
# memory footprint only for the processor to convert it back to an array.
train_inputs = preprocess(list(X_train))
test_inputs = preprocess(list(X_test))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-large-ls960-ft and are newly initialized: ['classifier.bias', 'classifier.weight', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Eğitim

In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
loss_fn = torch.nn.CrossEntropyLoss()

# Training loop settings
num_epochs = 5
batch_size = 8  # samples per forward/backward pass
train_losses = []
test_losses = []

# CrossEntropyLoss expects int64 class indices; make the dtype explicit
# instead of relying on the platform's default NumPy integer type.
train_targets = torch.as_tensor(y_train, dtype=torch.long)
test_targets = torch.as_tensor(y_test, dtype=torch.long)


def _iter_minibatches(inputs, targets, size, shuffle):
    """Yield ``(model_inputs, targets)`` slices holding at most ``size`` samples."""
    count = targets.shape[0]
    order = torch.randperm(count) if shuffle else torch.arange(count)
    for start in range(0, count, size):
        picked = order[start:start + size]
        yield {name: tensor[picked] for name, tensor in inputs.items()}, targets[picked]


for epoch in range(num_epochs):
    # Train in shuffled mini-batches: pushing the whole training set through
    # the model in a single pass gives only one optimizer step per epoch and
    # needs activations for every sample to be held in memory at once.
    model.train()
    weighted_loss = 0.0
    for batch, target in _iter_minibatches(train_inputs, train_targets, batch_size, shuffle=True):
        optimizer.zero_grad()
        logits = model(**batch).logits
        loss = loss_fn(logits, target)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so the epoch value is a per-sample mean.
        weighted_loss += loss.item() * target.shape[0]
    train_losses.append(weighted_loss / max(train_targets.shape[0], 1))

    # Evaluate on the test split without tracking gradients.
    model.eval()
    weighted_loss = 0.0
    with torch.no_grad():
        for batch, target in _iter_minibatches(test_inputs, test_targets, batch_size, shuffle=False):
            logits = model(**batch).logits
            weighted_loss += loss_fn(logits, target).item() * target.shape[0]
    test_losses.append(weighted_loss / max(test_targets.shape[0], 1))

    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_losses[-1]}, Test Loss: {test_losses[-1]}")

# Loss-vs-epoch plot, used to check that the model is not simply memorising the training data
plt.plot(range(num_epochs), train_losses, label='Train Loss')
plt.plot(range(num_epochs), test_losses, label='Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk30.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk39.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk18.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk14.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk12.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk50.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk46.wav
  Dosya: /content/drive/MyDrive/Music Data/Hüzünlü/çok eskiden rastlaşacaktık _ playlist (128kbit_AAC)_chunk24.wav

Modeli değerlendirme

In [None]:
# Predict the class of every test sample (no gradient tracking needed)
model.eval()
with torch.no_grad():
    test_outputs = model(**test_inputs).logits
    predictions = test_outputs.argmax(dim=1).numpy()

# Multi-class scores are averaged with per-class support weighting
weighted_avg = {'average': 'weighted'}
accuracy = accuracy_score(y_test, predictions)
f1 = f1_score(y_test, predictions, **weighted_avg)
recall = recall_score(y_test, predictions, **weighted_avg)
precision = precision_score(y_test, predictions, **weighted_avg)
mcc = matthews_corrcoef(y_test, predictions)
conf_matrix = confusion_matrix(y_test, predictions)

# ROC and AUC computation (one-vs-rest, one curve per class)
# roc_curve needs a continuous score to sweep thresholds over. Hard 0/1
# predictions collapse each curve to a single operating point, so use the
# softmax probability of the class instead.
class_probs = torch.softmax(test_outputs, dim=1).numpy()

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(len(categories)):
    fpr[i], tpr[i], _ = roc_curve(y_test == i, class_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Draw one ROC figure per class, with the chance diagonal for reference
for i in range(len(categories)):
    plt.figure()
    plt.plot(fpr[i], tpr[i], label=f'ROC curve (area = {roc_auc[i]:.2f}) for class {categories[i]}')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'Receiver Operating Characteristic for {categories[i]}')
    plt.legend(loc="lower right")
    plt.show()

# Report the test-set scores as a two-column table
metric_names = ['Accuracy', 'F1 Score', 'Recall', 'Precision', 'MCC']
metric_scores = [accuracy, f1, recall, precision, mcc]
results = pd.DataFrame({'Metric': metric_names, 'Score': metric_scores})
print(results)

# Cross-validation
# Each fold starts from a freshly loaded checkpoint with its own optimizer.
# Reusing the already fine-tuned `model` would let every fold evaluate on
# samples the weights were trained on earlier, which inflates the scores.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = []

for train_index, val_index in kf.split(data):
    X_train_fold, X_val_fold = data[train_index], data[val_index]
    y_train_fold, y_val_fold = labels[train_index], labels[val_index]

    # Pass NumPy rows rather than nested Python lists to keep memory use down.
    train_inputs_fold = preprocess(list(X_train_fold))
    val_inputs_fold = preprocess(list(X_val_fold))
    # CrossEntropyLoss expects int64 class indices.
    fold_targets = torch.as_tensor(y_train_fold, dtype=torch.long)

    fold_model = HubertForSequenceClassification.from_pretrained(
        "facebook/hubert-large-ls960-ft",
        num_labels=len(categories),
    )
    fold_optimizer = torch.optim.AdamW(fold_model.parameters(), lr=1e-5)

    fold_model.train()
    for epoch in range(num_epochs):
        fold_optimizer.zero_grad()
        outputs = fold_model(**train_inputs_fold).logits
        loss = loss_fn(outputs, fold_targets)
        loss.backward()
        fold_optimizer.step()

    fold_model.eval()
    with torch.no_grad():
        val_outputs = fold_model(**val_inputs_fold).logits
        val_predictions = torch.argmax(val_outputs, dim=1).numpy()

    accuracy_fold = accuracy_score(y_val_fold, val_predictions)
    cross_val_scores.append(accuracy_fold)

    # Drop the fold's model before the next one is loaded.
    del fold_model, fold_optimizer

print(f"Cross-validation accuracy scores: {cross_val_scores}")
print(f"Mean cross-validation accuracy: {np.mean(cross_val_scores)}")

HuBERT model ve processor yükleniyor...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-large-ls960-ft and are newly initialized: ['classifier.bias', 'classifier.weight', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HuBERT model ve processor yüklendi.
