<a href="https://colab.research.google.com/github/ArtunKARA/MusicEmotionRecognition/blob/main/hubert_large_ls960_ft.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Drive İle Bağlantı

In [None]:
from google.colab import drive

# Directory inside the Colab VM where Google Drive is attached.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)


Mounted at /content/drive


Veri Hazırlama

In [None]:
import os
import torchaudio
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB

# Data location
data_dir = '/content/drive/MyDrive/Music Data/'

# The dataset cell further down passes sampling_rate=16000 to the processor,
# so every clip is converted to that rate here instead of being assumed to
# already have it.
TARGET_SAMPLE_RATE = 16000

# Build the dataset as a list of (waveform, label) pairs, where each waveform
# is a mono tensor of shape (1, num_samples) at TARGET_SAMPLE_RATE.
labels = ['Agresif', 'Hüzünlü', 'Neşeli']
data = []
for label in labels:
    folder_path = os.path.join(data_dir, f"{label} Müzikler")
    # Sorted so the seeded train/test split is reproducible; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.wav'):
            continue
        filepath = os.path.join(folder_path, filename)
        waveform, sample_rate = torchaudio.load(filepath)
        # Downmix multi-channel audio to mono; the model consumes one channel.
        if waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
        if sample_rate != TARGET_SAMPLE_RATE:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, TARGET_SAMPLE_RATE
            )
        data.append((waveform, label))

# `data` intentionally keeps the raw waveforms: the HuBERT processor/model used
# in the next cell takes waveform samples as input, not Mel spectrograms, so the
# spectrogram transform is no longer applied to `data`. The transform object is
# still created so any code that refers to `transform` keeps working.
transform = MelSpectrogram()


HuBERT Modelini ve Veri Kümelerini Hazırlama


In [None]:
from transformers import Wav2Vec2Processor, HubertForSequenceClassification
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Load the HuBERT processor and the sequence-classification model from the
# same pretrained checkpoint; the classification head has 3 output labels.
CHECKPOINT_NAME = "facebook/hubert-large-ls960-ft"
processor = Wav2Vec2Processor.from_pretrained(CHECKPOINT_NAME)
model = HubertForSequenceClassification.from_pretrained(CHECKPOINT_NAME, num_labels=3)

# Custom Dataset class
class AudioDataset(Dataset):
    """Dataset of (waveform, label-name) pairs prepared for HuBERT.

    Args:
        data: sequence of ``(waveform, label)`` tuples, where ``waveform`` is a
            tensor and ``label`` is one of the names in ``labels``.
        labels: ordered list of class names; a sample's target is the index of
            its label name in this list.
    """

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input_values, target)`` for sample ``idx``.

        ``input_values`` is the processor output with its leading batch axis
        of size 1 removed, so that batching yields ``(batch, time)`` tensors.
        ``target`` is a scalar tensor holding the class index.
        """
        waveform, label = self.data[idx]
        inputs = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt")
        # Look the label up in the list stored on the instance, not in the
        # module-level `labels`: that global can be rebound by later cells
        # (e.g. as a loop variable holding a tensor batch).
        target = torch.tensor(self.labels.index(label))
        return inputs.input_values.squeeze(0), target

# Split the data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)


def pad_collate(batch):
    """Collate ``(input_values, target)`` samples of differing lengths.

    Each ``input_values`` tensor is flattened to 1-D and right-padded with
    zeros to the longest clip in the batch, giving a ``(batch, time)`` tensor.
    Targets are stacked into a ``(batch,)`` tensor. No attention mask is
    returned, so batches keep the two-element ``(inputs, labels)`` structure
    that the training and evaluation loops unpack.
    """
    values = [sample_values.reshape(-1) for sample_values, _ in batch]
    targets = torch.stack([target for _, target in batch])
    longest = max(v.shape[0] for v in values)
    padded = torch.zeros(len(values), longest, dtype=values[0].dtype)
    for row, v in enumerate(values):
        padded[row, : v.shape[0]] = v
    return padded, targets


# DataLoaders; the custom collate function is needed because the default one
# cannot stack clips that have different numbers of samples.
train_dataset = AudioDataset(train_data, labels)
test_dataset = AudioDataset(test_data, labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=pad_collate)
test_loader = DataLoader(test_dataset, batch_size=8, collate_fn=pad_collate)

Modeli Eğitme ve Performansını Değerlendirme

In [None]:
from torch import nn, optim
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, roc_auc_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt

# Training loop
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=5e-5)

num_epochs = 5
train_losses = []  # mean training loss of each epoch, in order

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # The batch targets are bound to `targets`, not `labels`: rebinding the
    # module-level `labels` list of class names to a tensor would break any
    # later code that still reads it as that list.
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Average over the number of batches seen in this epoch.
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}")

# Loss curve: mean training loss per epoch
fig, ax = plt.subplots()
ax.plot(range(num_epochs), train_losses)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Loss vs Epochs')
plt.show()

# Model evaluation
model.eval()

# Class names come from the dataset object rather than the module-level
# `labels`, so the plot legend is correct even if that global was rebound.
class_names = list(test_dataset.labels)
num_classes = len(class_names)

# Run the forward pass on whatever device the model currently lives on.
eval_device = next(model.parameters()).device

y_true = []
y_pred = []
score_batches = []  # per-batch class probabilities, concatenated below

with torch.no_grad():
    for inputs, targets in test_loader:
        logits = model(inputs.to(eval_device)).logits
        # Softmax in float64 so each row sums to 1 within the tolerance that
        # roc_auc_score checks for multiclass scores.
        probs = torch.softmax(logits.double(), dim=1).cpu()
        y_true.extend(targets.numpy())
        y_pred.extend(probs.argmax(dim=1).numpy())
        score_batches.append(probs)

# Probabilities for the whole test set, shape (num_samples, num_classes).
# Collected once inside the no_grad loop so AUC/ROC use the same samples as
# y_true, instead of re-running the model on only the last batch.
y_score = torch.cat(score_batches).numpy()

# Metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
auc_score = roc_auc_score(
    y_true, y_score, multi_class='ovr', labels=list(range(num_classes))
)

# One-vs-rest ROC curve per class
fpr = {}
tpr = {}
roc_auc = {}

for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true, y_score[:, i], pos_label=i)
    roc_auc[i] = auc(fpr[i], tpr[i])

# ROC curve plot
plt.figure()
for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f'ROC curve (area = {roc_auc[i]:.2f}) for class {class_names[i]}')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc='lower right')
plt.show()

Sonuçların Raporlanması

In [None]:
import pandas as pd

# Gather the evaluation metrics into a two-column table (name, value).
metric_names = ['Accuracy', 'F-measure', 'Recall', 'Precision', 'AUC']
metric_scores = [accuracy, f1, recall, precision, auc_score]
results = pd.DataFrame({'Metric': metric_names, 'Score': metric_scores})

print(results)