In [8]:
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
from torch.utils.data import DataLoader, Dataset
from transformers import BertForSequenceClassification, BertTokenizerFast, AdamW
from sklearn.metrics import f1_score, accuracy_score
import pandas as pd
import time

import nlpaug.augmenter.word as naw
import random
from transformers import get_linear_schedule_with_warmup
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.utils import class_weight
import numpy as np
from tqdm import tqdm
from transformers import get_linear_schedule_with_warmup


# Load train and test data
train_data = pd.read_csv("../data/train_data.csv")
test_data = pd.read_csv("../data/test_data.csv")

print(train_data.head())

# Split train and test data into features and targets
train_features = train_data["lyrics"]
train_targets = train_data["most_common_genre"]

test_features = test_data["lyrics"]
test_targets = test_data["most_common_genre"]

def model_summary(model):
    print("Model summary:")
    print("---------------------------")
    total_params = 0
    for name, param in model.named_parameters():
        param_count = param.numel()
        total_params += param_count
    print(f"Total parameters: {total_params}")

categories = sorted(list(train_targets.unique()))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class LyricsDataset(Dataset):
    def __init__(self, data, labels, augment=False, augmentation_rate=0.1):
        self.data = data
        self.labels = labels
        self.augment = augment
        self.augmentation_rate = augmentation_rate
        self.category_to_index = {category: index for index, category in enumerate(categories)}
        
        self.augmenter = naw.SynonymAug(aug_src='wordnet')

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        lyrics = self.data.iloc[index]
        label = self.labels.iloc[index]
        label_index = torch.tensor(self.category_to_index[label], dtype=torch.long).to(device)

        if self.augment:
            lyrics = self.augmenter.augment(lyrics)
            
        return lyrics, label_index

batchsize = 256
learning_rate = 1e-5
train_dataset = LyricsDataset(train_features, train_targets)
test_dataset = LyricsDataset(test_features, test_targets)
train_dataloader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)
class_weights = class_weight.compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(train_targets),
    y = train_targets  
)
class_weights = torch.FloatTensor(class_weights).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)
# Load BERT model
model = BertForSequenceClassification.from_pretrained("../models/lyrics-bert/", num_labels=len(train_dataset.category_to_index))
tokenizer = BertTokenizerFast.from_pretrained("../models/lyrics-bert/")
model.to(device)

optimizer = AdamW(model.parameters(), lr=learning_rate)

# Metrics to store
metrics = {
    "train_accuracy": [],
    "test_accuracy": [],
    "f1_score": [],
    "train_loss": [],
    "test_loss": [],
    "train_time_per_step": [],
    "eval_time_per_step": []
}

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"The model has {trainable_params} trainable parameters.")

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_dataloader)*10)

for epoch in range(10):  # Number of epochs
    start_time = time.time()
    # Training
    model.train()
    total_train_loss = 0
    correct_train_preds = 0
    for batch in tqdm(train_dataloader, desc="Training"):
        b_input_ids, b_labels = batch
        b_input_ids = tokenizer(b_input_ids, padding=True, truncation=True, max_length=512, return_tensors='pt').input_ids.to(device)
        optimizer.zero_grad()
        outputs = model(b_input_ids, labels=b_labels)
        loss = outputs.loss
        total_train_loss += loss.item()
        pred = torch.argmax(outputs.logits, dim=1)
        correct_train_preds += (pred == b_labels).sum().item()
        loss.backward()
        # Gradient clipping to avoid exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader)
    train_acc = correct_train_preds / len(train_dataset)
    metrics["train_loss"].append(avg_train_loss)
    metrics["train_accuracy"].append(train_acc)

    # Time per step
    train_time = time.time() - start_time
    train_time_per_step = train_time / len(train_dataloader)
    metrics["train_time_per_step"].append(train_time_per_step)

    # Evaluation
    model.eval()
    total_eval_loss = 0
    correct_test_preds = 0
    all_preds, all_labels = [], []

    start_time = time.time()
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            b_input_ids, b_labels = batch
            b_input_ids = tokenizer(b_input_ids, padding=True, truncation=True, max_length=512, return_tensors='pt').input_ids.to(device)
            outputs = model(b_input_ids, labels=b_labels)
            loss = outputs.loss
            total_eval_loss += loss.item()
            pred = torch.argmax(outputs.logits, dim=1)
            correct_test_preds += (pred == b_labels).sum().item()
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(b_labels.cpu().numpy())

    avg_test_loss = total_eval_loss / len(test_dataloader)
    test_acc = correct_test_preds / len(test_dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    metrics["test_loss"].append(avg_test_loss)
    metrics["test_accuracy"].append(test_acc)
    metrics["f1_score"].append(f1)

    # Time per step
    eval_time = time.time() - start_time
    eval_time_per_step = eval_time / len(test_dataloader)
    metrics["eval_time_per_step"].append(eval_time_per_step)

    print(f"Epoch: {epoch+1} Train Accuracy: {train_acc:.4f} Test Accuracy: {test_acc:.4f} F1 Score: {f1:.4f} Train Loss: {avg_train_loss:.4f} Test Loss: {avg_test_loss:.4f} Train Time/Step: {train_time_per_step:.4f} Eval Time/Step: {eval_time_per_step:.4f}")


# Save metrics to CSV
# Save metrics to CSV
df = pd.DataFrame(metrics)
df.to_csv("full-fine-tuned-lyrics-bert.csv", index=False)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../models/lyrics-bert/ and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


                       id                                             lyrics  \
0  1FAmKoufyAXMfzPPs9bsjA  i tied my bandana took my pack from the floor ...   
1  3QvPEv8XjHa73iYhaienWw  i want to live on the moon never see a human a...   
2  5VPFATm85G3P04Q5g8yxqr  bitch you know you can t parallel park anyway ...   
3  7J2jCftItt7htcOUdcMnpt  graceless falling slipping in the cold with no...   
4  4cBPzVIbDIQx0LIyauFAy0  madame morse estate stood five hundred years p...   

       artist_name most_common_genre  \
0  Waylon Jennings           country   
1   Phantom Planet              rock   
2    Isaiah Rashad           hip-hop   
3     Matt Pond PA             indie   
4       Ariel Pink               pop   

                                          genre_list  
0  ['country', 'country', 'rock', 'outlaw', 'coun...  
1                                    ['pop', 'rock']  
2  ['hip-hop', 'rap', 'tennessee', 'hip-hop', 'un...  
3                                ['philly', 'indie']  
4  

Training: 100%|██████████| 75/75 [00:33<00:00,  2.23it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.61it/s]


Epoch: 1 Train Accuracy: 0.2114 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1918 Test Loss: 2.1423 Train Time/Step: 0.4475 Eval Time/Step: 0.2171


Training: 100%|██████████| 75/75 [00:34<00:00,  2.19it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.61it/s]


Epoch: 2 Train Accuracy: 0.2361 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1370 Test Loss: 2.1415 Train Time/Step: 0.4559 Eval Time/Step: 0.2173


Training: 100%|██████████| 75/75 [00:34<00:00,  2.16it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.54it/s]


Epoch: 3 Train Accuracy: 0.2362 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1355 Test Loss: 2.1418 Train Time/Step: 0.4624 Eval Time/Step: 0.2205


Training: 100%|██████████| 75/75 [00:34<00:00,  2.19it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.61it/s]


Epoch: 4 Train Accuracy: 0.2368 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1353 Test Loss: 2.1421 Train Time/Step: 0.4556 Eval Time/Step: 0.2171


Training: 100%|██████████| 75/75 [00:34<00:00,  2.19it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.51it/s]


Epoch: 5 Train Accuracy: 0.2361 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1360 Test Loss: 2.1419 Train Time/Step: 0.4571 Eval Time/Step: 0.2218


Training: 100%|██████████| 75/75 [00:34<00:00,  2.20it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.50it/s]


Epoch: 6 Train Accuracy: 0.2362 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1354 Test Loss: 2.1419 Train Time/Step: 0.4552 Eval Time/Step: 0.2227


Training: 100%|██████████| 75/75 [00:33<00:00,  2.22it/s]
Evaluating: 100%|██████████| 19/19 [00:03<00:00,  5.01it/s]


Epoch: 7 Train Accuracy: 0.2372 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1351 Test Loss: 2.1418 Train Time/Step: 0.4502 Eval Time/Step: 0.1998


Training: 100%|██████████| 75/75 [00:34<00:00,  2.18it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.50it/s]


Epoch: 8 Train Accuracy: 0.2367 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1331 Test Loss: 2.1419 Train Time/Step: 0.4595 Eval Time/Step: 0.2222


Training: 100%|██████████| 75/75 [00:33<00:00,  2.24it/s]
Evaluating: 100%|██████████| 19/19 [00:03<00:00,  4.81it/s]


Epoch: 9 Train Accuracy: 0.2366 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1348 Test Loss: 2.1417 Train Time/Step: 0.4464 Eval Time/Step: 0.2081


Training: 100%|██████████| 75/75 [00:34<00:00,  2.15it/s]
Evaluating: 100%|██████████| 19/19 [00:04<00:00,  4.66it/s]

Epoch: 10 Train Accuracy: 0.2374 Test Accuracy: 0.2337 F1 Score: 0.0885 Train Loss: 2.1350 Test Loss: 2.1418 Train Time/Step: 0.4644 Eval Time/Step: 0.2149



