In [2]:
import os
import torch
import torchaudio
import numpy as np
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from dataclasses import dataclass
from typing import List, Dict
import json
import re
import pandas as pd

In [3]:
import warnings
warnings.filterwarnings('ignore')

**DATA LOADING**

The Surrey Audio-Visual Expressed Emotion (SAVEE) dataset consists of 480 recordings from 4 male actors, speaking sentences in a British English accent, is chosen for this study. It is an enacted audio dataset which consists of seven different emotions, such as angry, disgust, happy, neutral, sad, fear, and surprise. The audio files are in .wav format (waveform audio file). Dataset is loaded directly from kaggle

In [4]:
!pip install -q kaggle
#Loading savee dataset from kaggle
#Savee dataset
savee_dataset_name= "barelydedicated/savee-database"

# Download the dataset (zipped file) from kaggle path
!kaggle datasets download -d {savee_dataset_name}

# Unzip the dataset
!unzip -q savee-database.zip -d savee_dataset

# Verify the dataset
!ls savee_dataset


Dataset URL: https://www.kaggle.com/datasets/barelydedicated/savee-database
License(s): copyright-authors
Downloading savee-database.zip to /content
 94% 202M/215M [00:01<00:00, 178MB/s]
100% 215M/215M [00:01<00:00, 165MB/s]
audiodata  AudioData


**WAV2VEC2.0**

The Wav2Vec2.0 is a model developed by Facebook AI for speech recognition. it accpets raw audio waverform as input and generates text as output. The audio waverform is sampled to a sampling rate of 16KHZ. Normalization and metadata extraction is performed before the input to the model.The models appends all the text along wiht its emotion and saves it in josn file for future use


In [5]:
from huggingface_hub import login
login(token="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")

In [6]:
@dataclass
class AudioTranscription:
    filename: str
    emotion: str
    transcription: str

In [7]:
# converting audio from ravdess and savee dataset into text using wav2vec2.0 model
class SAVEE_Processor:
    def __init__(self,savee_dataset_path: str):
        self.savee_dataset_path = savee_dataset_path  # Added path for SAVEE dataset
        self.processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h") # loading wav2vec2 model
        self.model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)


       #mapping emotion labels for ravdess dataset
        self.savee_emotion_map = {
            "a": "angry",
            "d": "disgust",
            "f": "fearful",
            "h": "happy",
            "n": "neutral",
            "sa": "sad",
            "su": "surprised"
        }

    def parse_savee_filename(self, filename: str) -> Dict[str, str]:
        """Parse SAVEE filename to extract metadata (emotion label from file name)."""
        parts = filename.split('.')[0]
        emotion_code = re.sub(r'\d+', '', parts).lower()
        return {
            'emotion': self.savee_emotion_map.get(emotion_code, 'unknown'),
        }

    def load_audio(self, file_path: str) -> torch.Tensor:
        """Loading and preprocess audio files."""
        try:
            waveform, sample_rate = torchaudio.load(file_path)

            # Convert to mono if stereo
            if waveform.shape[0] > 1:
                waveform = torch.mean(waveform, dim=0, keepdim=True)

            # Resample to 16kHz
            if sample_rate != 16000:
                resampler = torchaudio.transforms.Resample(
                    orig_freq=sample_rate,
                    new_freq=16000
                )
                waveform = resampler(waveform)

            return waveform.squeeze()
        except Exception as e:
            print(f"Error loading {file_path}: {str(e)}")
            return None

    def transcribe_audio(self, waveform: torch.Tensor) -> str:
        """Transcribe audio using wav2vec2."""
        try:
            # Normalize waveform
            waveform = (waveform - waveform.mean()) / torch.sqrt(waveform.var() + 1e-7)

            # Prepare input
            inputs = self.processor(
                waveform,
                sampling_rate=16000,
                return_tensors="pt",
                padding=True
            )

            # Move input to device
            input_values = inputs.input_values.to(self.device)

            # Get prediction
            with torch.no_grad():
                logits = self.model(input_values).logits

            # Decode prediction
            predicted_ids = torch.argmax(logits, dim=-1)
            transcription = self.processor.batch_decode(predicted_ids)

            return transcription[0]
        except Exception as e:
            print(f"Transcription error: {str(e)}")
            return ""


    def process_savee_data(self) -> List[AudioTranscription]:
        """Process all audio files in SAVEE dataset."""
        transcriptions = []

        for root, _, files in os.walk(self.savee_dataset_path):
            for file in files:
                if file.endswith(".wav"):
                    try:
                        file_path = os.path.join(root, file)
                        print(f"Processing SAVEE: {file_path}")

                        # Load and transcribe audio
                        waveform = self.load_audio(file_path)
                        if waveform is None:
                            continue

                        # Get transcription
                        transcription = self.transcribe_audio(waveform)

                        # Parse filename for metadata
                        metadata = self.parse_savee_filename(file)

                        # Store results
                        transcriptions.append(AudioTranscription(
                            filename=file,
                            emotion=metadata['emotion'],
                            transcription=transcription
                        ))

                    except Exception as e:
                        print(f"Error processing SAVEE {file}: {str(e)}")
                        continue

        return transcriptions


In [8]:
def main():
    # Initialize processor
    processor = SAVEE_Processor("savee_dataset/AudioData")

    # Process dataset
    results = processor.process_savee_data()

    # Display some results
    print("\nSample transcriptions:")
    for result in results[:5]:
        print(f"\nFile: {result.filename}")
        print(f"Emotion: {result.emotion}")
        print(f"Transcription: {result.transcription}")

    # Save results to file
    output_data = [vars(r) for r in results]
    with open("transcriptions.json", "w") as f:
        json.dump(output_data, f, indent=2)

In [9]:
if __name__ == "__main__":
    main()

preprocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing SAVEE: savee_dataset/AudioData/JK/n03.wav
Processing SAVEE: savee_dataset/AudioData/JK/n19.wav
Processing SAVEE: savee_dataset/AudioData/JK/f09.wav
Processing SAVEE: savee_dataset/AudioData/JK/a02.wav
Processing SAVEE: savee_dataset/AudioData/JK/h10.wav
Processing SAVEE: savee_dataset/AudioData/JK/h12.wav
Processing SAVEE: savee_dataset/AudioData/JK/n05.wav
Processing SAVEE: savee_dataset/AudioData/JK/d03.wav
Processing SAVEE: savee_dataset/AudioData/JK/n25.wav
Processing SAVEE: savee_dataset/AudioData/JK/n01.wav
Processing SAVEE: savee_dataset/AudioData/JK/h09.wav
Processing SAVEE: savee_dataset/AudioData/JK/su13.wav
Processing SAVEE: savee_dataset/AudioData/JK/n04.wav
Processing SAVEE: savee_dataset/AudioData/JK/n15.wav
Processing SAVEE: savee_dataset/AudioData/JK/f04.wav
Processing SAVEE: savee_dataset/AudioData/JK/h13.wav
Processing SAVEE: savee_dataset/AudioData/JK/f10.wav
Processing SAVEE: savee_dataset/AudioData/JK/su04.wav
Processing SAVEE: savee_dataset/AudioData/JK

**RoBERTa**

RoBERTa model is an optimized version of BERT. The transcriptions.json file is given as the input to this model where the emotions are classifed and performac eof the model is evaluated with metrics such as accuracy,precision,recall, f1-scopre and a confusion matrix.

In [10]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    AdamW,
    get_linear_schedule_with_warmup
)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support
from sklearn.utils.class_weight import compute_class_weight
import json
from typing import List, Dict, Tuple
import matplotlib.pyplot as plt
import seaborn as sns
from torch.nn import functional as F
from tqdm import tqdm
import logging
import nltk
import warnings

In [11]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Suppress specific warnings
warnings.filterwarnings("ignore", message="Some weights of RobertaForSequenceClassification")

# Download required NLTK data upfront
try:
    nltk.download('wordnet', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk.download('omw-1.4', quiet=True)
except Exception as e:
    logging.warning(f"NLTK download warning: {str(e)}")



In [12]:
class EmotionDataset(Dataset):

    def __init__(self, texts: List[str], labels: List[int], tokenizer, max_length: int = 128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


In [13]:
class EmotionClassifier:
    def __init__(self,
                 num_labels: int,
                 model_name: str = "roberta-base",
                 max_length: int = 128,
                 batch_size: int = 8,
                 learning_rate: float = 1e-5,
                 epochs: int = 15):

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logging.info(f"Using device: {self.device}")

        self.num_labels = num_labels
        self.model_name = model_name
        self.max_length = max_length
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Initialize tokenizer
        logging.info("Loading tokenizer...")
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)

        # Initialize model with progress tracking
        logging.info("Loading model...")
        self.model = RobertaForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
        ).to(self.device)

        self.label_mapping = {
            'neutral': 0,
            'happy': 1,
            'sad': 2,
            'angry': 3,
            'fearful': 4,
            'disgust': 5,
            'surprised': 6
        }

        # Initialize metrics history
        self.metrics_history = {
            'train_loss': [],
            'val_loss': [],
            'val_accuracy': [],
            'val_f1': []
        }

    def prepare_data(self, transcriptions_file: str) -> Tuple[DataLoader, DataLoader, DataLoader]:
        """Prepare train, validation, and test datasets."""
        logging.info("Loading and preparing data...")

        try:
            with open(transcriptions_file, 'r') as f:
                data = json.load(f)
        except Exception as e:
            logging.error(f"Error loading transcriptions file: {str(e)}")
            raise

        df = pd.DataFrame(data)
        df['label'] = df['emotion'].map(self.label_mapping)



        # Print class distribution
        logging.info("\nInitial class distribution:")
        print(df['emotion'].value_counts())

        # Split data
        train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42, stratify=df['label'])
        val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['label'])

        # Compute class weights
        class_weights = compute_class_weight(
            'balanced',
            classes=np.unique(train_df['label']),
            y=train_df['label']
        )
        self.class_weights = torch.FloatTensor(class_weights).to(self.device)

        logging.info("\nClass weights:")
        for emotion, weight in zip(self.label_mapping.keys(), class_weights):
            print(f"{emotion}: {weight:.2f}")

        # Create datasets
        train_dataset = EmotionDataset(
            train_df['transcription'].tolist(),
            train_df['label'].tolist(),
            self.tokenizer,
            self.max_length
        )

        val_dataset = EmotionDataset(
            val_df['transcription'].tolist(),
            val_df['label'].tolist(),
            self.tokenizer,
            self.max_length
        )

        test_dataset = EmotionDataset(
            test_df['transcription'].tolist(),
            test_df['label'].tolist(),
            self.tokenizer,
            self.max_length
        )

        # Create dataloaders
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size)

        logging.info(f"Data splits: Train={len(train_dataset)}, Val={len(val_dataset)}, Test={len(test_dataset)}")

        return train_loader, val_loader, test_loader

    def evaluate(self, dataloader: DataLoader, desc: str = "Evaluating") -> Tuple[float, Dict]:
        """Evaluate the model and return detailed metrics."""
        self.model.eval()
        total_loss = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            progress_bar = tqdm(dataloader, desc=desc)
            for batch in progress_bar:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )

                loss = outputs.loss
                if self.class_weights is not None:
                    loss = F.cross_entropy(outputs.logits, labels, weight=self.class_weights)

                total_loss += loss.item()

                preds = torch.argmax(outputs.logits, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

        # Calculate metrics
        avg_loss = total_loss / len(dataloader)
        metrics = self.calculate_metrics(all_labels, all_preds)

        return avg_loss, metrics

    def calculate_metrics(self, true_labels: List[int], predicted_labels: List[int]) -> Dict:
        """Calculate and return comprehensive evaluation metrics."""
        # Convert label indices to emotion names for better readability
        label_names = list(self.label_mapping.keys())

        # Calculate basic metrics
        accuracy = accuracy_score(true_labels, predicted_labels)

        # Generate classification report
        clf_report = classification_report(
            true_labels,
            predicted_labels,
            target_names=label_names,
            output_dict=True
        )

        # Calculate per-class metrics
        precision, recall, f1, support = precision_recall_fscore_support(
            true_labels,
            predicted_labels,
            average=None,
            labels=range(len(label_names))
        )

        # Create confusion matrix
        conf_matrix = confusion_matrix(true_labels, predicted_labels)

        metrics = {
            'accuracy': accuracy,
            'classification_report': clf_report,
            'confusion_matrix': conf_matrix,
            'true_labels': true_labels,
            'predicted_labels': predicted_labels,
            'per_class_metrics': {
                emotion: {
                    'precision': prec,
                    'recall': rec,
                    'f1': f1_score,
                    'support': sup
                }
                for emotion, prec, rec, f1_score, sup
                in zip(label_names, precision, recall, f1, support)
            }
        }

        return metrics

    def plot_confusion_matrix(self, conf_matrix: np.ndarray, title: str = "Confusion Matrix"):
        """Plot and save confusion matrix heatmap."""
        plt.figure(figsize=(12, 10))
        sns.heatmap(
            conf_matrix,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=self.label_mapping.keys(),
            yticklabels=self.label_mapping.keys()
        )
        plt.title(title)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.tight_layout()
        plt.savefig(f'{title.lower().replace(" ", "_")}.png')
        plt.close()

    def plot_metrics(self, metrics_history: Dict):
        """Plot training metrics over time."""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        # Plot losses
        ax1.plot(metrics_history['train_loss'], label='Training Loss')
        ax1.plot(metrics_history['val_loss'], label='Validation Loss')
        ax1.set_title('Loss History')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.legend()

        # Plot validation metrics
        ax2.plot(metrics_history['val_accuracy'], label='Accuracy')
        ax2.plot(metrics_history['val_f1'], label='F1 Score')
        ax2.set_title('Validation Metrics History')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Score')
        ax2.legend()

        plt.tight_layout()
        plt.savefig('training_metrics.png')
        plt.close()

    def train_model(self, train_loader: DataLoader, val_loader: DataLoader):
        """Train the model with enhanced metrics tracking."""
        logging.info("Starting training...")

        optimizer = AdamW(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=0.01
        )

        total_steps = len(train_loader) * self.epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=total_steps // 10,
            num_training_steps=total_steps
        )

        best_val_loss = float('inf')
        best_val_f1 = 0
        patience = 3
        patience_counter = 0

        for epoch in range(self.epochs):
            # Training phase
            self.model.train()
            total_train_loss = 0
            progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{self.epochs} [Training]')

            for batch in progress_bar:
                optimizer.zero_grad()

                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )

                loss = outputs.loss
                if self.class_weights is not None:
                    loss = F.cross_entropy(outputs.logits, labels, weight=self.class_weights)

                total_train_loss += loss.item()

                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()

                progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

            avg_train_loss = total_train_loss / len(train_loader)
            self.metrics_history['train_loss'].append(avg_train_loss)

            # Validation phase
            val_loss, val_metrics = self.evaluate(
                val_loader,
                desc=f'Epoch {epoch + 1}/{self.epochs} [Validation]'
            )

            self.metrics_history['val_loss'].append(val_loss)
            self.metrics_history['val_accuracy'].append(val_metrics['accuracy'])
            self.metrics_history['val_f1'].append(val_metrics['classification_report']['macro avg']['f1-score'])

            logging.info(f"\nEpoch {epoch + 1}")
            logging.info(f"Average training loss: {avg_train_loss:.4f}")
            logging.info(f"Validation loss: {val_loss:.4f}")
            logging.info(f"Validation accuracy: {val_metrics['accuracy']:.4f}")
            logging.info(f"Validation macro F1: {val_metrics['classification_report']['macro avg']['f1-score']:.4f}")

            # Save best model based on F1 score
            current_f1 = val_metrics['classification_report']['macro avg']['f1-score']
            if current_f1 > best_val_f1:
                logging.info("Saving best model...")
                best_val_f1 = current_f1
                patience_counter = 0
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_loss': val_loss,
                    'val_f1': current_f1,
                }, 'best_emotion_model.pth')
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    logging.info("Early stopping triggered")
                    break

        # Plot final metrics
        self.plot_metrics(self.metrics_history)

        return self.metrics_history

In [14]:
def main():
    try:
        classifier = EmotionClassifier(
            num_labels=7,
            model_name="roberta-base",
            max_length=128,
            batch_size=8,
            learning_rate=1e-5,
            epochs=15
        )

        # Prepare datasets and dataloaders
        logging.info("Loading and preparing data...")
        train_loader, val_loader, test_loader = classifier.prepare_data("transcriptions.json")

        # Train the model
        logging.info("Starting training process...")
        history = classifier.train_model(train_loader, val_loader)

        # Final evaluation on test set
        logging.info("\nPerforming final evaluation on test set...")
        test_loss, test_metrics = classifier.evaluate(test_loader, desc="Final Evaluation")

        # Print final metrics
        logging.info("\nFinal Test Results:")
        logging.info(f"Test Loss: {test_loss:.4f}")
        logging.info(f"Test Accuracy: {test_metrics['accuracy']:.4f}")
        logging.info("\nClassification Report:")
        print(classification_report(
            test_metrics['true_labels'],
            test_metrics['predicted_labels'],
            target_names=classifier.label_mapping.keys()
        ))

        # Plot confusion matrix
        classifier.plot_confusion_matrix(
            test_metrics['confusion_matrix'],
            title="Test Set Confusion Matrix"
        )

        # Save final evaluation results
        logging.info("\nSaving evaluation results...")
        results = {
            'test_loss': test_loss,
            'test_metrics': {
                'accuracy': test_metrics['accuracy'],
                'classification_report': test_metrics['classification_report'],
                'confusion_matrix': test_metrics['confusion_matrix'].tolist()
            },
            'training_history': history
        }

        with open('evaluation_results.json', 'w') as f:
            json.dump(results, f, indent=4)

        logging.info("Evaluation results saved to 'evaluation_results.json'")

    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        raise

In [15]:
if __name__ == "__main__":
    main()

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


emotion
neutral      120
fearful       60
angry         60
happy         60
disgust       60
surprised     60
sad           60
Name: count, dtype: int64
neutral: 0.57
happy: 1.14
sad: 1.14
angry: 1.14
fearful: 1.14
disgust: 1.14
surprised: 1.14


Epoch 1/15 [Training]: 100%|██████████| 42/42 [08:59<00:00, 12.85s/it, loss=1.9607]
Epoch 1/15 [Validation]: 100%|██████████| 9/9 [00:32<00:00,  3.57s/it, loss=1.9800]
Epoch 2/15 [Training]: 100%|██████████| 42/42 [08:57<00:00, 12.80s/it, loss=1.8910]
Epoch 2/15 [Validation]: 100%|██████████| 9/9 [00:32<00:00,  3.66s/it, loss=1.9653]
Epoch 3/15 [Training]: 100%|██████████| 42/42 [08:39<00:00, 12.37s/it, loss=1.9436]
Epoch 3/15 [Validation]: 100%|██████████| 9/9 [00:31<00:00,  3.45s/it, loss=1.9580]
Epoch 4/15 [Training]: 100%|██████████| 42/42 [08:43<00:00, 12.47s/it, loss=1.8724]
Epoch 4/15 [Validation]: 100%|██████████| 9/9 [00:32<00:00,  3.64s/it, loss=1.9202]
Epoch 5/15 [Training]: 100%|██████████| 42/42 [08:59<00:00, 12.84s/it, loss=1.4686]
Epoch 5/15 [Validation]: 100%|██████████| 9/9 [00:34<00:00,  3.80s/it, loss=1.8619]
Epoch 6/15 [Training]: 100%|██████████| 42/42 [08:58<00:00, 12.82s/it, loss=1.1725]
Epoch 6/15 [Validation]: 100%|██████████| 9/9 [00:32<00:00,  3.57s/it, loss=

              precision    recall  f1-score   support

     neutral       0.54      0.39      0.45        18
       happy       0.50      0.67      0.57         9
         sad       0.62      0.56      0.59         9
       angry       0.78      0.78      0.78         9
     fearful       1.00      0.56      0.71         9
     disgust       0.78      0.78      0.78         9
   surprised       0.56      1.00      0.72         9

    accuracy                           0.64        72
   macro avg       0.68      0.67      0.66        72
weighted avg       0.66      0.64      0.63        72

