# Tone Classifier

### Imports

In [42]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report



from data_loader import extract_features

### Dataset

In [43]:
data_dir = 'data/'

wav_files_list = [f for f in os.listdir(data_dir) if f.endswith('.wav')]
# print(wav_files_list)

In [44]:
# Extract the label out of file names
data = []
for file in wav_files_list:
    parts = file.split('_')
    if len(parts) > 2:
        label = parts[2]
        filepath = os.path.join(data_dir, file)
        data.append({'file': filepath, 'label': label})

df = pd.DataFrame(data)
df.head()

Unnamed: 0,file,label
0,data/1001_DFA_ANG_XX.wav,ANG
1,data/1001_DFA_DIS_XX.wav,DIS
2,data/1001_DFA_FEA_XX.wav,FEA
3,data/1001_DFA_HAP_XX.wav,HAP
4,data/1001_DFA_NEU_XX.wav,NEU


## Tone Classification model

For lightweight tone classification we will use SVM and XGBoost. 
We use LSTMs for sequential tone analysis 

In [45]:
# Extract features
X = []
y = []

for _, row in df.iterrows():
    features = extract_features(row['file'])
    if features is not None:
        X.append(features)
        y.append(row['label'])

X = np.array(X)
y = np.array(y)

In [46]:
# Encode lables (converts emotions to numbers cause SVM only takes numerical values)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [47]:
# Normalize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for LSTM (samples, time_steps=1, features)
X_lstm = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# split train and test
X_train, X_test, y_train, y_test = train_test_split(
    X_lstm, y_encoded, test_size=0.25, stratify=y_encoded, random_state=42
)

In [48]:
# LSTM

# Define the LSTM model
class LSTMToneClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMToneClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        _, (hn, _) = self.lstm(x)
        return self.fc(hn[-1])

In [49]:
# === K-Fold Cross Validation on training set ===
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

input_dim = X_train.shape[2]
hidden_dim = 64
output_dim = len(np.unique(y_encoded))
batch_size = 16
epochs = 15

fold_accuracies = []
best_model = None
best_acc = 0

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    print(f"\n=== Fold {fold+1}/{n_splits} ===")

    X_fold_train = X_train[train_idx]
    y_fold_train = y_train[train_idx]
    X_fold_val = X_train[val_idx]
    y_fold_val = y_train[val_idx]

    # Loaders
    train_dataset = TensorDataset(torch.tensor(X_fold_train, dtype=torch.float32),
                                  torch.tensor(y_fold_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_fold_val, dtype=torch.float32),
                                torch.tensor(y_fold_val, dtype=torch.long))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Initialize model
    model = LSTMToneClassifier(input_dim, hidden_dim, output_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(epochs):
        model.train()
        running_train_loss = 0.0
        for xb, yb in train_loader:
            optimizer.zero_grad()
            outputs = model(xb)
            loss = criterion(outputs, yb)
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()

        avg_train_loss = running_train_loss / len(train_loader)

        # Validation loop
        model.eval()
        all_preds, all_labels = [], []
        val_loss_total = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                outputs = model(xb)
                loss = criterion(outputs, yb)
                val_loss_total += loss.item()
                _, predicted = torch.max(outputs, 1)
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(yb.cpu().numpy())

        avg_val_loss = val_loss_total / len(val_loader)
        acc = accuracy_score(all_labels, all_preds)

        print(f"Epoch {epoch+1:02d}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {acc:.4f}")

    # Fold summary
    fold_acc = accuracy_score(all_labels, all_preds)
    fold_accuracies.append(fold_acc)

    if fold_acc > best_acc:
        best_acc = fold_acc
        best_model = model

    print("Classification Report:\n", classification_report(all_labels, all_preds, target_names=le.classes_))

print(f"\n=== Average Accuracy over {n_splits} folds: {np.mean(fold_accuracies):.4f} ± {np.std(fold_accuracies):.4f}")



=== Fold 1/5 ===
Epoch 01/15 | Train Loss: 1.5737 | Val Loss: 1.4608 | Val Acc: 0.4163
Epoch 02/15 | Train Loss: 1.4061 | Val Loss: 1.3891 | Val Acc: 0.4512
Epoch 03/15 | Train Loss: 1.3552 | Val Loss: 1.3615 | Val Acc: 0.4682
Epoch 04/15 | Train Loss: 1.3330 | Val Loss: 1.3464 | Val Acc: 0.4790
Epoch 05/15 | Train Loss: 1.3174 | Val Loss: 1.3366 | Val Acc: 0.4754
Epoch 06/15 | Train Loss: 1.3036 | Val Loss: 1.3310 | Val Acc: 0.4727
Epoch 07/15 | Train Loss: 1.2939 | Val Loss: 1.3248 | Val Acc: 0.4709
Epoch 08/15 | Train Loss: 1.2832 | Val Loss: 1.3186 | Val Acc: 0.4861
Epoch 09/15 | Train Loss: 1.2742 | Val Loss: 1.3122 | Val Acc: 0.4790
Epoch 10/15 | Train Loss: 1.2670 | Val Loss: 1.3065 | Val Acc: 0.4745
Epoch 11/15 | Train Loss: 1.2570 | Val Loss: 1.3037 | Val Acc: 0.4781
Epoch 12/15 | Train Loss: 1.2493 | Val Loss: 1.3004 | Val Acc: 0.4781
Epoch 13/15 | Train Loss: 1.2419 | Val Loss: 1.2943 | Val Acc: 0.4754
Epoch 14/15 | Train Loss: 1.2360 | Val Loss: 1.2903 | Val Acc: 0.4790
Ep