In [7]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

### Data Preparation

In [8]:


# Custom Dataset to load the .npy files
class AccentDataset(Dataset):
    def __init__(self, feature_dir):
        self.feature_dir = feature_dir
        self.files = []
        self.labels = []

        # Get all .npy files and their corresponding folder names (targets)
        for folder in os.listdir(feature_dir):
            folder_path = os.path.join(feature_dir, folder)
            if os.path.isdir(folder_path):
                for file in os.listdir(folder_path):
                    if file.endswith('.npy'):
                        self.files.append(os.path.join(folder_path, file))
                        self.labels.append(folder)
        
        # Encode the folder names (categories) to numeric labels
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        file_path = self.files[idx]
        features = np.load(file_path)  # Load the .npy file
        features = torch.tensor(features).float()  # Convert to tensor
        label = torch.tensor(self.labels[idx]).long()  # Get label
        return features, label


### Model Setup

In [9]:
import torch.nn as nn
import torch.optim as optim

In [10]:


class SimpleConformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(SimpleConformer, self).__init__()
        self.conformer = nn.Sequential(
            nn.Conv1d(768, 32, kernel_size=3, stride=1, padding=1),  # Example Conv layer
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )
        self.flattened_size = 64 * 162
        # Fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(self.flattened_size, 128),
            nn.ReLU(),
            nn.Linear(128, 3),  # Output layer for 3 categories
        )
    def forward(self, x):
        x = self.conformer(x)
        x = x.view(x.size(0), -1)  # Flatten
        out = self.fc(x)
        return out
# Example usage
model = SimpleConformer(208002,3)
input_tensor = torch.randn(16, 768, 649)  # Batch of 16, 768 channels, sequence length 649
output = model(input_tensor)
print(output.shape) 

def compute_accuracy(preds, labels):
    _, predicted = torch.max(preds, 1)  # Get the index of the max log-probability
    correct = (predicted == labels).sum().item()  # Count correct predictions
    accuracy = correct / labels.size(0)
    return accuracy


torch.Size([16, 3])


### Training the Model

In [6]:
dataset = AccentDataset(feature_dir='TrialDataset')
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Define the model, loss function, and optimizer
input_dim = 208002  # Your input dimension (sequence length)
num_classes = 3  # Number of accent categories
model = SimpleConformer(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    for features, labels in dataloader:
        features, labels = features.to(device), labels.to(device)
        
        
        features = features.squeeze(1)
        features = features.transpose(1, 2)  
        
        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
            
            # Compute accuracy
        accuracy = compute_accuracy(outputs, labels)
        correct_predictions += accuracy * labels.size(0)
        total_predictions += labels.size(0)
    
    epoch_loss = running_loss / total_predictions
    epoch_accuracy = correct_predictions / total_predictions
    
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')
        
print('Training completed!')


Epoch 1/10, Loss: 0.0663, Accuracy: 0.4156
Epoch 2/10, Loss: 0.0636, Accuracy: 0.4720
Epoch 3/10, Loss: 0.0599, Accuracy: 0.5255
Epoch 4/10, Loss: 0.0564, Accuracy: 0.5724
Epoch 5/10, Loss: 0.0528, Accuracy: 0.6083
Epoch 6/10, Loss: 0.0490, Accuracy: 0.6417
Epoch 7/10, Loss: 0.0453, Accuracy: 0.6719
Epoch 8/10, Loss: 0.0416, Accuracy: 0.7015
Epoch 9/10, Loss: 0.0380, Accuracy: 0.7295
Epoch 10/10, Loss: 0.0348, Accuracy: 0.7511
Training completed!


In [8]:
torch.save(model.state_dict(), 'model_3cat')
print("Model saved successfully.")

Model saved successfully.


### Load Test Data and Perform Evaluation

In [11]:
input_dim = 208002  # Your input dimension (sequence length)
num_classes = 3  # Number of accent categories

In [12]:
# Load the model for testing/evaluation
model = SimpleConformer(input_dim, num_classes)
model.load_state_dict(torch.load('model_3cat'))
model.eval()  # Set to evaluation mode


  model.load_state_dict(torch.load('model_3cat'))


SimpleConformer(
  (conformer): Sequential(
    (0): Conv1d(768, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=10368, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=3, bias=True)
  )
)

In [13]:
import numpy as np
import os

In [24]:
class AccentDataset(Dataset):
    def __init__(self, feature_dir):
        self.feature_dir = feature_dir
        self.files = []
        self.labels = []

        # Get all .npy files and their corresponding folder names (targets)
        for folder in os.listdir(feature_dir):
            folder_path = os.path.join(feature_dir, folder)
            if os.path.isdir(folder_path):
                for file in os.listdir(folder_path):
                    if file.endswith('.npy'):
                        self.files.append(os.path.join(folder_path, file))
                        self.labels.append(folder)
        
        # Encode the folder names (categories) to numeric labels
        self.label_encoder = LabelEncoder()
        self.labels = self.label_encoder.fit_transform(self.labels)
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        file_path = self.files[idx]
        features = np.load(file_path)  # Load the .npy file
        features = torch.tensor(features).float()  # Convert to tensor
        features = features.squeeze(0)  # Add channel dimension if needed
        features = features.permute(1, 0)
        label = torch.tensor(self.labels[idx]).long()  # Get label
        return features, label

In [15]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix,classification_report

In [16]:
def evaluate_model(model, test_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    report = classification_report(all_labels, all_preds)
    conf_matrix = confusion_matrix(all_labels, all_preds)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Classification Report:")
    print(report)
    print("Confusion Matrix:")
    print(conf_matrix)

    return accuracy, report, conf_matrix

In [25]:
test_dir = 'TrialDataset'

# Hyperparameters
batch_size = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load test dataset
test_dataset = AccentDataset(test_dir)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load your model (replace 'YourModelClass' with the actual class of your model)
model = SimpleConformer(input_dim, num_classes)
model.load_state_dict(torch.load('model_3cat'))
model.to(device)

# Evaluate the model
evaluate_model(model, test_loader, device)

  model.load_state_dict(torch.load('model_3cat'))


Accuracy: 79.64%
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.74      0.78     10047
           1       0.75      0.84      0.79      9486
           2       0.83      0.81      0.82      7000

    accuracy                           0.80     26533
   macro avg       0.80      0.80      0.80     26533
weighted avg       0.80      0.80      0.80     26533

Confusion Matrix:
[[7466 1893  688]
 [1018 7969  499]
 [ 542  763 5695]]


(0.7963667885274941,
 '              precision    recall  f1-score   support\n\n           0       0.83      0.74      0.78     10047\n           1       0.75      0.84      0.79      9486\n           2       0.83      0.81      0.82      7000\n\n    accuracy                           0.80     26533\n   macro avg       0.80      0.80      0.80     26533\nweighted avg       0.80      0.80      0.80     26533\n',
 array([[7466, 1893,  688],
        [1018, 7969,  499],
        [ 542,  763, 5695]], dtype=int64))