In [18]:
import os
import librosa
import numpy as np
import librosa.display
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, f1_score, roc_auc_score

In [2]:
def audio_fixer(files_path):
    zcr_list = []
    for path in files_path:
        audio_data, _ = librosa.load(path)
        if len(audio_data) == 0:
            continue
        zcr = librosa.feature.zero_crossing_rate(audio_data)
        zcr_list.append(zcr)

    return zcr_list.copy()

In [3]:
disorder_speech_folder = 'C:/Users/Работяга/Desktop/disorder voices speech/Dataset/Disorder Voices/Disorder Voices'
healthy_speech_folder = 'C:/Users/Работяга/Desktop/disorder voices speech/Dataset/Normal Voices/Normal Voices'

In [4]:
all_healthy_files_path = [os.path.join(healthy_speech_folder, f) for f in os.listdir(healthy_speech_folder) if f.endswith('.wav')]
all_disorder_files_path = [os.path.join(disorder_speech_folder, f) for f in os.listdir(disorder_speech_folder) if f.endswith('.wav')]

In [5]:
zcr_healthy = audio_fixer(all_healthy_files_path)

In [6]:
zcr_disorder = audio_fixer(all_disorder_files_path)

In [7]:
def dataset_fixer(data):
    max_len = 0
    for i in data:
        data_len = len(i[0])
        if data_len > max_len:
            max_len = data_len
    new_dataset = []
    for i in data:
        zeros_to_append = max_len - len(i[0])
        if zeros_to_append != 0:
            zeros = np.zeros(zeros_to_append, dtype=np.float64)
            new_dataset.append(np.copy(np.hstack([i[0], zeros])))
        else:
            new_dataset.append(np.copy(i[0]))
    return new_dataset

In [8]:
# Метки данных
labels_healthy = torch.zeros(len(zcr_healthy), dtype=torch.long)  # 0 для здоровых
labels_disorder = torch.ones(len(zcr_disorder), dtype=torch.long)  # 1 для больных

# Объединение данных и меток
dataset = zcr_healthy + zcr_disorder
labels = torch.cat((labels_healthy, labels_disorder))

dataset = dataset_fixer(dataset)

train_dataset, test_dataset, train_labels, test_labels = train_test_split(dataset, labels, test_size=0.2, random_state=42)

In [9]:
train_dataset = [torch.tensor(data, dtype=torch.float32) for data in train_dataset]
test_dataset = [torch.tensor(data, dtype=torch.float32) for data in test_dataset]

In [10]:
# Определяем кастомный Dataset
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Создаем экземпляры Dataset для тренировки и тестирования
train_data = CustomDataset(train_dataset, train_labels)
test_data = CustomDataset(test_dataset, test_labels)

In [11]:
# Создаем DataLoader для тренировки и тестирования
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=True)

In [12]:
# GRU Model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

In [21]:
# Model parameters
input_size = len(train_dataset[0])  # Input size should be the number of features
hidden_size = 128
output_size = 2
num_layers = 2

# Initialize model, loss function, and optimizer
model = GRUModel(input_size, hidden_size, output_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Train the model
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            data = data.unsqueeze(1)  # Add batch dimension: (batch_size, sequence_length, input_size)
            outputs = model(data)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


In [22]:
# Train and evaluate
train_model(model, train_loader, criterion, optimizer)

Epoch [1/10], Loss: 0.2942
Epoch [2/10], Loss: 0.1315
Epoch [3/10], Loss: 0.0302
Epoch [4/10], Loss: 0.0294
Epoch [5/10], Loss: 0.1075
Epoch [6/10], Loss: 0.0814
Epoch [7/10], Loss: 0.0163
Epoch [8/10], Loss: 0.1335
Epoch [9/10], Loss: 0.0079
Epoch [10/10], Loss: 0.0012


In [23]:
def evaluate_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []
    all_probabilities = []

    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(device), labels.to(device)
            data = data.unsqueeze(1)  # Add batch dimension: (batch_size, sequence_length, input_size)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            
            # Collect all predictions and labels
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())
            
            # If outputs contain probabilities, collect them for ROC AUC
            if outputs.size(1) > 1:  # Check if it's not binary classification
                probabilities = torch.nn.functional.softmax(outputs, dim=1)[:, 1]
            else:
                probabilities = torch.sigmoid(outputs)
            all_probabilities.extend(probabilities.cpu().numpy())

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_predictions, average='binary')
    f1 = f1_score(all_labels, all_predictions, average='binary')
    roc_auc = roc_auc_score(all_labels, all_probabilities)

    print(f'Accuracy: {accuracy:.2f}%')
    print(f'Precision: {precision:.2f}')
    print(f'F1 Score: {f1:.2f}')
    print(f'ROC AUC: {roc_auc:.2f}')

In [24]:
evaluate_model(model, test_loader, device)

Accuracy: 99.12%
Precision: 0.98
F1 Score: 0.99
ROC AUC: 1.00
