In [1]:
import os

## dataset

In [2]:
from torch.utils.data import DataLoader

In [3]:
import torch
import torchaudio
from torch.utils.data import Dataset

class HeartAudioDataset(Dataset):
    """Dataset of fixed-length, single-channel waveforms with labels.

    Every item is loaded with ``torchaudio.load``, averaged down to one
    channel, resampled to ``sample_rate`` when the file uses a different
    rate, and then trimmed or right-padded with zeros to exactly
    ``sample_rate * duration`` samples.

    Args:
        file_paths: Sequence of audio file paths.
        labels: Sequence of labels, indexed in parallel with ``file_paths``.
        transform: Optional callable applied to the fixed-length waveform.
        sample_rate: Target sampling rate in Hz.
        duration: Target clip length in seconds.
    """

    def __init__(self, file_paths, labels, transform=None, sample_rate=22050, duration=60):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform
        self.sample_rate = sample_rate
        self.duration = duration
        # Kept as an int so it can be used for slicing even if duration is fractional.
        self.max_len = int(sample_rate * duration)

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(waveform, label)`` for item ``idx``.

        ``waveform`` has shape ``(1, max_len)`` before ``transform`` is
        applied; ``label`` is ``torch.tensor(labels[idx])``.
        """
        waveform, sr = torchaudio.load(self.file_paths[idx])

        # Average the channels so every item is mono.
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        # max_len counts samples at self.sample_rate, so files recorded at a
        # different rate must be resampled first; otherwise the clip would
        # cover a different number of seconds than `duration`.
        if sr != self.sample_rate:
            waveform = torchaudio.functional.resample(
                waveform, orig_freq=sr, new_freq=self.sample_rate
            )

        n_samples = waveform.shape[1]
        if n_samples > self.max_len:
            waveform = waveform[:, :self.max_len]
        else:
            # Zero-pad on the right up to the fixed length.
            waveform = torch.nn.functional.pad(waveform, (0, self.max_len - n_samples))

        # `is not None` rather than truthiness: a callable container such as an
        # empty nn.Sequential is falsy but is still a valid transform.
        if self.transform is not None:
            waveform = self.transform(waveform)
        return waveform, torch.tensor(self.labels[idx])


In [4]:
# Folder prefix for the CDPD recordings; load_cdpd_paths concatenates it
# directly with the file name, so the trailing slash is required.
directory_cdpd = '../data/CDPD/training_data/'

def load_cdpd_paths(file_names):
    """Map each CDPD identifier to the path of its ``*_combined.wav`` file.

    The path is the module-level ``directory_cdpd`` prefix followed by
    ``<identifier>_combined.wav``. Iteration is wrapped in ``tqdm`` so a
    progress bar is shown. Files are not checked for existence.
    """
    return [
        f"{directory_cdpd}{identifier}_combined.wav"
        for identifier in tqdm(file_names)
    ]

def load_acd_paths(directory):
    """Recursively collect the paths of all WAV files under ``directory``.

    Args:
        directory: Root folder to search.

    Returns:
        A sorted list of file paths whose name ends with ``.wav`` (matched
        case-insensitively, so ``.WAV`` is included). Returns an empty list
        when the folder does not exist or holds no WAV files.
    """
    paths = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(directory)
        for file in files
        if file.lower().endswith(".wav")
    ]
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting keeps any later seeded split of these paths reproducible
    # across machines.
    return sorted(paths)

##### CDPD: recording paths and murmur labels

In [5]:
from tqdm import tqdm

In [6]:
# Load the CDPD metadata table and keep only the rows used for training.
import pandas as pd

df = (
    pd.read_csv('../data/CDPD/training_data.csv')
    .loc[
        # Keep rows whose 'Locations' value is one of the listed
        # combinations and whose 'Murmur' value is not 'Unknown'.
        lambda frame: frame['Locations'].isin([
            'AV+PV+TV+MV',
            'AV+PV+MV',
            'AV+AV+PV+PV+TV+MV',
            'AV+MV+MV',
            'AV+PV+MV+Phc+Phc',
            'AV+PV+TV+TV+MV',
            'AV+AV+MV+MV',
            'AV+AV+PV+TV+MV',
            'AV+PV+TV+MV+Phc',
            'AV+AV+AV+MV',
            'AV+AV+PV+TV+MV+MV',
        ])
        & (frame['Murmur'] != 'Unknown')
    ]
    # Row label 266 is removed by hand. The author's leftover inspection
    # snippet looked up Patient ID 50321, so presumably that is the excluded
    # patient (TODO: confirm and record the reason).
    .drop([266])
)

In [7]:
# Display the filtered CDPD metadata table.
df

Unnamed: 0,Patient ID,Locations,Age,Sex,Height,Weight,Pregnancy status,Murmur,Murmur locations,Most audible location,...,Systolic murmur grading,Systolic murmur pitch,Systolic murmur quality,Diastolic murmur timing,Diastolic murmur shape,Diastolic murmur grading,Diastolic murmur pitch,Diastolic murmur quality,Campaign,Additional ID
0,2530,AV+PV+TV+MV,Child,Female,98.0,15.9,False,Absent,,,...,,,,,,,,,CC2015,
1,9979,AV+PV+TV+MV,Child,Female,103.0,13.1,False,Present,AV+MV+PV+TV,TV,...,III/VI,High,Harsh,,,,,,CC2015,
3,13918,AV+PV+TV+MV,Child,Male,98.0,15.9,False,Present,TV,TV,...,I/VI,Low,Blowing,,,,,,CC2015,
4,14241,AV+PV+TV+MV,Child,Male,87.0,11.2,False,Present,AV+MV+PV+TV,PV,...,II/VI,Low,Harsh,,,,,,CC2015,
5,14998,AV+PV+TV+MV,Child,Male,,,False,Absent,,,...,,,,,,,,,CC2015,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
934,85337,AV+PV+TV+MV,Child,Male,130.0,27.3,False,Absent,,,...,,,,,,,,,CC2015,
935,85338,AV+PV+TV+MV,,Female,,,True,Absent,,,...,,,,,,,,,CC2015,
937,85340,AV+PV+TV+MV,Child,Male,105.0,16.6,False,Absent,,,...,,,,,,,,,CC2015,
938,85341,AV+PV+TV+MV,Child,Male,92.0,15.2,False,Absent,,,...,,,,,,,,,CC2015,


In [8]:
# Patient IDs double as file-name stems: load_cdpd_paths turns each one into
# "<directory_cdpd><id>_combined.wav".
file_names_cdpd = list(df['Patient ID'])
cdpd_paths = load_cdpd_paths(file_names_cdpd)

# Label encoding: 0 - normal ('Absent' murmur), 1 - pathology (any other value).
cdpd_labels = list(df['Murmur'].apply(lambda x: 0 if x=='Absent' else 1))

  0%|          | 0/594 [00:00<?, ?it/s]

100%|██████████| 594/594 [00:00<00:00, 3141761.13it/s]


##### ACD: local recordings (normal / pathology folders)

In [10]:
# Paths for the second dataset. The folder names are Russian for
# "norm" and "pathology"; the CSV name means "Patients".
# `patients` is not read anywhere in the visible cells.
patients = "../data/Пациенты.csv"
norm_dir = "../data/норма"
patology_dir = "../data/патология"

# Collect the normal recordings first, then append the pathological ones, and
# remember how many of each there are.
cda_paths = load_acd_paths(norm_dir)
norm = len(cda_paths)
cda_paths.extend(load_acd_paths(patology_dir))
patology = len(cda_paths) - norm

# Labels follow the same order as cda_paths: 0 for normal, 1 for pathology.
cda_labels = [0]* norm + [1] * patology

#### Train/test split and dataset construction

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Split each source separately (80/20, fixed seed) so both contribute to the
# train and the test portion.
# NOTE(review): neither split passes `stratify=`, so the class ratio in the
# test portion may differ from the train portion — consider stratifying.
X_train, X_test, y_train, y_test = train_test_split(cdpd_paths, cdpd_labels, test_size=0.2, random_state=42)
X_train_cda, X_test_cda, y_train_cda, y_test_cda = train_test_split(cda_paths, cda_labels, test_size=0.2, random_state=42)


# Concatenate the two sources: CDPD items first, then the local recordings.
X_train_cdpd_cda = X_train + X_train_cda
y_train_cdpd_cda = y_train + y_train_cda
X_test_cdpd_cda = X_test + X_test_cda
y_test_cdpd_cda = y_test + y_test_cda

In [13]:
from binary_datasets_ import HeartSoundDataset

In [78]:
# train_dataset = (X_train_cdpd_cda, y_train_cdpd_cda)
# val_dataset = HeartAudioDataset(X_test_cdpd_cda,y_test_cdpd_cda)HeartAudioDataset

In [14]:
# Audio / spectrogram settings passed to every HeartSoundDataset built below.
TARGET_SR = 16000
MAX_DURATION_SEC = 30 # Maximum clip length to process
N_MELS = 64 # For example: ResNet18 is not very deep, so many mel components may be redundant
N_FFT = 1024
HOP_LENGTH = 512

In [15]:
# Train and validation datasets in ResNet mode. Both use the same settings
# and differ only in which split of paths/labels they receive, so they are
# built from one constructor call evaluated once per split (train first).
train_dataset_resnet, val_dataset_resnet = (
    HeartSoundDataset(
        audio_paths=split_paths,
        labels=split_labels,
        target_sample_rate=TARGET_SR,
        max_duration_seconds=MAX_DURATION_SEC,
        n_mels=N_MELS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        resnet_mode=True
    )
    for split_paths, split_labels in (
        (X_train_cdpd_cda, y_train_cdpd_cda),
        (X_test_cdpd_cda, y_test_cdpd_cda),
    )
)

In [16]:
# Train and validation datasets for the BiLSTM model. Same settings as the
# ResNet datasets except that resnet_mode is switched off, which the original
# author marked as important for BiLSTM. Built once per split (train first).
train_dataset_bilstm, val_dataset_bilstm = (
    HeartSoundDataset(
        audio_paths=split_paths,
        labels=split_labels,
        target_sample_rate=TARGET_SR,
        max_duration_seconds=MAX_DURATION_SEC,
        n_mels=N_MELS,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        resnet_mode=False
    )
    for split_paths, split_labels in (
        (X_train_cdpd_cda, y_train_cdpd_cda),
        (X_test_cdpd_cda, y_test_cdpd_cda),
    )
)

In [17]:
# Shuffle only the training data. Validation metrics do not depend on batch
# order, and a fixed order keeps per-sample outputs comparable between runs.
train_loader = DataLoader(train_dataset_resnet, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset_resnet, batch_size=64, shuffle=False)

In [18]:
# Shuffle only the training data. Validation metrics do not depend on batch
# order, and a fixed order keeps per-sample outputs comparable between runs.
train_lstm_loader = DataLoader(train_dataset_bilstm, batch_size=64, shuffle=True)
val_lstm_loader = DataLoader(val_dataset_bilstm, batch_size=64, shuffle=False)

#### models

In [19]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models import resnet18
import torchaudio.transforms as T

def train_model(model, train_loader, val_loader, num_epochs=10, name='', lr=1e-4):
    """Train a classifier and checkpoint the weights with the best validation accuracy.

    After every epoch the model is scored on ``val_loader``. Whenever the
    accuracy improves (and always after the first epoch), the state dict is
    written to ``./models/best_{name}_model.pth``.

    Args:
        model: ``nn.Module`` returning class logits of shape ``(batch, classes)``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        name: Tag inserted into the checkpoint file name.
        lr: Adam learning rate.

    Returns:
        The best validation accuracy observed, or ``0.0`` if no epoch ran.
    """
    # Prefer CUDA, then Apple MPS, then CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    model.to(device)

    # CrossEntropyLoss takes raw logits and integer class indices.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # torch.save does not create missing parent folders.
    os.makedirs("./models", exist_ok=True)
    checkpoint_path = f"./models/best_{name}_model.pth"

    # None (rather than 0.0) so the first epoch is always checkpointed, even
    # at 0% accuracy; callers that reload the checkpoint then find this
    # run's weights instead of a missing or stale file.
    best_acc = None

    for epoch in tqdm(range(num_epochs)):
        model.train()
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            preds = model(x_batch)
            loss = criterion(preds, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                outputs = model(x_val)
                _, preds = torch.max(outputs, 1)

                correct += (preds == y_val).sum().item()
                total += y_val.size(0)
        # An empty validation loader would otherwise divide by zero.
        acc = correct / total if total else 0.0
        print(f"Epoch {epoch+1}/{num_epochs}, Val Acc: {acc:.4f}")
        if best_acc is None or acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), checkpoint_path)

    return best_acc if best_acc is not None else 0.0


In [21]:
from sklearn.metrics import accuracy_score, f1_score

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F

def specificity_score(y_true, y_pred):
    """Specificity (true-negative rate) for binary labels, with 0 as the negative class.

    Computed as ``TN / (TN + FP)`` from the confusion matrix; a small epsilon
    in the denominator avoids division by zero when there are no negatives.

    Args:
        y_true: Iterable of ground-truth labels.
        y_pred: Iterable of predicted labels, same length as ``y_true``.

    Returns:
        The specificity, or ``0`` when the inputs are empty or the labels are
        not a binary 0/1 problem.
    """
    present = set(y_true) | set(y_pred)
    if not present:
        return 0

    if present <= {0, 1}:
        # Fix the label set so the matrix is always 2x2. Without it, data that
        # contains a single class yields a 1x1 matrix, and a perfect
        # all-negative result would be reported as specificity 0.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    else:
        cm = confusion_matrix(y_true, y_pred)

    if cm.shape == (2, 2):
        tn, fp = cm[0, 0], cm[0, 1]
        return tn / (tn + fp + 1e-10)
    return 0

def evaluate_model(model, dataloader, name, num_classes=2):
    """Run ``model`` over ``dataloader`` and compute classification metrics.

    Args:
        model: ``nn.Module`` returning class logits of shape ``(batch, classes)``.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        name: Value stored under the ``"Model"`` key of the result.
        num_classes: Accepted for interface compatibility; not used here.

    Returns:
        A dict with keys ``Model``, ``Accuracy``, ``F1``, ``Precision``,
        ``Recall`` (the last three macro-averaged) and ``Specificity``.
    """
    # The original body read an undefined global `device`. Use the device the
    # model's parameters already live on; fall back to CPU for a model with
    # no parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            pred = torch.argmax(out, dim=1)
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    prec = precision_score(all_labels, all_preds, average='macro')
    rec = recall_score(all_labels, all_preds, average='macro')
    spec = specificity_score(all_labels, all_preds)
    return {"Model": name, "Accuracy": acc, "F1": f1, "Precision": prec, "Recall": rec, "Specificity": spec}


In [23]:
def predict(model_path, audio_path, num_classes=2):
    """Classify one audio file with a saved ``HeartSoundResNet`` checkpoint.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        audio_path: Path to the audio file to classify.
        num_classes: Number of output classes passed to the model constructor.

    Returns:
        The predicted class index as a Python int.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = HeartSoundResNet(num_classes=num_classes)
    state = torch.load(model_path, map_location=device)
    net.load_state_dict(state)
    net.eval()
    net.to(device)

    # NOTE(review): these features are a 128-band mel spectrogram at the file's
    # native sample rate, with no explicit batch dimension, while the training
    # cells configure HeartSoundDataset with 16 kHz and 64 mel bands. Confirm
    # that this matches what the checkpoint was trained on.
    waveform, sr = torchaudio.load(audio_path)
    to_mel = T.MelSpectrogram(sample_rate=sr, n_mels=128)
    # Limit the length to the first 500 frames.
    features = to_mel(waveform)[:, :, :500].to(device)

    with torch.no_grad():
        logits = net(features)
    return torch.argmax(logits, dim=1).item()


In [32]:
def train_and_evaluate_all(models, train_loader, val_loader, num_classes=2, epochs=10):
    """Train each model, reload its best checkpoint, and tabulate validation metrics.

    Args:
        models: Mapping of model name to model instance.
        train_loader: Training batches passed to ``train_model``.
        val_loader: Validation batches used for training-time selection and
            for the final ``evaluate_model`` call.
        num_classes: Forwarded to ``evaluate_model``.
        epochs: Number of training epochs per model.

    Returns:
        A ``pandas.DataFrame`` with one row of metrics per model.
    """
    rows = []
    for name, model in models.items():
        print(f"Training {name}...")
        # Path that train_model writes its best weights to (could be renamed).
        checkpoint = f"./models/best_{name}_model.pth"
        # To warm-start from an earlier run, load `checkpoint` into the model
        # before this call.
        train_model(model, train_loader, val_loader, num_epochs=epochs, name=name)
        model.load_state_dict(torch.load(checkpoint))
        rows.append(evaluate_model(model, val_loader, name, num_classes=num_classes))
    return pd.DataFrame(rows)


In [25]:
from nn_models import  HeartSoundResNet, HeartSoundBiLSTM, AttentionCNN

In [31]:
# Models to train, keyed by the name used in log lines and checkpoint files.
# Only the ResNet is enabled; the other two entries are kept commented out.
models = {
    'HeartSoundResNet': HeartSoundResNet(),
    # 'HeartSoundBiLSTM': HeartSoundBiLSTM(),
    # 'AttentionCNN': AttentionCNN(),
    }

#### train

In [33]:
# Train and evaluate every model in `models` on the ResNet-mode loaders
# (default 10 epochs).
results = train_and_evaluate_all(models, train_loader, val_loader)

Training HeartSoundResNet...


 10%|█         | 1/10 [01:25<12:46, 85.17s/it]

Epoch 1/10, Val Acc: 0.7846


 20%|██        | 2/10 [02:42<10:45, 80.65s/it]

Epoch 2/10, Val Acc: 0.8462


 30%|███       | 3/10 [03:53<08:52, 76.03s/it]

Epoch 3/10, Val Acc: 0.8308


 40%|████      | 4/10 [05:23<08:09, 81.59s/it]

Epoch 4/10, Val Acc: 0.8462


 50%|█████     | 5/10 [06:51<06:59, 83.81s/it]

Epoch 5/10, Val Acc: 0.8846


In [27]:
# Train and evaluate the BiLSTM on the loaders built with resnet_mode=False.
# NOTE: this rebinds `results`, replacing the table from the ResNet run.
# NOTE(review): in the recorded run the validation accuracy stayed at 0.7231
# for every epoch before the manual interrupt — presumably the model predicts
# a single class; verify.
results = train_and_evaluate_all({'HeartSoundBiLSTM': HeartSoundBiLSTM()}, train_lstm_loader, val_lstm_loader, epochs=20)

Training HeartSoundBiLSTM...


  5%|▌         | 1/20 [01:14<23:26, 74.02s/it]

Epoch 1/20, Val Acc: 0.7231


 10%|█         | 2/20 [02:29<22:28, 74.94s/it]

Epoch 2/20, Val Acc: 0.7231


 15%|█▌        | 3/20 [03:34<19:55, 70.32s/it]

Epoch 3/20, Val Acc: 0.7231


 20%|██        | 4/20 [04:55<19:55, 74.70s/it]

Epoch 4/20, Val Acc: 0.7231


 25%|██▌       | 5/20 [06:06<18:17, 73.20s/it]

Epoch 5/20, Val Acc: 0.7231


 30%|███       | 6/20 [07:28<17:47, 76.26s/it]

Epoch 6/20, Val Acc: 0.7231


 35%|███▌      | 7/20 [08:53<17:07, 79.03s/it]

Epoch 7/20, Val Acc: 0.7231


 35%|███▌      | 7/20 [09:03<16:49, 77.64s/it]


KeyboardInterrupt: 