In [46]:
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
import librosa
import pandas as pd
import os
import urllib.request
from sklearn.model_selection import train_test_split

In [47]:
# ---- Load YAMNet Model and Class Labels ----
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
LABELS_URL = "https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv"


In [48]:
def load_labels(url):
    with urllib.request.urlopen(url) as f:
        lines = f.read().decode('utf-8').splitlines()
    return [line.split(',')[2] for line in lines][1:]  # Extract class names

class_labels = load_labels(LABELS_URL)


In [49]:
# ---- Audio Processing ----
def preprocess_audio(file_path, target_sr=16000):
    waveform, sr = librosa.load(file_path, sr=target_sr)
    waveform = waveform / np.max(np.abs(waveform))  # Normalize
    return waveform, sr

def extract_features(waveform, sr=16000, n_mels=128, target_width=128):
    mel_spectrogram = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=n_mels, fmax=8000)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
    
    current_width = log_mel_spectrogram.shape[-1]
    if current_width < target_width:
        pad_width = target_width - current_width
        log_mel_spectrogram = np.pad(log_mel_spectrogram, ((0, 0), (0, pad_width)), mode='constant')
    elif current_width > target_width:
        log_mel_spectrogram = log_mel_spectrogram[:, :target_width]
    
    return log_mel_spectrogram




In [50]:
# ---- Load Data ----
def load_data(dataset_folder, set_a_csv, set_b_csv):
    df_a = pd.read_csv(set_a_csv)
    df_b = pd.read_csv(set_b_csv)
    
    audio_files = []
    labels = []
    
    for _, row in df_a.iterrows():
        if 'filename' in df_a.columns:
            file_path = os.path.join(dataset_folder, 'set_a', row['filename'])
            if os.path.exists(file_path):
                audio_files.append(file_path)
                labels.append(row['label'])
    
    for _, row in df_b.iterrows():
        if 'filename' in df_b.columns:
            file_path = os.path.join(dataset_folder, 'set_b', row['filename'])
            if os.path.exists(file_path):
                audio_files.append(file_path)
                labels.append(row['label'])
    
    return audio_files, labels


In [51]:
# ---- Custom Dataset Class ----
class HeartbeatDataset(tf.keras.utils.Sequence):
    def __init__(self, audio_files, labels, batch_size=32, target_sr=16000, shuffle=True):
        self.audio_files = audio_files
        self.labels = labels
        self.batch_size = batch_size
        self.target_sr = target_sr
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.audio_files))
        self.on_epoch_end()
    
    def __len__(self):
        return int(np.floor(len(self.audio_files) / self.batch_size))
    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __getitem__(self, index):
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_audio_files = [self.audio_files[k] for k in batch_indexes]
        batch_labels = [self.labels[k] for k in batch_indexes]
        X, y = self.__data_generation(batch_audio_files, batch_labels)
        return X, y
    
    def __data_generation(self, batch_audio_files, batch_labels):
        batch_labels = np.array(batch_labels)
        X = np.empty((self.batch_size, 128, 128))
        y = np.empty((self.batch_size, batch_labels.shape[1]))
        
        for i, audio_file in enumerate(batch_audio_files):
            waveform, sr = preprocess_audio(audio_file, self.target_sr)
            features = extract_features(waveform, sr)
            X[i,] = features
            y[i,] = batch_labels[i]
        
        return X, y


In [None]:
# ---- Train Model ----
def train_heartbeat_model(train_audio_files, train_labels, val_audio_files, val_labels):
    train_labels = np.array(train_labels)
    val_labels = np.array(val_labels)
    
    num_classes = len(set(train_labels))
    train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=num_classes)
    val_labels = tf.keras.utils.to_categorical(val_labels, num_classes=num_classes)
    
    train_dataset = HeartbeatDataset(train_audio_files, train_labels, batch_size=32, shuffle=True)
    val_dataset = HeartbeatDataset(val_audio_files, val_labels, batch_size=32, shuffle=False)
    
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(128, 128, 1)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_dataset, validation_data=val_dataset, epochs=10)
    return model


NameError: name 'train_audio_files' is not defined

In [56]:
# Save the trained model
train_heartbeat_model.save("heartbeat_model.h5")  # Saves in HDF5 format
print("Model saved successfully!")


AttributeError: 'function' object has no attribute 'save'

In [37]:
# ---- Example Usage ----
dataset_folder = r"C:\Users\ronit\Desktop\desktop\projects\health prediction\heart"
set_a_csv = r"C:\Users\ronit\Desktop\desktop\projects\health prediction\heart\set_a_timing.csv"
set_b_csv = r"C:\Users\ronit\Desktop\desktop\projects\health prediction\heart\set_b.csv"

In [38]:
# Load data
audio_files, labels = load_data(dataset_folder, set_a_csv, set_b_csv)


In [39]:
print(f"Total audio files loaded: {len(audio_files)}")
print(f"Total labels loaded: {len(labels)}")


Total audio files loaded: 0
Total labels loaded: 0


In [42]:
if len(audio_files) == 0:
    raise ValueError("No audio files found. Check dataset structure.")


ValueError: No audio files found. Check dataset structure.

In [None]:
# Split into training and validation sets
train_audio_files, val_audio_files, train_labels, val_labels = train_test_split(audio_files, labels, test_size=0.2, random_state=42)

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [43]:
print(f"Total audio files loaded: {len(audio_files)}")
print(f"Total labels loaded: {len(labels)}")


Total audio files loaded: 0
Total labels loaded: 0


In [44]:
set_a_df = pd.read_csv(set_a_csv)
set_b_df = pd.read_csv(set_b_csv)
print(set_a_df.head())  # Ensure 'filename' column exists
print(set_b_df.head())


                            fname  cycle sound  location
0  set_a/normal__201102081321.wav      1    S1     10021
1  set_a/normal__201102081321.wav      1    S2     20759
2  set_a/normal__201102081321.wav      2    S1     35075
3  set_a/normal__201102081321.wav      2    S2     47244
4  set_a/normal__201102081321.wav      3    S1     62992
  dataset                                              fname       label  \
0       b  set_b/Btraining_extrastole_127_1306764300147_C...  extrastole   
1       b  set_b/Btraining_extrastole_128_1306344005749_A...  extrastole   
2       b  set_b/Btraining_extrastole_130_1306347376079_D...  extrastole   
3       b  set_b/Btraining_extrastole_134_1306428161797_C...  extrastole   
4       b  set_b/Btraining_extrastole_138_1306762146980_B...  extrastole   

  sublabel  
0      NaN  
1      NaN  
2      NaN  
3      NaN  
4      NaN  


In [45]:
for _, row in data_df.iterrows():
    file_path = os.path.join(dataset_folder, row['filename'])  # Adjust if needed
    print(f"Checking file: {file_path}")
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")


NameError: name 'data_df' is not defined

In [None]:
# Train model
heartbeat_model = train_heartbeat_model(train_audio_files, train_labels, val_audio_files, val_labels)