<a href="https://colab.research.google.com/github/RohitSh26/pytorch-learn/blob/master/cry.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!git clone https://github.com/gveres/donateacry-corpus.git

Cloning into 'donateacry-corpus'...
remote: Enumerating objects: 1616, done.[K
remote: Total 1616 (delta 0), reused 0 (delta 0), pack-reused 1616 (from 1)[K
Receiving objects: 100% (1616/1616), 67.06 MiB | 46.46 MiB/s, done.
Resolving deltas: 100% (43/43), done.


In [13]:
!pip install coremltools
import coremltools as ct

Collecting coremltools
  Downloading coremltools-8.1-cp310-none-manylinux1_x86_64.whl.metadata (2.5 kB)
Collecting cattrs (from coremltools)
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting pyaml (from coremltools)
  Downloading pyaml-24.12.1-py3-none-any.whl.metadata (12 kB)
Downloading coremltools-8.1-cp310-none-manylinux1_x86_64.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cattrs-24.1.2-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-24.12.1-py3-none-any.whl (25 kB)
Installing collected packages: pyaml, cattrs, coremltools
Successfully installed cattrs-24.1.2 coremltools-8.1 pyaml-24.12.1




In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import numpy as np
import librosa
import os
import joblib
# import streamlit as st
# import sounddevice as sd
from scipy.io.wavfile import write

In [39]:
class InfantCryTraining:
    """Class responsible for training the model in Google Colab."""

    def __init__(self, sample_rate=16000):
        self.sample_rate = sample_rate
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.model = None

    class CryDataset(Dataset):
        def __init__(self, features, labels):
            self.features = torch.tensor(features, dtype=torch.float32)
            self.labels = torch.tensor(labels, dtype=torch.long)

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            return self.features[idx], self.labels[idx]

    def extract_features(self, file_path):
        """Extract features from audio files."""
        try:
            audio, sr = librosa.load(file_path, sr=self.sample_rate)
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
            mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr)
            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
            features = np.concatenate([
                np.mean(mfccs.T, axis=0),
                np.mean(mel_spec.T, axis=0),
                np.mean(chroma.T, axis=0)
            ])
            return features
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            return None

    def load_data(self, data_path):
        """Load audio data and labels from the dataset path."""
        features = []
        labels = []
        for category in os.listdir(data_path):
            category_path = os.path.join(data_path, category)
            if os.path.isdir(category_path):
                for file in os.listdir(category_path):
                    file_path = os.path.join(category_path, file)
                    if os.path.isfile(file_path):
                        feature = self.extract_features(file_path)
                        if feature is not None:
                            features.append(feature)
                            labels.append(category)

        # Check if features and labels are not empty
        if not features or not labels:
            raise ValueError("No valid data found. Please check the dataset path and file format.")

        features = np.array(features)
        labels = self.label_encoder.fit_transform(labels)
        self.scaler.fit(features)
        features = self.scaler.transform(features)

        return features, labels

    def build_model(self, input_size, num_classes):
        class CryModel(nn.Module):
            def __init__(self, input_size, num_classes):
                super(CryModel, self).__init__()
                self.fc1 = nn.Linear(input_size, 256)
                self.dropout1 = nn.Dropout(0.3)
                self.fc2 = nn.Linear(256, 128)
                self.dropout2 = nn.Dropout(0.3)
                self.fc3 = nn.Linear(128, num_classes)

            def forward(self, x):
                x = torch.relu(self.fc1(x))
                x = self.dropout1(x)
                x = torch.relu(self.fc2(x))
                x = self.dropout2(x)
                x = self.fc3(x)
                return x

        self.model = CryModel(input_size, num_classes)

    def train(self, features, labels, input_size, num_classes, epochs=30, batch_size=32):
        X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)
        train_dataset = self.CryDataset(X_train, y_train)
        val_dataset = self.CryDataset(X_val, y_val)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        self.build_model(input_size, num_classes)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)

        for epoch in range(epochs):
            self.model.train()
            train_loss = 0
            for features, labels in train_loader:
                optimizer.zero_grad()
                outputs = self.model(features)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            self.model.eval()
            val_loss = 0
            correct = 0
            total = 0
            with torch.no_grad():
                for features, labels in val_loader:
                    outputs = self.model(features)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {correct/total:.4f}")

    def save_model(self, model_path):
        torch.save(self.model.state_dict(), model_path)
        print(f"Model saved to {model_path}")

        # Save the scaler
        joblib.dump(self.scaler, "scaler.pkl")
        print("Scaler saved to scaler.pkl")

if __name__ == "__main__":
    # Training block for Google Colab
    data_path = "./donateacry-corpus/donateacry_corpus_cleaned_and_updated_data/"  # Update with the correct path

    trainer = InfantCryTraining()
    features, labels = trainer.load_data(data_path)

    input_size = features.shape[1]  # Number of features
    num_classes = len(set(labels))  # Number of unique labels

    trainer.train(features, labels, input_size, num_classes, epochs=30, batch_size=32)
    trainer.save_model("infant_cry_model.pth")


Epoch 1/30, Train Loss: 1.1350, Val Loss: 0.8313, Val Accuracy: 0.8043
Epoch 2/30, Train Loss: 0.6590, Val Loss: 0.9070, Val Accuracy: 0.8043
Epoch 3/30, Train Loss: 0.6173, Val Loss: 0.8705, Val Accuracy: 0.8043
Epoch 4/30, Train Loss: 0.5471, Val Loss: 0.8485, Val Accuracy: 0.8043
Epoch 5/30, Train Loss: 0.5231, Val Loss: 0.8647, Val Accuracy: 0.8043
Epoch 6/30, Train Loss: 0.4806, Val Loss: 0.8840, Val Accuracy: 0.8043
Epoch 7/30, Train Loss: 0.4652, Val Loss: 0.9443, Val Accuracy: 0.8043
Epoch 8/30, Train Loss: 0.4065, Val Loss: 0.9735, Val Accuracy: 0.8043
Epoch 9/30, Train Loss: 0.3692, Val Loss: 0.9943, Val Accuracy: 0.8043
Epoch 10/30, Train Loss: 0.3523, Val Loss: 1.0518, Val Accuracy: 0.7935
Epoch 11/30, Train Loss: 0.3066, Val Loss: 1.1286, Val Accuracy: 0.8043
Epoch 12/30, Train Loss: 0.2845, Val Loss: 1.1901, Val Accuracy: 0.7935
Epoch 13/30, Train Loss: 0.2791, Val Loss: 1.2402, Val Accuracy: 0.7935
Epoch 14/30, Train Loss: 0.2507, Val Loss: 1.2503, Val Accuracy: 0.7935
E

In [40]:
class InfantCryModelLoader:
    """Class responsible for loading the trained model locally."""

    def __init__(self):
        self.model = None
        self.scaler = None
        self.label_encoder = None

    def build_model(self, input_size, num_classes):
        class CryModel(nn.Module):
            def __init__(self, input_size, num_classes):
                super(CryModel, self).__init__()
                self.fc1 = nn.Linear(input_size, 256)
                self.dropout1 = nn.Dropout(0.3)
                self.fc2 = nn.Linear(256, 128)
                self.dropout2 = nn.Dropout(0.3)
                self.fc3 = nn.Linear(128, num_classes)

            def forward(self, x):
                x = torch.relu(self.fc1(x))
                x = self.dropout1(x)
                x = torch.relu(self.fc2(x))
                x = self.dropout2(x)
                x = self.fc3(x)
                return x

        self.model = CryModel(input_size, num_classes)

    def load_model(self, model_path, input_size, num_classes, scaler_path, label_encoder):
        self.build_model(input_size, num_classes)
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()
        self.scaler = joblib.load(scaler_path)  # Load the saved scaler
        self.label_encoder = label_encoder
        print(f"Model and Scaler loaded from {model_path} and {scaler_path}")


In [43]:
import coremltools as ct
import torch

# Initialize and load the PyTorch model
model_loader = InfantCryModelLoader()
model_loader.load_model(
    model_path="infant_cry_model.pth",
    input_size=180,  # Changed to 180 to match the input size used during training
    num_classes=5,
    scaler_path="scaler.pkl",  # Scaler isn't needed for CoreML conversion
    label_encoder=None
)
model = model_loader.model

# Convert the PyTorch model to CoreML
example_input = torch.rand(1, 180)  # Example input tensor with the correct shape
traced_model = torch.jit.trace(model, example_input)
coreml_model = ct.convert(
    traced_model,
    inputs=[ct.TensorType(name="input", shape=example_input.shape)],
    convert_to="neuralnetwork"  # Specify NeuralNetwork format for `.mlmodel`
)


# Save the CoreML model
coreml_model.save("InfantCryClassifier.mlmodel")

  self.model.load_state_dict(torch.load(model_path))


Model and Scaler loaded from infant_cry_model.pth and scaler.pkl


Converting PyTorch Frontend ==> MIL Ops:  91%|█████████ | 10/11 [00:00<00:00, 101.90 ops/s]
Running MIL frontend_pytorch pipeline: 100%|██████████| 5/5 [00:00<00:00, 4057.17 passes/s]
Running MIL default pipeline: 100%|██████████| 87/87 [00:00<00:00, 2756.43 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|██████████| 9/9 [00:00<00:00, 4968.90 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 11/11 [00:00<00:00, 579.92 ops/s]
