# Smart Care - Baby Cry Analysis System

This system uses AI to analyze infant crying sounds to identify needs such as hunger, pain, discomfort, or sleep.

In [None]:
import numpy as np
import librosa
import os
import sounddevice as sd
from tkinter import filedialog, messagebox, ttk
import tkinter as tk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical
from collections import Counter
from datetime import datetime
from scipy.io.wavfile import write
import noisereduce as nr
import threading
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# ================================
# Main directory for audio data
# ================================
# Root folder that the loading step walks recursively for .wav/.mp3 files.
# The name of the folder that directly contains a file is used as that file's
# class label. The path is relative, so it is resolved against the current
# working directory.
data_dir = "donateacry_corpus"

In [None]:
# ================================
# Helper Functions
# ================================
def normalize_audio(audio):
    """Scale a signal so that its largest absolute sample equals 1.

    A signal whose peak is not positive (e.g. all zeros) is returned
    unchanged, which avoids dividing by zero.
    """
    loudest = np.max(np.abs(audio))
    return audio / loudest if loudest > 0 else audio

def clean_audio(audio, sample_rate=16000):
    """Denoise a signal and normalize its level.

    The signal is passed through ``noisereduce.reduce_noise`` using the given
    sample rate, then through ``normalize_audio`` and finally through
    ``librosa.util.normalize``.

    Args:
        audio: Audio samples.
        sample_rate: Sample rate of ``audio`` in Hz.

    Returns:
        The denoised, normalized samples.
    """
    denoised = nr.reduce_noise(y=audio, sr=sample_rate)
    peak_scaled = normalize_audio(denoised)
    return librosa.util.normalize(peak_scaled)

def extract_features(file_path):
    """Load an audio file and reduce it to a fixed-length feature vector.

    The file is loaded at its native sample rate, cleaned with
    ``clean_audio`` and converted to 20 MFCCs whose time axis is padded or
    trimmed to 100 frames. First- and second-order deltas are stacked below
    the MFCCs (60 rows in total) and every row is averaged over time.

    Args:
        file_path: Path of an audio file readable by ``librosa.load``.

    Returns:
        A 1-D array of 60 values, or ``None`` when the file is shorter than
        one second or any processing step raises (the error is printed).
    """
    try:
        audio, sample_rate = librosa.load(file_path, sr=None)
        # sr=None keeps the file's native rate, so one second of audio is
        # ``sample_rate`` samples rather than a fixed 22050.
        if len(audio) < sample_rate:
            raise ValueError("Audio file is too short.")

        # NOTE(review): files are analysed at their native rate while the
        # live path records at 16 kHz - confirm the two rates are meant to
        # differ, since MFCCs depend on the sample rate.
        audio = clean_audio(audio, sample_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=20)
        mfccs = librosa.util.fix_length(mfccs, size=100, axis=1)
        delta = librosa.feature.delta(mfccs)
        delta_delta = librosa.feature.delta(mfccs, order=2)
        combined_features = np.vstack((mfccs, delta, delta_delta))
        return np.mean(combined_features, axis=1)
    except Exception as e:
        # Best-effort loader: one unreadable file must not abort the whole
        # dataset scan, so the failure is reported and signalled with None.
        print(f"Error processing file {file_path}: {e}")
        return None

def extract_features_from_audio_data(audio_data, sample_rate=16000):
    """Extract the MFCC-based feature vector from in-memory audio samples.

    Input shorter than one second is zero-padded at the end to exactly one
    second. The samples are then cleaned with ``clean_audio`` and converted
    to 20 MFCCs (time axis padded or trimmed to 100 frames); first- and
    second-order deltas are stacked below them and every one of the 60 rows
    is averaged over time.

    Args:
        audio_data: 1-D sequence of audio samples.
        sample_rate: Sample rate of ``audio_data`` in Hz.

    Returns:
        A 1-D array of 60 values, or ``None`` if any step raises (the error
        is printed).
    """
    try:
        # One second is ``sample_rate`` samples; the previous fixed 22050
        # only matched that for 22.05 kHz audio.
        one_second = int(sample_rate)
        if len(audio_data) < one_second:
            audio_data = np.pad(audio_data, (0, one_second - len(audio_data)), 'constant')

        audio = clean_audio(audio_data, sample_rate)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=20)
        mfccs = librosa.util.fix_length(mfccs, size=100, axis=1)
        delta = librosa.feature.delta(mfccs)
        delta_delta = librosa.feature.delta(mfccs, order=2)
        combined_features = np.vstack((mfccs, delta, delta_delta))
        return np.mean(combined_features, axis=1)
    except Exception as e:
        # Failure is reported and signalled with None so the caller can
        # decide how to inform the user.
        print(f"Error extracting features from audio data: {e}")
        return None

In [None]:
# ================================
# Load and process data
# ================================
features = []
labels = []

# The walk variables are deliberately not named ``root``: that name is bound
# to the Tk main window further down, and re-running this cell would
# otherwise overwrite it with a directory path.
for dirpath, _dirnames, filenames in os.walk(data_dir):
    for filename in filenames:
        # Case-insensitive so that files such as "CRY.WAV" are not skipped.
        if filename.lower().endswith((".wav", ".mp3")):
            file_path = os.path.join(dirpath, filename)
            # The containing folder's name is the class label.
            label = os.path.basename(dirpath)
            feature = extract_features(file_path)
            if feature is not None:
                features.append(feature)
                labels.append(label)

if not features:
    raise RuntimeError(f"No usable audio files were found under '{data_dir}'.")

X = np.array(features)
y = np.array(labels)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)

# Hold out the test set BEFORE scaling and oversampling. Fitting the scaler
# or SMOTE on the full dataset leaks test information into training and puts
# synthetic samples into the test set, which inflates the reported accuracy.
X_train_raw, X_test_raw, y_train_encoded, y_test_encoded = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept for code that still expects the scaled full dataset.
X_scaled = scaler.transform(X)

train_class_counts = Counter(y_train_encoded)
print("Before SMOTE:", train_class_counts)

# SMOTE needs more samples per class than neighbours; shrink k for tiny
# minority classes instead of failing (5 is the library default).
smallest_class = min(train_class_counts.values())
if smallest_class < 2:
    raise RuntimeError("Every class needs at least two training samples for SMOTE.")
smote = SMOTE(random_state=42, k_neighbors=min(5, smallest_class - 1))
# Note: y_encoded covers the full dataset, while X_resampled / y_resampled
# cover only the (oversampled) training split.
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train_encoded)

print("After SMOTE:", Counter(y_resampled))

# num_classes is passed explicitly so train and test one-hot widths agree.
y_resampled_categorical = to_categorical(y_resampled, num_classes=num_classes)

X_train, y_train = X_resampled, y_resampled_categorical
y_test = to_categorical(y_test_encoded, num_classes=num_classes)

In [None]:
# ================================
# Build and train the model
# ================================
# Each sample is a feature vector presented as a one-channel sequence.
input_layer = Input(shape=(X_train.shape[1], 1))

# Three identical stages: Conv1D (32 filters, kernel 3, ReLU) + max-pool by 2.
x = input_layer
for _ in range(3):
    x = Conv1D(32, 3, activation='relu')(x)
    x = MaxPooling1D(2)(x)

# Classifier head: flatten, one hidden layer with dropout, softmax output
# with one unit per one-hot column of y_train.
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.4)(x)
output_layer = Dense(y_train.shape[1], activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(
    optimizer='nadam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The held-out split doubles as validation data during training.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_test, y_test),
)

# Save the model
model.save("baby_cry_model_cnn.keras")
print("Model saved as 'baby_cry_model_cnn.keras'.")

In [None]:
# ================================
# Show training and validation accuracy/loss plots
# ================================
def _plot_metric_curves(train_key, val_key, train_label, val_label, title, y_label):
    """Draw one figure with the training (solid) and validation (dashed) curve."""
    plt.figure(figsize=(12, 6))
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label, linestyle='--')
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


_plot_metric_curves(
    'accuracy', 'val_accuracy',
    'Train Accuracy', 'Validation Accuracy',
    'Training and Validation Accuracy', 'Accuracy',
)
_plot_metric_curves(
    'loss', 'val_loss',
    'Train Loss', 'Validation Loss',
    'Training and Validation Loss', 'Loss',
)

In [None]:
# ================================
# Show Confusion Matrix
# ================================
# Convert softmax outputs and one-hot targets back to integer class ids.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

class_labels = label_encoder.classes_
# Pass the full list of class ids explicitly. Without it, a class that is
# absent from both y_true and y_pred is dropped, so the matrix no longer lines
# up with the tick labels and classification_report rejects target_names.
class_indices = np.arange(len(class_labels))

cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_indices)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()

print("Classification Report:")
print(
    classification_report(
        y_true_classes,
        y_pred_classes,
        labels=class_indices,
        target_names=class_labels,
        zero_division=0,  # report 0 instead of warning for never-predicted classes
    )
)

In [None]:
# ================================
# Show SMOTE Effect
# ================================
original_class_counts = Counter(y_encoded)
resampled_class_counts = Counter(y_resampled)

# Both bar series and the tick labels are driven by one sorted list of class
# ids. Iterating each Counter separately relies on both having the same
# insertion order, which would silently pair bars with the wrong class if
# the two label arrays were ordered differently.
class_ids = sorted(set(original_class_counts) | set(resampled_class_counts))

plt.figure(figsize=(12, 6))
bar_width = 0.35
index = np.arange(len(class_ids))

# Counter returns 0 for a class missing on one side, so lengths always match.
plt.bar(index, [original_class_counts[cls] for cls in class_ids], bar_width, label='Original', color='blue')
plt.bar(index + bar_width, [resampled_class_counts[cls] for cls in class_ids], bar_width, label='After SMOTE', color='green')

plt.xlabel('Classes')
plt.ylabel('Number of Samples')
plt.title('Class Distribution Before and After SMOTE')
plt.legend()
plt.xticks(index + bar_width / 2, [f'Class {cls}' for cls in class_ids])
plt.show()

In [None]:
# ================================
# Real-Time Prediction Logic & GUI Functions
# ================================
# Stop flag polled by the recording loop and by the background prediction
# loop; real_time_prediction() resets it to False and
# stop_real_time_prediction() sets it to True.
stop_real_time = False
# Prediction history: one dict per prediction with the keys "Name", "Age",
# "Gender", "Prediction" and "Time". The listbox shows the last five entries.
last_predictions = []

def update_predictions_list():
    """Redraw the history listbox with the five most recent predictions.

    The listbox is cleared and then refilled, oldest of the five first, from
    the tail of ``last_predictions``.
    """
    last_predictions_listbox.delete(0, tk.END)
    most_recent = last_predictions[-5:]
    for prediction in most_recent:
        last_predictions_listbox.insert(
            tk.END,
            f"{prediction['Name']} ({prediction['Age']}, {prediction['Gender']}): {prediction['Prediction']} at {prediction['Time']}",
        )

def predict_audio_file():
    """Classify a user-selected audio file and record the result.

    Reads the child's name, age and gender from the form (all three are
    required), asks the user to choose a .wav/.mp3 file, extracts its
    features, scales them with the fitted ``scaler`` and runs ``model`` on
    them. The predicted label is appended to ``last_predictions``, the
    listbox is refreshed and the result is shown in a message box. Every
    failure path ends with a message box instead of raising.
    """
    child_name = name_entry.get().strip()
    child_age = age_combobox.get().strip()
    child_gender = gender_combobox.get().strip()

    if not (child_name and child_age and child_gender):
        messagebox.showerror("Missing Information", "Please fill out all fields (Name, Age, and Gender).")
        return

    # Several patterns for one file type are given as a sequence; a single
    # semicolon-joined string ("*.wav;*.mp3") is not a documented Tk pattern
    # form and is not reliably split outside Windows.
    file_path = filedialog.askopenfilename(filetypes=[("Audio Files", ("*.wav", "*.mp3"))])
    if not file_path:
        return  # dialog was cancelled

    feature = extract_features(file_path)
    if feature is None:
        # extract_features() only prints to the console; tell the user too.
        messagebox.showwarning("Feature Extraction Error", "Unable to extract valid features. Please try again.")
        return

    try:
        # (1, n_features) -> (1, n_features, 1) to match the model input.
        feature_scaled = scaler.transform([feature])[..., np.newaxis]
        predicted_probs = model.predict(feature_scaled)
        predicted_label = np.argmax(predicted_probs)
        predicted_label_name = label_encoder.inverse_transform([predicted_label])[0]

        prediction_info = {
            "Name": child_name,
            "Age": child_age,
            "Gender": child_gender,
            "Prediction": predicted_label_name,
            "Time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        last_predictions.append(prediction_info)
        update_predictions_list()
        messagebox.showinfo("Prediction Result", f"Prediction: {predicted_label_name}")
    except Exception as e:
        # GUI callback boundary: surface the error to the user rather than
        # letting it disappear into the Tk event loop.
        messagebox.showerror("Error", f"An error occurred during prediction: {e}")

def record_audio_with_progress(duration, sample_rate):
    """Record mono audio in tenth-of-a-second chunks while advancing the progress bar.

    Recording stops early as soon as the module-level ``stop_real_time`` flag
    is set, so the result may be shorter than requested (possibly empty).

    NOTE(review): real_time_prediction() calls this from its worker thread,
    so the progress-bar updates happen off the main thread - confirm that is
    acceptable for the Tk build in use.

    Args:
        duration: Requested recording length in seconds.
        sample_rate: Sample rate in Hz.

    Returns:
        A 1-D numpy array containing all recorded samples.
    """
    total_ticks = int(duration * 10)
    samples_per_tick = int(sample_rate / 10)
    samples = []

    progress_bar["value"] = 0
    progress_bar["maximum"] = duration * 10

    tick = 0
    while tick < total_ticks and not stop_real_time:
        chunk = sd.rec(samples_per_tick, samplerate=sample_rate, channels=1, dtype="float32")
        sd.wait()  # block until this chunk has been captured
        samples.extend(chunk.flatten())
        progress_bar["value"] += 1
        root.update_idletasks()
        tick += 1

    return np.array(samples)

def is_significant_sound(audio_data, threshold=0.02):
    """Return True when the average signal power exceeds ``threshold``.

    The power is the sum of squared samples divided by the number of samples.

    Args:
        audio_data: Sequence of audio samples; may be empty.
        threshold: Minimum average power regarded as significant.

    Returns:
        ``True`` if the average power is above ``threshold``, else ``False``.
        Empty input is never significant.
    """
    sample_count = len(audio_data)
    if sample_count == 0:
        # A recording stopped before the first chunk yields no samples;
        # dividing by zero here would give NaN and a RuntimeWarning.
        return False
    energy = np.sum(np.square(audio_data)) / sample_count
    return bool(energy > threshold)

def real_time_prediction():
    """Start the background record-and-classify loop.

    Clears the stop flag and launches a worker thread that repeatedly records
    a clip of up to 10 seconds at 16 kHz, rejects clips that are too quiet or
    shorter than four seconds, classifies the rest and appends predictions
    with a confidence of at least 0.5 to ``last_predictions``. The loop ends
    once ``stop_real_time`` becomes True.
    """
    global stop_real_time
    duration = 10
    sample_rate = 16000
    stop_real_time = False

    def record_and_predict():
        # NOTE(review): this runs on a worker thread yet opens message boxes
        # and refreshes the listbox; Tk widgets are normally driven from the
        # main thread - confirm this is safe on the target platform.
        while not stop_real_time:
            audio_data = record_audio_with_progress(duration, sample_rate)
            if not is_significant_sound(audio_data):
                messagebox.showwarning("No Sound", "No significant sound detected. Please try again.")
                continue

            if len(audio_data) < sample_rate * 4:
                messagebox.showwarning("Insufficient Data", "Audio data is insufficient for prediction. Please try again.")
                continue

            try:
                # extract_features_from_audio_data() already runs
                # clean_audio(); cleaning here as well would denoise the clip
                # twice and make live features differ from the file-based
                # features the model was trained on.
                feature = extract_features_from_audio_data(audio_data, sample_rate)
                if feature is None:
                    messagebox.showwarning("Feature Extraction Error", "Unable to extract valid features. Please try again.")
                    continue

                # (1, n_features) -> (1, n_features, 1) to match the model input.
                feature_scaled = scaler.transform([feature])[..., np.newaxis]

                predicted_probs = model.predict(feature_scaled)
                predicted_label = np.argmax(predicted_probs)
                confidence = np.max(predicted_probs)

                if confidence < 0.5:
                    messagebox.showwarning("Low Confidence", "Prediction confidence is too low. Please try again.")
                    continue

                predicted_label_name = label_encoder.inverse_transform([predicted_label])[0]
                prediction_info = {
                    "Name": "Real-Time",
                    "Age": "N/A",
                    "Gender": "N/A",
                    "Prediction": predicted_label_name,
                    "Time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                }
                last_predictions.append(prediction_info)
                update_predictions_list()
            except Exception as e:
                # Keep the loop alive: report the failure and try the next clip.
                messagebox.showerror("Prediction Error", f"An error occurred during prediction: {e}")

    # Daemon thread so a still-running loop cannot keep the interpreter
    # alive after the main program has finished.
    prediction_thread = threading.Thread(target=record_and_predict, daemon=True)
    prediction_thread.start()

def stop_real_time_prediction():
    """Ask the real-time loop to stop, warning if too little was recorded.

    The progress bar advances once per tenth-of-a-second chunk, so a value
    below 40 means fewer than four seconds were captured in the current clip.
    """
    global stop_real_time
    ticks_recorded = progress_bar["value"]
    if ticks_recorded < 40:
        messagebox.showwarning(
            "Warning",
            "No audio detected. Recording stopped before sufficient data was captured.",
        )
    stop_real_time = True

In [None]:
# ================================
# Tkinter GUI Main Loop
# ================================
# Shared look-and-feel values used by every widget below.
BACKGROUND = "lightblue"
LABEL_FONT = ("Arial", 12)


def _form_label(parent, text):
    """Create a form caption styled like the rest of the window."""
    return tk.Label(parent, text=text, bg=BACKGROUND, font=LABEL_FONT)


def _action_button(parent, text, command, color):
    """Create a white-on-colour action button bound to ``command``."""
    return tk.Button(parent, text=text, command=command, bg=color, fg="white", font=LABEL_FONT)


# --- Main window ---
root = tk.Tk()
root.title("Baby Cry Analysis System")
root.geometry("500x600")
root.config(bg=BACKGROUND)

# --- Child information form ---
input_frame = tk.Frame(root, bg=BACKGROUND)
input_frame.pack(pady=20)

name_label = _form_label(input_frame, "Child's Name:")
name_label.grid(row=0, column=0, padx=5, pady=5)
name_entry = ttk.Entry(input_frame)
name_entry.grid(row=0, column=1, padx=5, pady=5)

age_label = _form_label(input_frame, "Child's Age:")
age_label.grid(row=1, column=0, padx=10, pady=5)
age_combobox = ttk.Combobox(input_frame, values=["0-3 months", "3-6 months", "6-12 months", "1-2 years"])
age_combobox.grid(row=1, column=1, padx=5, pady=5)

gender_label = _form_label(input_frame, "Child's Gender:")
gender_label.grid(row=2, column=0, padx=5, pady=5)
gender_combobox = ttk.Combobox(input_frame, values=["Male", "Female"])
gender_combobox.grid(row=2, column=1, padx=5, pady=5)

# --- Action buttons ---
button_frame = tk.Frame(root, bg=BACKGROUND)
button_frame.pack(pady=20)

predict_button = _action_button(button_frame, "Predict Audio File", predict_audio_file, "#4CAF50")
predict_button.grid(row=0, column=0, padx=10, pady=10)

record_button = _action_button(button_frame, "Start Real-Time Prediction", real_time_prediction, "#4CAF50")
record_button.grid(row=0, column=1, padx=10, pady=10)

stop_button = _action_button(button_frame, "Stop Real-Time Prediction", stop_real_time_prediction, "#f44336")
stop_button.grid(row=1, column=0, columnspan=2, pady=10)

# --- Prediction history ---
last_predictions_label = _form_label(root, "Last 5 Predictions:")
last_predictions_label.pack(pady=10)

last_predictions_listbox = tk.Listbox(root, height=5, width=60)
last_predictions_listbox.pack(pady=5)

# --- Recording progress ---
progress_bar = ttk.Progressbar(root, length=300, mode="determinate")
progress_bar.pack(pady=10)

# Hand control to Tk; returns when the window is closed.
root.mainloop()