<a href="https://colab.research.google.com/github/Austin523/AI/blob/main/AImodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [262]:
from google.colab import files
uploaded = files.upload()  # Upload your dataset.zip

Saving AI sound.zip to AI sound (2).zip


In [263]:
#unzip the file
import zipfile

with zipfile.ZipFile("AI sound.zip", 'r') as zip_ref:
  zip_ref.extractall("datasetAI")

In [264]:
!pip install imbalanced-learn --quiet

In [265]:
!pip install tensorflow-addons



In [266]:
!pip install librosa matplotlib



In [267]:
def augment_audio(y, sr):
    # Apply random augmentations
    if np.random.rand() < 0.3:
        y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=np.random.uniform(-2, 2))
    if np.random.rand() < 0.3:
        y = librosa.effects.time_stretch(y, rate=np.random.uniform(0.8, 1.2))
    if np.random.rand() < 0.3:
        noise = np.random.normal(0, 0.01, size=y.shape)
        y = y + noise
    return y

In [302]:
import tensorflow as tf
from tensorflow.keras import backend as K

def focal_loss(gamma=2.0, alpha_val=None):
    alpha_tensor = tf.convert_to_tensor(alpha_val, dtype=tf.float32) if alpha_val is not None else None

    def loss(y_true, y_pred):
        y_pred = tf.clip_by_value(y_pred, tf.keras.backend.epsilon(), 1. - tf.keras.backend.epsilon())
        cross_entropy = -y_true * tf.math.log(y_pred)

        if alpha_tensor is not None:
            alpha_reshaped = tf.reshape(alpha_tensor, (1, -1))  # Shape (1, num_classes)
            alpha_factor = tf.reduce_sum(alpha_reshaped * y_true, axis=-1, keepdims=True)
        else:
            alpha_factor = 1.0

        focal_factor = tf.pow(1.0 - y_pred, gamma)
        loss = alpha_factor * focal_factor * cross_entropy

        return tf.reduce_mean(tf.reduce_sum(loss, axis=-1))  # Final scalar loss

    return loss

In [269]:
import numpy as np
from tensorflow.keras import Input
import librosa
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import os
from sklearn.model_selection import train_test_split

def wav_to_mel_spectrogram(wav_path, n_mels=128, fmax=8000):
    y, sr = librosa.load(wav_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    S_dB = librosa.power_to_db(S, ref=np.max)
    return S_dB

def normalize_spectrogram(S_dB):
    S_norm = (S_dB - S_dB.min()) / (S_dB.max() - S_dB.min())
    return S_norm

def resize_spectrogram(S_norm, target_shape=(128, 128)):
    # Resize spectrogram to fixed size (width x height)
    resized = cv2.resize(S_norm, target_shape, interpolation=cv2.INTER_AREA)
    return resized

def add_channel_dim(S_resized):
    return S_resized[..., np.newaxis]  # shape (H, W, 1)

def to_3_channels(S_resized):
    return np.repeat(S_resized[..., np.newaxis], 3, axis=-1)  # shape (H, W, 3)



def preprocess_wav_for_cnn(wav_path, target_shape=(128, 128), channels=1):
    S_dB = wav_to_mel_spectrogram(wav_path)
    S_norm = normalize_spectrogram(S_dB)
    S_resized = resize_spectrogram(S_norm, target_shape)
    if channels == 3:
        S_input = to_3_channels(S_resized)
    else:
        S_input = add_channel_dim(S_resized)
    return S_input


input_shape = (128, 128, 1)
num_classes = 4

model = Sequential([
    Input(shape=input_shape),  # <-- add this line
    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.4),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    Dense(num_classes, activation='softmax')
])

In [270]:
data_dir = 'datasetAI/AI sound'
labels = ['EM Interference', 'Leak', 'No Leak', 'Noise Pollution']

file_paths = []
file_labels = []

for idx, label in enumerate(labels):
    folder = os.path.join(data_dir, label)
    if not os.path.exists(folder):
        print(f"Folder not found: {folder}")
        continue
    for filename in os.listdir(folder):
        if filename.endswith('.wav'):
            file_paths.append(os.path.join(folder, filename))
            file_labels.append(idx)

file_paths = np.array(file_paths)
file_labels = np.array(file_labels)


In [271]:
from sklearn.model_selection import train_test_split
import numpy as np
import os

# Build file list and label list
data_dir = 'datasetAI/AI sound'
labels = ['EM Interference', 'Leak', 'No Leak', 'Noise Pollution']

file_paths = []
file_labels = []

for idx, label in enumerate(labels):
    folder = os.path.join(data_dir, label)
    for fname in os.listdir(folder):
        if fname.endswith(".wav"):
            file_paths.append(os.path.join(folder, fname))
            file_labels.append(idx)

file_paths = np.array(file_paths)
file_labels = np.array(file_labels)

# Now split by file
train_paths, test_paths, train_labels, test_labels = train_test_split(
    file_paths, file_labels, test_size=0.2, stratify=file_labels, random_state=42
)


print("X_train for model:", X_train_3ch.shape)
print("X_test for model:", X_test_3ch.shape)

X_train for model: (243, 128, 216, 3)
X_test for model: (61, 128, 216, 3)


In [272]:
def extract_mel_spectrogram(file_path, sr=22050, duration=5, n_mels=128, fixed_length=216, augment=False):
    y, _ = librosa.load(file_path, sr=sr, duration=duration)

    if augment:
        y = augment_audio(y, sr)

    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    if mel_db.shape[1] < fixed_length:
        pad_width = fixed_length - mel_db.shape[1]
        mel_db = np.pad(mel_db, ((0, 0), (0, pad_width)), mode='constant')
    else:
        mel_db = mel_db[:, :fixed_length]

    mel_db_norm = (mel_db - mel_db.min()) / (mel_db.max() - mel_db.min())

    mel_3ch = np.stack([mel_db_norm] * 3, axis=-1)  # ✅ Shape: (128, 216, 3)
    return mel_3ch


In [273]:
# Load mel spectrograms with augmentation for training set
X_train = np.array([extract_mel_spectrogram(p, augment=True) for p in train_paths])
X_test = np.array([extract_mel_spectrogram(p, augment=False) for p in test_paths])
print(X_train[0].shape)

# One-hot encode labels
from tensorflow.keras.utils import to_categorical
y_train = np.array(train_labels)
y_test = np.array(test_labels)
y_train_cat = to_categorical(y_train, num_classes=4)
y_test_cat = to_categorical(y_test, num_classes=4)

print("✅ Shapes:")
print("X_train:", X_train.shape)
print("y_train_cat:", y_train_cat.shape)
print("X_test:", X_test.shape)
print("y_test_cat:", y_test_cat.shape)

(128, 216, 3)
✅ Shapes:
X_train: (142, 128, 216, 3)
y_train_cat: (142, 4)
X_test: (36, 128, 216, 3)
y_test_cat: (36, 4)


In [274]:

# Convert grayscale to RGB
X_train_3ch = np.repeat(X_train, 3, axis=-1)
X_test_3ch = np.repeat(X_test, 3, axis=-1)

# One-hot encode
from tensorflow.keras.utils import to_categorical
y_train_cat = to_categorical(y_train, num_classes=4)
y_test_cat = to_categorical(y_test, num_classes=4)

In [275]:
from tensorflow.keras.utils import to_categorical

num_classes = 4  # replace with your actual number of classes

y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

print("y_train shape after categorical:", y_train_cat.shape)

y_train shape after categorical: (142, 4)


In [276]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

In [277]:
train_gen = datagen.flow(X_train_3ch, y_train_cat, batch_size=32)

In [305]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras import backend as K


# Input shape with 3 channels
input_shape = (128, 216, 3)

# Number of classes (make sure this is defined)
num_classes = 4  # change this as per your dataset

# Load base model without top layers, with imagenet weights
base_model = MobileNetV2(include_top=False, weights='imagenet', input_shape=input_shape)

# Freeze base model layers to start
base_model.trainable = False

# Add custom classification head
inputs = Input(shape=input_shape)
x = base_model(inputs, training=False)  # keep BN layers in inference mode for frozen base

x = GlobalAveragePooling2D()(x)

# Add BatchNorm and Dropout here
x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)  # 50% dropout, you can tune this

outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=focal_loss(gamma=2.0, alpha_val=[0.2, 0.9, 0.9, 0.2]),  # class-specific weights
    metrics=['accuracy']
)


model.summary()

  base_model = MobileNetV2(include_top=False, weights='imagenet', input_shape=input_shape)


In [306]:

# Ensure input shape matches expected format for MobileNetV2
X_resampled_flat, y_resampled_int = smote.fit_resample(X_flat, y_int)
X_resampled = X_resampled_flat.reshape((-1, 128, 216, 1))  # for (128, 216)

# Train-test split
X_train, X_test, y_train_int, y_test_int = train_test_split(
    X_resampled, y_resampled_int, test_size=0.2, stratify=y_resampled_int, random_state=42
)

# Convert grayscale to RGB (3 channels)
X_train_3ch = np.repeat(X_train, 3, axis=-1)  # shape: (samples, 128, 216, 3)
X_test_3ch = np.repeat(X_test, 3, axis=-1)

# One-hot encode labels
from tensorflow.keras.utils import to_categorical
y_train_cat = to_categorical(y_train_int, num_classes=4)
y_test_cat = to_categorical(y_test_int, num_classes=4)

# Verify matching sizes
print("✅ Training set shape:", X_train_3ch.shape, y_train_cat.shape)
print("✅ Test set shape:", X_test_3ch.shape, y_test_cat.shape)

history = model.fit(
    train_gen,
    validation_data=(X_test_3ch, y_test_cat),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop, checkpoint]
)


✅ Training set shape: (243, 128, 216, 3) (243, 4)
✅ Test set shape: (61, 128, 216, 3) (61, 4)
Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node compile_loss/loss/mul_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-306-c0303830c23a>", line 23, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 60, in train_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/trainer.py", line 383, in _compute_loss

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/trainer.py", line 351, in compute_loss

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/compile_utils.py", line 691, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/compile_utils.py", line 700, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/losses/loss.py", line 67, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/losses/losses.py", line 33, in call

  File "<ipython-input-302-60f2b31d20d0>", line 13, in loss

Incompatible shapes: [32] vs. [1,4]
	 [[{{node compile_loss/loss/mul_1}}]] [Op:__inference_multi_step_on_iterator_307523]

In [None]:
# Predict
y_pred = model.predict(X_test_3ch)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test_cat, axis=1)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Get true labels and predicted labels
y_pred = model.predict(X_test_3ch)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test_cat, axis=1)

# Confusion matrix
cm = confusion_matrix(y_true, y_pred_classes)
sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Detailed report
print(classification_report(y_true, y_pred_classes, target_names=labels))


In [None]:
preds = model.predict(X_test_3ch)
print("Predicted:", np.argmax(preds, axis=1))
print("True:", np.argmax(y_test_cat, axis=1))
