# Table of Contents

1. Training the Model
   * Version 1
   * Version 2
   * Version 3 (With augmentation)
2. Testing

# Training the model

**Version 1**

**Training the model on the denoised, grayscale, masked dataset for 10 epochs**

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Step 1: Load and preprocess the dataset
dataset_path = "/kaggle/working/sorted_images/folder1"

# Loader settings shared by the training and validation calls. Both calls
# must use the same seed and validation_split to draw from the same split,
# so the values are defined once instead of being repeated.
IMAGE_SIZE = (224, 224)  # matches the input_shape given to ResNet50 below
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
SPLIT_SEED = 123

# Load dataset (training and validation split)
train_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

val_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Read the class names now: train_dataset is rebound to a mapped dataset below.
class_names = train_dataset.class_names

# Scale pixel values by 1/255, then prefetch so the next batch is prepared
# while the current one is being consumed by the model.
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_dataset = train_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)

# Step 2: Define the ResNet50 model
# The convolutional base is built without its classification top and its
# weights are read from a local file.
base_model = ResNet50(
    weights="/kaggle/input/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top=False,
    input_shape=(224, 224, 3)
)

# New classification head: pooled features -> 128-unit layer -> one softmax
# output per class found in the dataset directory.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
predictions = Dense(len(class_names), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Step 3: Compile the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Step 4: Train the model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10
)

# Step 5: Evaluate the model
loss, accuracy = model.evaluate(val_dataset)
print(f"Validation Accuracy: {accuracy:.2f}")

# Step 6: Save the final model (HDF5 file, as selected by the .h5 suffix)
model.save("/kaggle/working/version1.h5")

**Version 2**

**Training the model again, this time for 20 epochs**

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Step 1: Load and preprocess the dataset
dataset_path = "/kaggle/working/sorted_images/folder1"

# Loader settings shared by the training and validation calls. Both calls
# must use the same seed and validation_split to draw from the same split,
# so the values are defined once instead of being repeated.
IMAGE_SIZE = (224, 224)  # matches the input_shape given to ResNet50 below
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
SPLIT_SEED = 123

# Load dataset (training and validation split)
train_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

val_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Read the class names now: train_dataset is rebound to a mapped dataset below.
class_names = train_dataset.class_names

# Scale pixel values by 1/255, then prefetch so the next batch is prepared
# while the current one is being consumed by the model.
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_dataset = train_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)

# Step 2: Define the ResNet50 model
# The convolutional base is built without its classification top and its
# weights are read from a local file.
base_model = ResNet50(
    weights="/kaggle/input/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top=False,
    input_shape=(224, 224, 3)
)

# New classification head: pooled features -> 128-unit layer -> one softmax
# output per class found in the dataset directory.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
predictions = Dense(len(class_names), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Step 3: Compile the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Step 4: Train the model (twice as many epochs as Version 1)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20
)

# Step 5: Evaluate the model
loss, accuracy = model.evaluate(val_dataset)
print(f"Validation Accuracy: {accuracy:.2f}")

# Step 6: Save the final model (HDF5 file, as selected by the .h5 suffix)
model.save("/kaggle/working/version2.h5")

**Version 3**

**Training the model again, this time on the augmented dataset.**

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import CSVLogger  # Import CSVLogger

# Step 1: Load and preprocess the dataset
dataset_path = "/kaggle/working/augmented_spectrograms"

# Loader settings shared by the training and validation calls. Both calls
# must use the same seed and validation_split to draw from the same split,
# so the values are defined once instead of being repeated.
IMAGE_SIZE = (224, 224)  # matches the input_shape given to ResNet50 below
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
SPLIT_SEED = 123

# Load dataset (training and validation split)
train_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

val_dataset = image_dataset_from_directory(
    dataset_path,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SPLIT_SEED,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Read the class names now: train_dataset is rebound to a mapped dataset below.
class_names = train_dataset.class_names

# Scale pixel values by 1/255, then prefetch so the next batch is prepared
# while the current one is being consumed by the model.
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_dataset = train_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.map(
    lambda x, y: (normalization_layer(x), y)
).prefetch(tf.data.AUTOTUNE)

# Step 2: Define the ResNet50 model
# The convolutional base is built without its classification top and its
# weights are read from a local file.
base_model = ResNet50(
    weights="/kaggle/input/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5",
    include_top=False,
    input_shape=(224, 224, 3)
)

# New classification head: pooled features -> 128-unit layer -> one softmax
# output per class found in the dataset directory.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu")(x)
predictions = Dense(len(class_names), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so that only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Step 3: Compile the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Step 4: Set up CSVLogger callback
# append=True adds rows to an existing log file instead of overwriting it,
# so re-running this cell accumulates epochs from every run in one CSV.
csv_logger = CSVLogger('/kaggle/working/training_log.csv', append=True)

# Step 5: Train the model with CSVLogger callback
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20,
    callbacks=[csv_logger]
)

# Step 6: Evaluate the model
loss, accuracy = model.evaluate(val_dataset)
print(f"Validation Accuracy: {accuracy:.2f}")

# Step 7: Save the final model (HDF5 file, as selected by the .h5 suffix)
model.save("/kaggle/working/version3.h5")

# Testing

In [None]:
import tensorflow as tf

# Loading the models
# Each model is read from a read-only input location and re-saved below into
# the working directory, which is where the later cells load it from.
model1 = tf.keras.models.load_model("/kaggle/input/version1/keras/basic/1/version1.h5")
# NOTE(review): model2 is loaded from the same version1.h5 file as model1, so
# every "model 2" result downstream duplicates model 1. This looks like a
# copy-paste slip; confirm the intended path of the Version 2 model.
model2 = tf.keras.models.load_model("/kaggle/input/version1/keras/basic/1/version1.h5")
model3 = tf.keras.models.load_model("/kaggle/input/version3/keras/advanced/1/version3.h5")

# saving them in working directory
model1.save("/kaggle/working/converted_model1.h5")
model2.save("/kaggle/working/converted_model2.h5")
model3.save("/kaggle/working/converted_model3.h5")

In [None]:
import tensorflow as tf
import numpy as np
from PIL import Image 

# Locations of the three re-saved models in the working directory.
model_path1 = "/kaggle/working/converted_model1.h5"
model_path2 = "/kaggle/working/converted_model2.h5"
model_path3 = "/kaggle/working/converted_model3.h5"

# Load each model from its path; the names model1..model3 are kept for the
# cells that follow.
model1 = tf.keras.models.load_model(model_path1)
model2 = tf.keras.models.load_model(model_path2)
model3 = tf.keras.models.load_model(model_path3)

**Via Images**

In [None]:
import numpy as np
import tensorflow as tf
from PIL import Image


# Display names "Class 0" .. "Class 9", looked up by predicted class index.
# NOTE(review): assumes the models have exactly 10 outputs in this order;
# confirm against the class_names seen at training time.
class_labels = [f"Class {index}" for index in range(10)]


def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file and turn it into a one-image float32 batch.

    Args:
        image_path: Path of the image file to open with ``PIL.Image.open``.
        target_size: ``(width, height)`` to resize the image to. Defaults to
            224x224, the size previously hard-coded here.

    Returns:
        A ``numpy.ndarray`` of dtype float32 and shape
        ``(1, height, width, 3)`` whose values are the RGB pixel values
        divided by 255.
    """
    # Image.open keeps the underlying file open; the context manager closes
    # it as soon as the pixel data has been copied out.
    with Image.open(image_path) as source:
        # convert() returns a new, fully loaded image (grayscale inputs are
        # expanded to three channels), so it is safe to use after the close.
        img = source.convert("RGB").resize(target_size)
    img_array = np.array(img) / 255.0  # same scaling as the notebook's Rescaling(1./255)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    return img_array.astype("float32")


# Single test spectrogram image, converted into a model-ready batch.
image_path = "/kaggle/input/testing/101415-3-0-3_spectrogram.png"
processed_image = preprocess_image(image_path)

# Load trained models
# (the converted_model*.h5 copies saved into the working directory)
model1 = tf.keras.models.load_model("/kaggle/working/converted_model1.h5")
model2 = tf.keras.models.load_model("/kaggle/working/converted_model2.h5")
model3 = tf.keras.models.load_model("/kaggle/working/converted_model3.h5")

def get_class_name(predictions):
    """Return the entry of ``class_labels`` for the highest prediction score.

    ``np.argmax`` is called without an axis, so the winning index is taken
    over the flattened ``predictions`` array. The calls in this cell pass
    the output for a single-image batch.
    """
    best_index = np.argmax(predictions)
    label = class_labels[best_index]
    return label

# Run each model on the same preprocessed image and report its top class.
predictions1 = model1.predict(processed_image)
label1 = get_class_name(predictions1)
print(f"Predicted class for model 1: {label1}")

predictions2 = model2.predict(processed_image)
label2 = get_class_name(predictions2)
print(f"Predicted class for model 2: {label2}")

predictions3 = model3.predict(processed_image)
label3 = get_class_name(predictions3)
print(f"Predicted class for model 3: {label3}")

**Via Sounds**

In [None]:
import os
import numpy as np
import librosa
import librosa.display
import noisereduce as nr
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf

# Display names "Class 0" .. "Class 9", looked up by predicted class index.
# NOTE(review): assumes the models have exactly 10 outputs in this order;
# confirm against the class_names seen at training time.
class_labels = [f"Class {index}" for index in range(10)]

def apply_mask(spectrogram_db, threshold=-40):
    """Flatten the quiet parts of a spectrogram to its minimum value.

    Every element strictly below ``threshold`` is replaced by the smallest
    value found in ``spectrogram_db``; all other elements are kept. A new
    array is returned and the input is left untouched.
    """
    is_quiet = spectrogram_db < threshold
    floor_value = np.min(spectrogram_db)
    return np.where(is_quiet, floor_value, spectrogram_db)

def generate_spectrogram(file_path):
    """Render an audio file as a 224x224 grayscale Mel-spectrogram PNG.

    The audio is loaded at its native sampling rate, denoised with
    ``noisereduce``, converted to a dB-scaled Mel spectrogram, masked with
    ``apply_mask`` and drawn without axes. The figure is first written to
    ``temp_spectrogram.png`` and then resized into
    ``final_spectrogram.png``. Both files use fixed names relative to the
    current working directory, so each call overwrites the previous output.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.

    Returns:
        The path of the resized image, ``"final_spectrogram.png"``.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    y, sr = librosa.load(file_path, sr=None)

    # Apply noise reduction
    y_denoised = nr.reduce_noise(y=y, sr=sr)

    # Mel power spectrogram, converted to dB relative to its own maximum
    spectrogram = librosa.feature.melspectrogram(
        y=y_denoised, sr=sr, n_fft=2048, hop_length=512
    )
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    # Apply noise masking
    spectrogram_db = apply_mask(spectrogram_db)

    # Draw on an explicit axes object and always close the figure, even if
    # rendering or saving fails, so figures do not pile up in memory.
    temp_path = "temp_spectrogram.png"
    fig, ax = plt.subplots(figsize=(10, 10))  # Square size for ResNet
    try:
        ax.set_axis_off()
        librosa.display.specshow(
            spectrogram_db, sr=sr, cmap="gray_r", fmax=8000, ax=ax
        )
        fig.savefig(temp_path, bbox_inches="tight", pad_inches=0, dpi=100)
    finally:
        plt.close(fig)

    # Resize for ResNet50 (224x224). The context manager closes the temp
    # file; convert() returns a new, fully loaded image that outlives it.
    final_path = "final_spectrogram.png"
    with Image.open(temp_path) as rendered:
        img = rendered.convert("L").resize((224, 224), Image.Resampling.LANCZOS)
    img.save(final_path)

    return final_path

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file and turn it into a one-image float32 batch.

    Args:
        image_path: Path of the image file to open with ``PIL.Image.open``.
        target_size: ``(width, height)`` to resize the image to. Defaults to
            224x224, the size previously hard-coded here.

    Returns:
        A ``numpy.ndarray`` of dtype float32 and shape
        ``(1, height, width, 3)`` whose values are the RGB pixel values
        divided by 255.
    """
    # Image.open keeps the underlying file open; the context manager closes
    # it as soon as the pixel data has been copied out.
    with Image.open(image_path) as source:
        # convert() returns a new, fully loaded image (grayscale inputs are
        # expanded to three channels), so it is safe to use after the close.
        img = source.convert("RGB").resize(target_size)
    img_array = np.array(img) / 255.0  # same scaling as the notebook's Rescaling(1./255)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    return img_array.astype("float32")

# Load trained models
# (the converted_model*.h5 copies saved into the working directory)
model1 = tf.keras.models.load_model("/kaggle/working/converted_model1.h5")
model2 = tf.keras.models.load_model("/kaggle/working/converted_model2.h5")
model3 = tf.keras.models.load_model("/kaggle/working/converted_model3.h5")

def get_class_name(predictions):
    """Return the entry of ``class_labels`` for the highest prediction score.

    ``np.argmax`` is called without an axis, so the winning index is taken
    over the flattened ``predictions`` array. The calls in this cell pass
    the output for a single-image batch.
    """
    best_index = np.argmax(predictions)
    label = class_labels[best_index]
    return label

# Audio clip to classify.
audio_path = "/kaggle/input/testing/sample_audio.wav"

# Audio -> spectrogram PNG on disk -> model-ready image batch.
spectrogram_path = generate_spectrogram(audio_path)
processed_image = preprocess_image(spectrogram_path)

# Run each model on the same batch and report its top class.
predictions1 = model1.predict(processed_image)
label1 = get_class_name(predictions1)
print(f"Predicted class for model 1: {label1}")

predictions2 = model2.predict(processed_image)
label2 = get_class_name(predictions2)
print(f"Predicted class for model 2: {label2}")

predictions3 = model3.predict(processed_image)
label3 = get_class_name(predictions3)
print(f"Predicted class for model 3: {label3}")