<a href="https://colab.research.google.com/github/Meriam-Victor/Recognize_the_number_from_0_to_9_using_CNN-/blob/main/Recognize_the_number_from_0_to_9_using_CNN_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install --upgrade tensorflow


In [None]:
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential


In [None]:
# Root directory of the dataset; each sub-folder is read as one class.
dataset_path = '/kaggle/input/0-9-dataset/0-9_dataset/0-9_dataset'

# Keep only sub-directories and sort them: os.listdir returns entries in
# arbitrary order, and that order decides the sample order that the seeded
# train/validation/test split later operates on.
folders = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)


In [None]:
# Show the entries found under dataset_path (used as class folders below).
print(folders)

['two', 'four', 'five', 'nine', 'six', 'zero', 'eight', 'seven', 'three', 'one']


In [None]:

def load_wav_files_and_extract_mfccs(folder_path, n_mfcc=40):
    """Load every ``.wav`` file in a folder and reduce each to a mean-MFCC vector.

    Args:
        folder_path: Directory holding the recordings of one class. The name
            of this directory is used as the label of every file inside it.
        n_mfcc: Number of MFCC coefficients requested from librosa per frame.

    Returns:
        A tuple ``(mfccs_data, labels)`` of two equal-length lists: one 1-D
        array of ``n_mfcc`` time-averaged coefficients per successfully read
        file, and the folder name repeated once per such file.

    Problems are reported with ``print`` instead of being raised. A missing or
    unreadable folder yields two empty lists; a file that cannot be loaded is
    skipped on its own so the remaining files of the folder are still used.
    """
    mfccs_data = []
    labels = []

    # normpath strips a trailing separator, which would otherwise make
    # basename return an empty string as the label.
    label = os.path.basename(os.path.normpath(folder_path))

    try:
        # Sorted so the sample order does not depend on the filesystem.
        file_names = sorted(os.listdir(folder_path))
    except FileNotFoundError:
        print(f"Folder not found: {folder_path}")
        return mfccs_data, labels
    except Exception as e:
        print(f"Error loading files from {folder_path}: {e}")
        return mfccs_data, labels

    for file in file_names:
        if not file.endswith('.wav'):
            continue
        audio_file_path = os.path.join(folder_path, file)
        try:
            audio_data, audio_sample_rate = librosa.load(audio_file_path)
            # Average the coefficients over time: one fixed-size vector per file.
            mfcc_fea = np.mean(
                librosa.feature.mfcc(y=audio_data, sr=audio_sample_rate, n_mfcc=n_mfcc).T,
                axis=0,
            )
        except Exception as e:
            # Skip only this file; one bad recording must not drop the folder.
            print(f"Error loading {audio_file_path}: {e}")
            continue
        mfccs_data.append(mfcc_fea)
        labels.append(label)

    return mfccs_data, labels


In [None]:

# Accumulators for the features and labels of the whole dataset.
mfccs_data, labels = [], []

# Visit each class folder in turn and append its features and labels.
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)
    folder_mfccs, folder_labels = load_wav_files_and_extract_mfccs(folder_path)
    mfccs_data += folder_mfccs
    labels += folder_labels

In [None]:
# Number of feature vectors collected (one per loaded .wav file).
print(len(mfccs_data))

23666


In [None]:
# Number of labels collected; should match the feature count printed above.
print(len(labels))

23666


In [None]:
# Turn the Python lists into NumPy arrays so features and labels can be
# indexed and split together.
mfccs_data, labels = np.array(mfccs_data), np.array(labels)

# Report both shapes, one per line.
print(mfccs_data.shape, labels.shape, sep="\n")

(23666, 40)
(23666,)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the samples. Stratifying on the labels keeps the share of
# every class the same in the training set and in the hold-out set.
train_data, temp_data, train_labels, temp_labels = train_test_split(
    mfccs_data, labels, test_size=0.4, random_state=42, stratify=labels)

# Halve the hold-out set into validation and test sets, again stratified, so
# each ends up with about 20% of the full dataset.
val_data, test_data, val_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels)

# Report the number of samples and labels in every split.
print("Training data shape:", len(train_data))
print("Validation data shape:", len(val_data))
print("Testing data shape:", len(test_data))
print("Training labels shape:", len(train_labels))
print("Validation labels shape:", len(val_labels))
print("Testing labels shape:", len(test_labels))

Training data shape: 14199
Validation data shape: 4733
Testing data shape: 4734
Training labels shape: 14199
Validation labels shape: 4733
Testing labels shape: 4734


In [None]:

# def preprocess_audio(audio, target_length=16000):
#     try:
#         # Pad or truncate the audio to the target length
#         if len(audio) < target_length:
#             padding_before = (target_length - len(audio)) // 2
#             padding_after = target_length - len(audio) - padding_before
#             audio = np.pad(audio, (padding_before, padding_after), 'constant', constant_values=(0, 0))
#         else:
#             audio = audio[:target_length]
#         return audio
#     except Exception as e:
#         print(f"Error processing audio: {e}")

def preprocess_labels(labels):
    """One-hot encode spoken-digit word labels.

    Args:
        labels: Iterable of digit words from 'zero' through 'nine'.

    Returns:
        The result of ``tf.one_hot`` with depth 10 applied to the class
        indices, i.e. one row per input label.

    Raises:
        KeyError: If a label is not one of the ten digit words.
    """
    digit_words = ('zero', 'one', 'two', 'three', 'four',
                   'five', 'six', 'seven', 'eight', 'nine')
    # The position of each word in the tuple is its class index.
    word_to_index = {word: index for index, word in enumerate(digit_words)}

    class_ids = tf.constant([word_to_index[name] for name in labels], dtype=tf.int32)
    return tf.one_hot(class_ids, depth=10)


In [None]:
# train_audio = [preprocess_audio(file) for file in train_data]
# val_audio = [preprocess_audio(file) for file in val_data]
# test_audio = [preprocess_audio(file) for file in test_data]


In [None]:
# print(f"Number of preprocessed audio files: {len(train_audio)}")
# print(f"Number of preprocessed audio files: {len(val_audio)}")
# print(f"Number of preprocessed audio files: {len(test_audio)}")


In [None]:

# One-hot encode the labels of each split. Every result is wrapped in a
# single-element list, which adds a leading axis that a later cell removes.
train_num_labels, val_num_labels, test_num_labels = (
    [preprocess_labels(split_labels)]
    for split_labels in (train_labels, val_labels, test_labels)
)


In [None]:
# Report the label count of the train, validation and test splits (in that
# order), one line per split.
print(
    f"Number of preprocessed labels: {len(train_labels)}",
    f"Number of preprocessed labels: {len(val_labels)}",
    f"Number of preprocessed labels: {len(test_labels)}",
    sep="\n",
)


Number of preprocessed labels: 14199
Number of preprocessed labels: 4733
Number of preprocessed labels: 4734


In [None]:
# Sanity-check the feature matrix shape of every split.
print("Train data shape:", np.shape(train_data))
print("Validation data shape:", np.shape(val_data))
print("Test data shape:", np.shape(test_data))


Train data shape: (14199, 40)
Validation data shape: (4733, 40)
Test data shape: (4734, 40)


In [None]:
# Inspect the encoded label shapes of every split.
print("Train labels shape:", np.shape(train_num_labels))
print("Validation labels shape:", np.shape(val_num_labels))
print("Test labels shape:", np.shape(test_num_labels))


Train labels shape: (1, 14199, 10)
Validation labels shape: (1, 4733, 10)
Test labels shape: (1, 4734, 10)


In [None]:
def _as_2d_labels(one_hot):
    """Return the labels as a 2-D array whose last axis is kept unchanged.

    Any leading singleton axis is folded away by the reshape. Unlike
    ``np.squeeze(..., axis=0)``, an array that is already 2-D passes through
    unchanged, so the cell can be executed more than once.
    """
    one_hot = np.asarray(one_hot)
    return one_hot.reshape(-1, one_hot.shape[-1])


# Remove the extra leading dimension from the labels.
train_num_labels = _as_2d_labels(train_num_labels)
val_num_labels = _as_2d_labels(val_num_labels)
test_num_labels = _as_2d_labels(test_num_labels)

In [None]:
# Confirm the label arrays now have one row per sample.
print("Train labels shape:", np.shape(train_num_labels))
print("Validation labels shape:", np.shape(val_num_labels))
print("Test labels shape:", np.shape(test_num_labels))


Train labels shape: (14199, 10)
Validation labels shape: (4733, 10)
Test labels shape: (4734, 10)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten, Conv1D, MaxPooling1D
from tensorflow.keras.regularizers import l2

def create_model(input_shape=(40, 1), num_classes=10):
    """Build and compile the 1-D convolutional classifier.

    The network stacks three Conv1D stages (64, 128 and 128 filters, kernel
    size 3, L2-regularised) with batch normalisation, max pooling and dropout,
    followed by five fully connected ReLU layers (512 down to 32 units) and a
    softmax output layer.

    Args:
        input_shape: Shape of one sample as ``(steps, channels)``. The default
            matches the 40-value feature vectors used in this notebook with a
            single channel. Shorter inputs may not survive the stacked
            convolutions and poolings.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Explicit Input object instead of the `input_shape=` layer argument.
        tf.keras.Input(shape=input_shape),
        Conv1D(filters=64, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001)),
        # NOTE(review): this stage pools both before and after the batch
        # normalisation, unlike the other two stages - confirm it is intended.
        MaxPooling1D(pool_size=2),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        Conv1D(filters=128, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.001))
    ])

    # Categorical cross-entropy expects one-hot encoded targets.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Build and compile the network via create_model().
model = create_model()

# Print the layer-by-layer summary of the model.
model.summary()


In [None]:
batch_size = 16

# Stop once val_loss has not improved for 10 consecutive epochs, then roll the
# model back to the weights of the best epoch. Without restoring, the model
# evaluated afterwards would carry the weights of the last (worse) epoch.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto',
    baseline=None, restore_best_weights=True)

# Train for at most 100 epochs, validating on the validation split each epoch.
history = model.fit(train_data, train_num_labels,
                    validation_data=(val_data, val_num_labels),
                    epochs=100,
                    callbacks=[callback],
                    batch_size=batch_size)

Epoch 1/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 12ms/step - accuracy: 0.2538 - loss: 2.2563 - val_accuracy: 0.4179 - val_loss: 1.8455
Epoch 2/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.3855 - loss: 1.9099 - val_accuracy: 0.4323 - val_loss: 1.7661
Epoch 3/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4182 - loss: 1.8358 - val_accuracy: 0.4547 - val_loss: 1.7201
Epoch 4/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4203 - loss: 1.8146 - val_accuracy: 0.4767 - val_loss: 1.6626
Epoch 5/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4371 - loss: 1.7764 - val_accuracy: 0.4813 - val_loss: 1.6511
Epoch 6/100
[1m888/888[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4378 - loss: 1.7685 - val_accuracy: 0.4752 - val_loss: 1.6498
Epoch 7/100
[1m888/

In [None]:

# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the metric configured at compile time (accuracy).
test_loss, test_acc = model.evaluate(x=test_data, y=test_num_labels)
print("Test accuracy:", test_acc)


[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5444 - loss: 1.5668
Test accuracy: 0.5536544322967529


In [None]:
# Learning curves recorded by model.fit: accuracy on the left, loss on the
# right. `plt` comes from the `matplotlib.pyplot` import in the imports cell.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

# Training & validation accuracy per epoch.
ax_acc.plot(history.history['accuracy'])
ax_acc.plot(history.history['val_accuracy'])
ax_acc.set_title('Model accuracy')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc='upper left')

# Training & validation loss per epoch.
ax_loss.plot(history.history['loss'])
ax_loss.plot(history.history['val_loss'])
ax_loss.set_title('Model loss')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc='upper left')

fig.tight_layout()
plt.show()