## مسابقه بزرگ هوش مصنوعی

In [None]:
#Ali Badrloo
#Mahdi Alikhani
#Mohammadjavad Ghazikhani

In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.image import resize

In [2]:
# Define your folder structure
data_dir = 'trimmed_audio'
classes = ['ali_ghaderi', 'amin_taheri', 'faezeh_najafi', 'houman', 'kourosh',
            'mahdi', 'mahdi_joozdani', 'mani_hosseini', 'mehdi_gozali', 'mojtaba',
              'nazanin_hasani', 'negar', 'saba', 'sam', 'samyar_miri']

In [None]:
import cv2

# Load and preprocess audio data
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    data = []
    labels = []
    
    for i, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        for filename in os.listdir(class_dir):
            if filename.endswith('.wav'):

                file_path = os.path.join(class_dir, filename)

                #Pre-Processing , trimming(scilence and noise)
                y, sr = librosa.load(file_path, sr=None)
                y_trimmed, _ = librosa.effects.trim(y, top_db=30)

                # Extract mel spectrogram
                S = librosa.feature.melspectrogram(y=y_trimmed, sr=sr, n_mels=128 * 5)
                S_db_mel = librosa.amplitude_to_db(S, ref=np.max)

                # Normalize the spectrogram
                S_db_mel_normalized = (S_db_mel - np.min(S_db_mel)) / (np.max(S_db_mel) - np.min(S_db_mel))

                # Resize the spectrogram to fixed size
                fixed_height = 128
                fixed_width = 640
                S_db_mel_resized = cv2.resize(S_db_mel_normalized, (fixed_width, fixed_height))

                mel_spectrogram = S_db_mel_resized
                mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
                
                data.append(mel_spectrogram)
                labels.append(i)
    
    return np.array(data), np.array(labels)

In [4]:
# Split data into training and testing sets
data, labels = load_and_preprocess_data(data_dir, classes)
labels = to_categorical(labels, num_classes=len(classes))  # Convert labels to one-hot encoding
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


In [5]:
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_train:", y_train.shape)
print("Shape of Y_test:", y_test.shape)

Shape of X_train: (245, 128, 128, 1)
Shape of X_test: (62, 128, 128, 1)
Shape of Y_train: (245, 15)
Shape of Y_test: (62, 15)


In [6]:
print(y_train)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [7]:
# Create a neural network model
input_shape = X_train[0].shape
input_layer = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(len(classes), activation='softmax')(x)
model = Model(input_layer, output_layer)

In [8]:
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

X_train shape: (245, 128, 128, 1)
y_train shape: (245, 15)


In [9]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
model.fit(X_train, y_train, epochs=12, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 177ms/step - accuracy: 0.0917 - loss: 2.6977 - val_accuracy: 0.3226 - val_loss: 2.5109
Epoch 2/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 135ms/step - accuracy: 0.4346 - loss: 2.3467 - val_accuracy: 0.3548 - val_loss: 2.1084
Epoch 3/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.6158 - loss: 1.6341 - val_accuracy: 0.4032 - val_loss: 1.9064
Epoch 4/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 138ms/step - accuracy: 0.6712 - loss: 1.0999 - val_accuracy: 0.5161 - val_loss: 1.6954
Epoch 5/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 138ms/step - accuracy: 0.8744 - loss: 0.5642 - val_accuracy: 0.4839 - val_loss: 2.0029
Epoch 6/12
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 141ms/step - accuracy: 0.8699 - loss: 0.4448 - val_accuracy: 0.5806 - val_loss: 1.5067
Epoch 7/12
[1m8/8[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1ff9a1cd750>

In [11]:
# Save the model to a file
model.save("my_model.h5")  # Saves the model in HDF5 format

