<a href="https://colab.research.google.com/github/programming-freak/WordForThought/blob/main/CREMAD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import subprocess

# Dataset identifier on Kaggle
dataset_name = "ejlok1/cremad"  # Replace with the correct Kaggle dataset identifier if different

# Directory to save the dataset
download_dir = "crema_dataset"

# Ensure the directory exists
os.makedirs(download_dir, exist_ok=True)

# Download and unzip the dataset.
# The command is passed as an argument list (no shell), so the identifier and
# the target path are never re-parsed by a shell. check=True raises
# subprocess.CalledProcessError on a non-zero exit status, so a failed download
# can no longer be reported as "complete" below.
print("Downloading the dataset...")
subprocess.run(
    ["kaggle", "datasets", "download", "-d", dataset_name, "-p", download_dir, "--unzip"],
    check=True,
)

print("Download complete. Listing files:")
# List the downloaded files
files = os.listdir(download_dir)
print(files)

Downloading the dataset...
Download complete. Listing files:
['AudioWAV']


In [3]:
import os
import librosa
import numpy as np


# Folder holding the .wav clips. It is derived from the directory the download
# cell wrote to (`download_dir`) rather than a hard-coded absolute '/content/...'
# path, so the notebook also works when its working directory is not /content.
# 'AudioWAV' is the sub-folder listed by the download cell's output.
dataset_path = os.path.join(download_dir, 'AudioWAV')


def extract_features(file_path, n_mfcc=13):
    """Summarise one audio file as a fixed-length feature vector.

    The file is loaded at its native sampling rate (``sr=None``). Three
    frame-level descriptors are computed with librosa (MFCCs, STFT chroma and
    spectral contrast) and each is averaged across its frames, so the length
    of the result does not depend on the clip's duration.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13, the
            value that was previously hard-coded.

    Returns:
        A 1-D NumPy array holding the frame-averaged MFCCs, then the
        frame-averaged chroma bins, then the frame-averaged spectral-contrast
        bands, concatenated in that order.
    """
    y, sr = librosa.load(file_path, sr=None)

    def frame_mean(matrix):
        # Transpose so frames run along axis 0, then average them away,
        # leaving one value per coefficient/bin.
        return np.mean(matrix.T, axis=0)

    mfcc = frame_mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc))
    chroma = frame_mean(librosa.feature.chroma_stft(y=y, sr=sr))
    spectral_contrast = frame_mean(librosa.feature.spectral_contrast(y=y, sr=sr))

    return np.hstack([mfcc, chroma, spectral_contrast])


# Build the feature matrix and the label vector, one entry per .wav file.
features = []
labels = []

# os.listdir returns entries in arbitrary order; sorting fixes the row order so
# that the seeded train/test split later in the notebook is reproducible.
for file_name in sorted(os.listdir(dataset_path)):
    if file_name.endswith('.wav'):
        file_path = os.path.join(dataset_path, file_name)
        feature_vector = extract_features(file_path)
        # The label is the second underscore-separated field of the file name.
        # NOTE(review): CREMA-D file names appear to follow
        # ActorID_Sentence_Emotion_Level.wav, in which case field 1 is the
        # sentence code and field 2 the emotion code — confirm which target is
        # intended before changing the index.
        label = file_name.split('_')[1]
        features.append(feature_vector)
        labels.append(label)


# Convert the accumulated Python lists to NumPy arrays for scikit-learn / Keras.
features = np.array(features)
labels = np.array(labels)



In [4]:
from sklearn.preprocessing import LabelEncoder


# Fit a LabelEncoder on the string labels, then encode those same labels as
# integer class ids. The fitted encoder is kept so ids can be mapped back.
encoder = LabelEncoder().fit(labels)
encoded_labels = encoder.transform(labels)




In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle behind the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    encoded_labels,
    test_size=0.2,
    random_state=42,
)


In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so weight initialisation, dropout masks and batch shuffling are
# repeatable between runs (42 matches the seed used for the data split).
# Some GPU kernels may still introduce small run-to-run differences.
tf.keras.utils.set_random_seed(42)

n_features = X_train.shape[1]               # length of one feature vector
n_classes = len(np.unique(encoded_labels))  # one output unit per distinct label

model = models.Sequential([
    layers.Input(shape=(n_features,)),
    # Conv1D needs a (steps, channels) input: treat every feature as one step
    # carrying a single channel.
    layers.Reshape((n_features, 1)),

    # Two convolution + pooling stages over the feature axis.
    layers.Conv1D(64, 3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(128, 3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),

    # Dense classifier head with dropout after each hidden layer.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(n_classes, activation='softmax')
])

# The labels are integer class ids (not one-hot), hence the sparse variant of
# categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()



In [14]:
# Train for 20 epochs in mini-batches of 32 and keep the per-epoch metrics.
# NOTE(review): the held-out test split is also passed as validation data, so
# the val_* curves are not independent of the final test evaluation.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
)


Epoch 1/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5406 - loss: 1.2665 - val_accuracy: 0.5158 - val_loss: 1.3583
Epoch 2/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.5351 - loss: 1.2936 - val_accuracy: 0.5400 - val_loss: 1.2899
Epoch 3/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5429 - loss: 1.2837 - val_accuracy: 0.5487 - val_loss: 1.3049
Epoch 4/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5446 - loss: 1.2754 - val_accuracy: 0.5406 - val_loss: 1.3365
Epoch 5/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5369 - loss: 1.2658 - val_accuracy: 0.5259 - val_loss: 1.2909
Epoch 6/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5527 - loss: 1.2409 - val_accuracy: 0.5400 - val_loss: 1.2867
Epoch 7/20
[1m187/187[0

In [15]:
# Score the trained model on the held-out split. The model was compiled with a
# single metric, so evaluate yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5738 - loss: 1.2421
Test Accuracy: 57.29%
