In [None]:
!pip install librosa numpy pandas scikit-learn tensorflow matplotlib

In [None]:
pip list


In [1]:
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt


In [2]:
import os

In [3]:
train_dir = './track-2-audio-identification-quest/training_split/training_split'
test_dir = './track-2-audio-identification-quest/testing_split/testing_split'

In [4]:
def get_list_of_files(dir_name):
    list_of_files = os.listdir(dir_name)
    all_files = []
    for entry in list_of_files:
        full_path = os.path.join(dir_name, entry)
        if os.path.isdir(full_path):
            all_files.extend(get_list_of_files(full_path))
        else:
            all_files.append(full_path)
    return all_files

In [5]:
# Get list of files
train_files = get_list_of_files(train_dir)
test_files = get_list_of_files(test_dir)

print(f"Number of training files: {len(train_files)}")
print(f"Number of testing files: {len(test_files)}")

Number of training files: 1453
Number of testing files: 637


In [6]:
# Load a sample audio file
sample_file = train_files[0]  # Just picking the first training file for now
audio, sr = librosa.load(sample_file, sr=None)  # sr=None to keep the original sample rate

print(f"Audio sample shape: {audio.shape}")
print(f"Sample rate: {sr}")

Audio sample shape: (441000,)
Sample rate: 44100


In [7]:
def extract_mfcc(file_path, n_mfcc=20):
    audio, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfccs, axis=1)  # Averaging over time

# Extract MFCC features for a sample file
mfcc_features = extract_mfcc(sample_file)
print(f"MFCC features shape: {mfcc_features.shape}")

MFCC features shape: (20,)


In [8]:
def extract_features_from_files(file_paths, feature_extractor):
    features = []
    for file_path in file_paths:
        feature = feature_extractor(file_path)
        features.append(feature)
    return np.array(features)

# Extract features for training and testing files
X_train = extract_features_from_files(train_files, extract_mfcc)
X_test = extract_features_from_files(test_files, extract_mfcc)

print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")


Training features shape: (1453, 20)
Testing features shape: (637, 20)


In [9]:
import re

def extract_labels_from_filename(filename):
    # Example filename: '132_70_female.wav'
    print(f"Processing filename:{filename}")
    match = re.search(r'(\d+)_([\d]+)_(male|female)_(\d+)\.wav', filename)
    if match:
        age = int(match.group(2))
        gender = match.group(3)
        
        # Determine age label
        if age <= 15:
            age_label = 0
        elif 16 <= age <= 40:
            age_label = 1
        else:
            age_label = 2
        
        # Determine gender label
        gender_label = 0 if gender == 'male' else 1
        
        return age_label, gender_label
    else:
        raise ValueError("Filename does not match expected format")


In [10]:
def extract_labels_from_files(file_paths):
    age_labels = []
    gender_labels = []
    for file_path in file_paths:
        try:
            age_label, gender_label = extract_labels_from_filename(os.path.basename(file_path))
            age_labels.append(age_label)
            gender_labels.append(gender_label)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")
            
    return np.array(age_labels), np.array(gender_labels)


In [11]:
y_train_age, y_train_gender = extract_labels_from_files(train_files)

print(f"Training age labels shape: {y_train_age.shape}")
print(f"Training gender labels shape: {y_train_gender.shape}")

from sklearn.model_selection import train_test_split

# Split training data into training and validation sets
X_train, X_val, y_train_age, y_val_age, y_train_gender, y_val_gender = train_test_split(
    X_train, y_train_age, y_train_gender, test_size=0.2, random_state=42
)
#meaning age labels that i have made classified now
print(f"Validation features shape: {X_val.shape}")
print(f"Validation age labels shape: {y_val_age.shape}")
print(f"Validation gender labels shape: {y_val_gender.shape}")


Processing filename:0_19_male_0.wav
Processing filename:0_19_male_1.wav
Processing filename:0_19_male_2.wav
Processing filename:0_19_male_3.wav
Processing filename:0_19_male_4.wav
Processing filename:0_19_male_5.wav
Processing filename:0_19_male_6.wav
Processing filename:0_19_male_7.wav
Processing filename:0_19_male_8.wav
Processing filename:100_30_female_0.wav
Processing filename:100_30_female_1.wav
Processing filename:100_30_female_2.wav
Processing filename:100_30_female_3.wav
Processing filename:100_30_female_4.wav
Processing filename:100_30_female_5.wav
Processing filename:100_30_female_6.wav
Processing filename:100_30_female_7.wav
Processing filename:100_30_female_8.wav
Processing filename:101_23_male_0.wav
Processing filename:101_23_male_1.wav
Processing filename:101_23_male_10.wav
Processing filename:101_23_male_11.wav
Processing filename:101_23_male_12.wav
Processing filename:101_23_male_13.wav
Processing filename:101_23_male_14.wav
Processing filename:101_23_male_15.wav
Proces

In [12]:
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and validation data
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input
from tensorflow.keras.optimizers import Adam

# Define the model
input_layer = Input(shape=(X_train.shape[1], 1))

# LSTM layers
x = LSTM(128, return_sequences=True)(input_layer)
x = Dropout(0.5)(x)  # Add dropout to prevent overfitting

x = LSTM(64)(x)
x = Dropout(0.5)(x)  # Add dropout to prevent overfitting

x = Dense(64, activation='relu')(x)  # Dense layer
x = Dropout(0.5)(x)  # Add dropout to prevent overfitting

# Define two separate output layers
age_output = Dense(3, activation='softmax', name='age_output')(x)
gender_output = Dense(2, activation='softmax', name='gender_output')(x)

# Create the model
model = Model(inputs=input_layer, outputs=[age_output, gender_output])

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss={'age_output': 'sparse_categorical_crossentropy', 'gender_output': 'sparse_categorical_crossentropy'},
    metrics={'age_output': 'accuracy', 'gender_output': 'accuracy'}
)




In [15]:
model.summary()

In [16]:
#reshaping
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
print(X_train.shape)  
print(X_val.shape)    



(1162, 20, 1)
(291, 20, 1)


In [17]:
history = model.fit(
    X_train, 
    {'age_output': y_train_age, 'gender_output': y_train_gender},
    epochs=500,  
    batch_size=32,  
    validation_data=(X_val, {'age_output': y_val_age, 'gender_output': y_val_gender}),
    verbose=1  # Set to 1 to see progress during training
)


Epoch 1/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 62ms/step - age_output_accuracy: 0.5697 - age_output_loss: 1.0724 - gender_output_accuracy: 0.5728 - gender_output_loss: 0.6884 - loss: 1.7608 - val_age_output_accuracy: 0.7045 - val_age_output_loss: 1.0269 - val_gender_output_accuracy: 0.6632 - val_gender_output_loss: 0.6813 - val_loss: 1.7042
Epoch 2/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - age_output_accuracy: 0.7288 - age_output_loss: 0.9847 - gender_output_accuracy: 0.6705 - gender_output_loss: 0.6678 - loss: 1.6526 - val_age_output_accuracy: 0.7320 - val_age_output_loss: 0.8566 - val_gender_output_accuracy: 0.6804 - val_gender_output_loss: 0.6378 - val_loss: 1.4908
Epoch 3/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - age_output_accuracy: 0.7575 - age_output_loss: 0.7723 - gender_output_accuracy: 0.6718 - gender_output_loss: 0.6034 - loss: 1.3762 - val_age_output_accuracy: 0.7423 - v

In [18]:
# Extract features for test files
X_test = extract_features_from_files(test_files, extract_mfcc)
X_test = scaler.transform(X_test)  # Apply the same scaling as training data
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))  # Reshape for the model


In [19]:
print(f"Number of test files: {len(test_files)}")
print(f"Shape of X_test: {X_test.shape}")

Number of test files: 637
Shape of X_test: (637, 20, 1)


In [20]:
print(f"Shape of X_test: {X_test.shape}")

Shape of X_test: (637, 20, 1)


In [21]:
predictions = model.predict(X_test, batch_size=32)   #637/32

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


In [31]:
# Define thresholds for fallback logic
lower_bound_threshold = 0.6 # Lower bound for confidence

# Extract probabilities for each sample
predicted_age_probs = predictions[0]  # Probabilities for age groups
predicted_gender_probs = predictions[1]
# Initialize an array for predicted labels
predicted_age_labels = np.argmax(predicted_age_probs, axis=1)
predicted_gender_labels = np.argmax(predicted_gender_probs, axis=1)

# Apply fallback mechanism so 0 can be included now,
for i, probs in enumerate(predicted_age_probs):
    if np.max(probs) < lower_bound_threshold:
        predicted_age_labels[i] = 0  # Assign to age group 0 if below threshold


# Print some predictions to check
print(predicted_age_labels[:10])  # Print first 10 predictions
print(predicted_gender_labels[:10])




[1 1 1 1 1 1 1 1 1 1]
[0 0 1 1 0 0 1 0 1 0]


In [32]:
import pandas as pd
import os

# Redefine the DataFrame with .wav extension removed
df = pd.DataFrame({
    'File_name': [os.path.splitext(os.path.basename(file))[0] for file in test_files],
    'Age_group': predicted_age_labels,
    'Gender': ['Male' if gender == 0 else 'Female' for gender in predicted_gender_labels]
})

print(f"Number of rows in DataFrame: {len(df)}")

# Print the first few rows to check the DataFrame
print(df.head())


Number of rows in DataFrame: 637
  File_name  Age_group  Gender
0         1          1    Male
1        10          1    Male
2      1000          1  Female
3       106          1  Female
4       107          1    Male


In [33]:
output_csv_path = 'predict.csv'
df.to_csv(output_csv_path, index=False)

#finished !!

In [None]:
#not req really from now just for testing...
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# After training the model and making predictions
y_val_pred = model.predict(X_val)
y_val_age_pred = np.argmax(y_val_pred[0], axis=1)
y_val_gender_pred = np.argmax(y_val_pred[1], axis=1)

# Confusion matrix for age and gender
age_cm = confusion_matrix(y_val_age, y_val_age_pred)
gender_cm = confusion_matrix(y_val_gender, y_val_gender_pred)

# Print confusion matrices
print("Age Confusion Matrix:")
print(age_cm)
print("Gender Confusion Matrix:")
print(gender_cm)

# Visualize the confusion matrix for age
plt.figure(figsize=(8, 6))
sns.heatmap(age_cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['0-15', '16-40', '41+'], 
            yticklabels=['0-15', '16-40', '41+'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Age Confusion Matrix')
plt.show()

# Visualize the confusion matrix for gender
plt.figure(figsize=(8, 6))
sns.heatmap(gender_cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Male', 'Female'], 
            yticklabels=['Male', 'Female'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Gender Confusion Matrix')
plt.show()


In [None]:
print(predicted_age_probs[:10])  # Print probabilities for the first 10 samples

In [22]:
# Create a list of test file names
test_file_names = [os.path.basename(file) for file in test_files]

# Define the index you want to check
sample_index = 93# For example, if the index is 1

# Get the filename for that index
filename = test_file_names[sample_index]

# Print the filename
print(f"The filename for index {sample_index} is: {filename}")



The filename for index 93 is: 240.wav


In [25]:

specific_age_probs = predicted_age_probs[sample_index]
specific_gender_probs = predicted_gender_probs[sample_index]

print(f"Probabilities for age for sample {sample_index}: {specific_age_probs}")
print(f"Probabilities for gender for sample {sample_index}: {specific_gender_probs}")


Probabilities for age for sample 93: [9.4649891e-07 9.1350216e-01 8.6496919e-02]
Probabilities for gender for sample 93: [0.22458996 0.77541006]
