In [None]:
!pip install librosa numpy pandas scikit-learn tensorflow matplotlib

In [None]:
pip list


In [2]:
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt


In [3]:
import os

In [4]:
import random

In [5]:
# Root folders of the training and testing audio splits, relative to the
# notebook's working directory. They are scanned recursively further down.
train_dir = './track-2-audio-identification-quest/training_split/training_split'
test_dir = './track-2-audio-identification-quest/testing_split/testing_split'

In [6]:
def get_list_of_files(dir_name):
    """Recursively collect the paths of all files below ``dir_name``.

    Entries of each directory are visited in sorted name order, so the
    returned list is the same on every run and on every file system.
    (``os.listdir`` itself returns entries in arbitrary order.)

    Args:
        dir_name: Path of the directory to scan.

    Returns:
        A list of file paths, each prefixed with ``dir_name``. Directories
        are descended into and are not themselves part of the result.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dir_name`` cannot be
            listed (for example, it does not exist).
    """
    all_files = []
    # Sort for a deterministic order: downstream code relies on the position
    # of each file in this list (feature rows, labels, output rows).
    for entry in sorted(os.listdir(dir_name)):
        full_path = os.path.join(dir_name, entry)
        if os.path.isdir(full_path):
            all_files.extend(get_list_of_files(full_path))
        else:
            all_files.append(full_path)
    return all_files

In [7]:
# Collect every file path found (recursively) under the training and testing
# folders. The position of a file in these lists is what later ties feature
# rows and predictions back to file names.
train_files = get_list_of_files(train_dir)
test_files = get_list_of_files(test_dir)

# Report how many files each split contains.
print(f"Number of training files: {len(train_files)}")
print(f"Number of testing files: {len(test_files)}")

Number of training files: 1453
Number of testing files: 637


In [8]:
# Sanity check: decode a single clip and report the length of the decoded
# array and the sample rate it was loaded at.
sample_file = train_files[0]  # Just picking the first training file for now
audio, sr = librosa.load(sample_file, sr=None)  # sr=None to keep the original sample rate

print(f"Audio sample shape: {audio.shape}")
print(f"Sample rate: {sr}")

Audio sample shape: (441000,)
Sample rate: 44100


In [10]:
def extract_mfcc(file_path, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D array holding the mean of each coefficient over time.
    """
    # sr=None keeps the file's original sample rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
    # Average along axis 1 so every clip yields a fixed-size vector.
    return np.mean(coefficients, axis=1)

def extract_mfcc_with_augmentation(file_path, n_mfcc=40):
    """Return the time-averaged MFCC vector of a randomly perturbed clip.

    Two independent coin flips decide whether the audio is pitch-shifted
    (by a random amount in [-2, 2] steps) and whether it is time-stretched
    (by a random rate in [0.8, 1.2]). A clip may therefore receive both
    effects, only one, or none. Randomness comes from the global ``random``
    module, so results vary between runs unless it is seeded.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        1-D array holding the mean of each coefficient over time.
    """
    # sr=None keeps the file's original sample rate instead of resampling.
    waveform, sample_rate = librosa.load(file_path, sr=None)

    # Coin flip 1: optional pitch shift.
    if random.choice([True, False]):
        shift_steps = random.uniform(-2, 2)
        waveform = librosa.effects.pitch_shift(y=waveform, sr=sample_rate, n_steps=shift_steps)

    # Coin flip 2: optional time stretch.
    if random.choice([True, False]):
        stretch_rate = random.uniform(0.8, 1.2)
        waveform = librosa.effects.time_stretch(y=waveform, rate=stretch_rate)

    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
    # Average along axis 1 so every clip yields a fixed-size vector.
    return np.mean(coefficients, axis=1)

def extract_features_from_files(file_paths, feature_extractor, augment=False):
    """Run ``feature_extractor`` on every path and stack the results.

    Args:
        file_paths: Iterable of audio file paths.
        feature_extractor: Callable that takes one path and returns the
            feature vector for that file.
        augment: Has no effect. It is kept only so existing calls that pass
            it keep working; whether augmentation happens is decided solely
            by which ``feature_extractor`` is supplied.

    Returns:
        ``np.ndarray`` built from the per-file features, in input order.
    """
    # The former `if augment / else` branches were identical, so a single
    # call per file is all that is needed.
    return np.array([feature_extractor(file_path) for file_path in file_paths])

# Baseline features: one un-augmented MFCC vector per training file.
X_train_original = extract_features_from_files(train_files, extract_mfcc, augment=False)

# A second, randomly perturbed MFCC vector for each of the same training files.
X_train_augmented = extract_features_from_files(train_files, extract_mfcc_with_augmentation, augment=True)

# Test clips are never augmented.
X_test = extract_features_from_files(test_files, extract_mfcc, augment=False)

# Stack the originals first and their augmented counterparts second; the
# label vectors are later duplicated in the same order.
X_train = np.concatenate([X_train_original, X_train_augmented])

# Report the shape of every feature matrix.
print(f"Original training features shape: {X_train_original.shape}")
print(f"Augmented training features shape: {X_train_augmented.shape}")
print(f"Combined training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")


Original training features shape: (1453, 40)
Augmented training features shape: (1453, 40)
Combined training features shape: (2906, 40)
Testing features shape: (637, 40)


In [11]:
import re

def extract_labels_from_filename(filename):
    """Derive the age-group and gender labels encoded in a file name.

    Accepted names have the form ``<number>_<age>_<male|female>.wav``
    (e.g. ``132_70_female.wav``), optionally with a trailing ``_<number>``
    before the extension (e.g. ``132_70_female_3.wav``).

    Age groups: ``0`` for age <= 15, ``1`` for 16-40, ``2`` for anything older.
    Gender: ``0`` for male, ``1`` for female.

    Args:
        filename: Base name of the audio file.

    Returns:
        Tuple ``(age_label, gender_label)`` of ints.

    Raises:
        ValueError: If ``filename`` does not match the expected pattern.
    """
    # The trailing "_<number>" group is optional so that both the documented
    # three-part names and the four-part variant are recognised.
    match = re.search(r'(\d+)_(\d+)_(male|female)(?:_(\d+))?\.wav', filename)
    if match is None:
        raise ValueError("Filename does not match expected format")

    age = int(match.group(2))
    if age <= 15:
        age_label = 0
    elif age <= 40:
        age_label = 1
    else:
        age_label = 2

    gender_label = 0 if match.group(3) == 'male' else 1
    return age_label, gender_label


In [12]:
def extract_labels_from_files(file_paths):
    """Parse age-group and gender labels from the base name of each path.

    Paths whose labels cannot be extracted are reported on stdout and
    skipped, so the returned arrays can be shorter than ``file_paths``.

    Args:
        file_paths: Iterable of file paths.

    Returns:
        Tuple ``(age_labels, gender_labels)`` of ``np.ndarray``, one entry
        per successfully parsed path, in input order.
    """
    ages, genders = [], []
    for path in file_paths:
        try:
            age, gender = extract_labels_from_filename(os.path.basename(path))
        except Exception as err:
            # Best effort: log the failure and move on to the next file.
            print(f"Error processing file {path}: {err}")
        else:
            ages.append(age)
            genders.append(gender)

    return np.array(ages), np.array(genders)


In [None]:
from sklearn.model_selection import train_test_split

# Parse the age-group / gender labels out of the training file names.
y_train_age, y_train_gender = extract_labels_from_files(train_files)

print(f"Training age labels shape: {y_train_age.shape}")
print(f"Training gender labels shape: {y_train_gender.shape}")

# Feature matrix: original rows followed by their augmented counterparts.
X_train = np.concatenate([X_train_original, X_train_augmented])

# Augmentation does not change a clip's labels, so each label vector is simply
# repeated once to line up with the stacked feature matrix.
y_train_age_combined = np.tile(y_train_age, 2)
y_train_gender_combined = np.tile(y_train_gender, 2)

# Hold out 20% of the combined rows for validation; the fixed random_state
# makes the split repeatable.
X_train_final, X_val, y_train_age_final, y_val_age, y_train_gender_final, y_val_gender = train_test_split(
    X_train,
    y_train_age_combined,
    y_train_gender_combined,
    test_size=0.2,
    random_state=42,
)

# Report the shapes of the resulting training and validation sets.
print(f"Training features shape: {X_train_final.shape}")
print(f"Validation features shape: {X_val.shape}")
print(f"Training age labels shape: {y_train_age_final.shape}")
print(f"Validation age labels shape: {y_val_age.shape}")
print(f"Training gender labels shape: {y_train_gender_final.shape}")
print(f"Validation gender labels shape: {y_val_gender.shape}")


In [14]:
from sklearn.preprocessing import StandardScaler

# Standardise every MFCC coefficient with a single shared scaler.
scaler = StandardScaler()

# Fit on the training split only, so that statistics of the validation rows
# (which are part of X_train) and of the test set never influence the scaling.
scaler.fit(X_train_final)

# Apply the fitted scaling to the combined, validation and test matrices.
# NOTE: this overwrites the arrays in place; re-running the cell would scale
# X_val and X_test a second time.
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [15]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Input
from tensorflow.keras.optimizers import Adam

# Shared trunk: one 1-D convolution over the coefficient axis (single input
# channel), followed by pooling, flattening and a small dense layer.
input_layer = Input(shape=(X_train.shape[1], 1))
conv_out = Conv1D(32, 3, activation='relu')(input_layer)
pooled = MaxPooling1D(2)(conv_out)
flattened = Flatten()(pooled)
shared = Dense(64, activation='relu')(flattened)

# Two softmax heads on top of the shared representation:
# 3 age groups and 2 genders.
age_output = Dense(3, activation='softmax', name='age_output')(shared)
gender_output = Dense(2, activation='softmax', name='gender_output')(shared)

# Output order (age first, gender second) is the order `predict` returns.
model = Model(inputs=input_layer, outputs=[age_output, gender_output])

# Labels are integer class ids, hence sparse cross-entropy for both heads.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss={
        'age_output': 'sparse_categorical_crossentropy',
        'gender_output': 'sparse_categorical_crossentropy',
    },
    metrics={
        'age_output': 'accuracy',
        'gender_output': 'accuracy',
    },
)


In [16]:
# Show a summary of the two-headed model defined above.
model.summary()

In [17]:
# Give each feature matrix an explicit trailing channel axis of size 1, which
# is the layout the model's input layer was declared with. The first two
# dimensions are kept as they are.
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_val = X_val.reshape(*X_val.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)
print(X_train.shape)
print(X_val.shape)



(2906, 40, 1)
(582, 40, 1)


In [None]:
# Rebuild the combined feature matrix from the raw (unscaled, 2-D) features so
# this cell produces the same result however often it is run.
X_train = np.concatenate([X_train_original, X_train_augmented])

# Since labels remain the same for both original and augmented data, repeat
# each label vector once to line up with the stacked features.
y_train_age_combined = np.tile(y_train_age, 2)
y_train_gender_combined = np.tile(y_train_gender, 2)

# Check the sizes after concatenation
print(f"Training features shape after augmentation: {X_train.shape}")
print(f"Training age labels shape after augmentation: {y_train_age_combined.shape}")
print(f"Training gender labels shape after augmentation: {y_train_gender_combined.shape}")

# Fail early with a clear message if label extraction skipped any file:
# features are computed for every file, labels only for parseable names.
assert len(X_train) == len(y_train_age_combined) == len(y_train_gender_combined), (
    "Feature/label count mismatch - check for files skipped during label extraction"
)

# Split the training data into training and validation sets.
# NOTE(review): the original and augmented version of the same clip can end up
# on different sides of this split, so validation scores may be optimistic.
X_train_final, X_val, y_train_age_final, y_val_age, y_train_gender_final, y_val_gender = train_test_split(
    X_train, y_train_age_combined, y_train_gender_combined, test_size=0.2, random_state=42
)

# The split above yields raw 2-D features, whereas X_test is standardised with
# `scaler` and reshaped to (samples, coefficients, 1) before prediction.
# Apply the same preprocessing here so the model is trained and validated on
# inputs that match what it sees at prediction time.
X_train_final = scaler.transform(X_train_final)[..., np.newaxis]
X_val = scaler.transform(X_val)[..., np.newaxis]

# Check the sizes of the final training and validation sets
print(f"Final training features shape: {X_train_final.shape}")
print(f"Validation features shape: {X_val.shape}")
print(f"Final training age labels shape: {y_train_age_final.shape}")
print(f"Validation age labels shape: {y_val_age.shape}")
print(f"Final training gender labels shape: {y_train_gender_final.shape}")
print(f"Validation gender labels shape: {y_val_gender.shape}")

# Train both heads jointly; targets are passed per output name.
history = model.fit(
    X_train_final,
    {'age_output': y_train_age_final, 'gender_output': y_train_gender_final},
    epochs=200,
    batch_size=32,
    validation_data=(X_val, {'age_output': y_val_age, 'gender_output': y_val_gender}),
    verbose=1  # Set to 1 to see progress during training
)


In [19]:
# Recompute the test features from the audio files, apply the same scaling as
# the training data, then add the trailing channel axis the model expects.
X_test = scaler.transform(extract_features_from_files(test_files, extract_mfcc))
X_test = X_test[..., np.newaxis]


In [20]:
# Sanity check: X_test should have one row per test file.
print(f"Number of test files: {len(test_files)}")
print(f"Shape of X_test: {X_test.shape}")

Number of test files: 637
Shape of X_test: (637, 40, 1)


In [21]:
# Same shape check as the previous cell, repeated right before prediction.
print(f"Shape of X_test: {X_test.shape}")

Shape of X_test: (637, 40, 1)


In [22]:
# Run both heads over the test set; the result is indexed below as
# [0] = age probabilities and [1] = gender probabilities.
predictions = model.predict(X_test, batch_size=32)   # 637 samples / 32 per batch -> 20 batches

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [32]:
# The model returns one probability matrix per output head, in the order the
# outputs were declared: age first, gender second.
predicted_age_probs, predicted_gender_probs = predictions[0], predictions[1]

# Hard labels: the most probable class of each row.
predicted_age_labels = predicted_age_probs.argmax(axis=1)
predicted_gender_labels = predicted_gender_probs.argmax(axis=1)

# (A confidence-threshold fallback that forced low-confidence rows into age
# group 0 was tried here with a threshold of 0 and is currently disabled.)

# Spot-check the first ten predictions of each head.
print(predicted_age_labels[:10])
print(predicted_gender_labels[:10])




[2 2 2 1 2 0 2 2 1 2]
[1 0 1 1 0 1 1 1 1 0]


In [None]:
# Define threshold function
def threshold_gender_prediction(gender_probs, threshold=0.9):
    """Turn one sample's gender probabilities into a class label.

    Class 1 is predicted only when its probability reaches ``threshold``;
    otherwise the sample falls back to class 0. With the default of 0.9
    the decision is deliberately biased towards class 0. Only
    ``gender_probs[1]`` is inspected.

    Args:
        gender_probs: Indexable pair of class probabilities for one sample,
            where index 1 is the probability of class 1.
        threshold: Minimum probability of class 1 required to predict it.

    Returns:
        ``1`` if ``gender_probs[1] >= threshold``, else ``0``.
    """
    # No per-sample debug print here: this runs once per test sample and
    # would flood the cell output.
    return 0 if gender_probs[1] < threshold else 1

# Reuse the gender probabilities already produced by the earlier
# `model.predict(X_test, ...)` call instead of running inference a second time.
gender_probs = predicted_gender_probs

# Apply the threshold to each sample's probability pair.
gender_predictions = [threshold_gender_prediction(probs) for probs in gender_probs]

# Print a few of the predictions for inspection (at most 5, so this also works
# when fewer than 5 test samples are available).
print("Gender predictions with threshold:")
for i in range(min(5, len(gender_predictions))):
    print(f"Sample {i}: Gender probs: {gender_probs[i]}, Prediction: {gender_predictions[i]}")

# Results table: file name without extension, predicted age group, and the
# thresholded gender prediction mapped to a readable label (0 -> Male).
df = pd.DataFrame({
    'File_name': [os.path.splitext(os.path.basename(file))[0] for file in test_files],
    'Age_group': predicted_age_labels,
    'Gender': ['Male' if gender == 0 else 'Female' for gender in gender_predictions]
})

print(df.head())


In [27]:
# Assemble the results table, one row per test file: the file name without its
# extension, the predicted age group, and the thresholded gender as text.
df = pd.DataFrame({
    'File_name': [os.path.splitext(os.path.basename(path))[0] for path in test_files],
    'Age_group': predicted_age_labels,
    'Gender': ['Female' if label != 0 else 'Male' for label in gender_predictions],
})

# Print DataFrame details
print(f"Number of rows in DataFrame: {len(df)}")
print(df.head())



Number of rows in DataFrame: 637
  File_name  Age_group  Gender
0         1          2    Male
1        10          2    Male
2      1000          2    Male
3       106          1  Female
4       107          2    Male


In [None]:
# Write the results table to a CSV file in the working directory, leaving out
# the DataFrame index (index=False).
output_csv_path = 'lala.csv'
df.to_csv(output_csv_path, index=False)

#finished !!

In [None]:
# Inspect the raw age-group probabilities behind the argmax labels.
print(predicted_age_probs[:10])  # Print probabilities for the first 10 samples

In [28]:
# Base names (including extension) of the test files, in the same order as the
# rows of the prediction arrays.
test_file_names = list(map(os.path.basename, test_files))

# Position of the test sample to inspect (0 = first file).
sample_index = 0

# Look up and report which file that position corresponds to.
filename = test_file_names[sample_index]
print(f"The filename for index {sample_index} is: {filename}")



The filename for index 0 is: 1.wav


In [30]:
# Pull out the age and gender probability rows for the sample selected by
# `sample_index`, to see how confident the model was for that file.
specific_age_probs = predicted_age_probs[sample_index]
specific_gender_probs = predicted_gender_probs[sample_index]

print(f"Probabilities for age for sample {sample_index}: {specific_age_probs}")
print(f"Probabilities for gender for sample {sample_index}: {specific_gender_probs}")


Probabilities for age for sample 0: [0.37400037 0.11687841 0.50912124]
Probabilities for gender for sample 0: [0.2918719 0.7081281]
