In [None]:
# Install required packages
import subprocess
import sys

# pip distribution names to make available in the running kernel.
packages = ['librosa', 'pandas', 'seaborn', 'matplotlib', 'tensorflow', 'scikit-learn']

# A distribution name is not always the importable module name: scikit-learn
# is imported as `sklearn`. Probing with the pip name can never succeed, which
# caused a pip invocation on every single run.
import_names = {'scikit-learn': 'sklearn'}

for package in packages:
    try:
        __import__(import_names.get(package, package))
    except ImportError:
        # `sys.executable -m pip` installs into the interpreter backing this
        # kernel rather than whichever `pip` is first on PATH.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])


In [None]:
import os
import json
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras

# Show which TensorFlow version the kernel picked up.
print(f"TensorFlow: {tf.__version__}")
print("Ready to start experiments")


In [None]:
# Experiment configuration: the values a reader is expected to tune.
#
# The dataset location can be supplied through the SPEECH_DATASET_PATH
# environment variable, so the notebook can run on another machine without
# editing this cell. The original path is kept as the fallback.
dataset_path = os.environ.get("SPEECH_DATASET_PATH", "/Users/alex/speech_data")  # <-- or change this to your data folder
results_dir = "experiment_results"  # relative path; created under the working directory
sample_rate = 16000  # passed to librosa.load as the target sampling rate
epochs = 10
test_size = 0.1  # fraction of samples handed to train_test_split as the test set

print("Dataset:", dataset_path)
print("Will train for", epochs, "epochs")


In [None]:
def count_samples_per_class(data_dir, extensions=('.wav', '.mp3')):
    """Count the audio files available in each class folder.

    Every immediate subdirectory of ``data_dir`` is treated as one class. Only
    regular files whose names end with one of ``extensions`` are counted. The
    default suffixes are the ones ``load_audio_data`` accepts, so the reported
    numbers are not inflated by stray entries such as ``.DS_Store`` or nested
    folders.

    Args:
        data_dir: Directory containing one subdirectory per class.
        extensions: Filename suffixes that count as audio samples.

    Returns:
        dict mapping class name -> number of matching files, with the classes
        inserted in sorted order.
    """
    suffixes = tuple(extensions)  # str.endswith only accepts a str or a tuple
    class_counts = {}
    # os.listdir order is arbitrary; sort so the report is stable across runs.
    for class_name in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue
        class_counts[class_name] = sum(
            1
            for entry in os.listdir(class_path)
            if entry.endswith(suffixes)
            and os.path.isfile(os.path.join(class_path, entry))
        )
    return class_counts

def load_audio_data(data_dir):
    """Load every ``.wav``/``.mp3`` file found in the class folders of ``data_dir``.

    Each immediate subdirectory of ``data_dir`` is one class, and its name
    becomes the label of every audio file inside it. Files are decoded with
    ``librosa.load`` using the module-level ``sample_rate``.

    Folders and files are visited in sorted order. ``os.listdir`` returns
    entries in arbitrary order, and the sample order feeds the seeded
    train/test split, so an unsorted walk could produce a different split on
    another machine.

    Args:
        data_dir: Directory containing one subdirectory per class.

    Returns:
        ``(audio_data, labels)``: a list with the waveform returned by
        ``librosa.load`` for each file, and a parallel list of class names.
        Files that fail to load are reported and skipped.
    """
    audio_data = []
    labels = []

    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        for audio_file in sorted(os.listdir(class_dir)):
            if not audio_file.endswith(('.wav', '.mp3')):
                continue
            file_path = os.path.join(class_dir, audio_file)
            try:
                audio, _ = librosa.load(file_path, sr=sample_rate)
            except Exception as exc:
                # Best effort: skip files that cannot be decoded, but say why.
                # Catching Exception instead of using a bare `except` lets a
                # kernel interrupt stop a long load rather than being reported
                # as a bad file.
                print(f"Couldn't load {file_path}: {exc}")
                continue
            audio_data.append(audio)
            labels.append(class_name)

    return audio_data, labels

def make_label_encoder(labels):
    """Turn class-name labels into integer class ids.

    Ids are assigned by sorting the distinct labels, so the mapping does not
    depend on the order in which the labels appear.

    Returns:
        ``(encoded, label_to_int)``: a NumPy array holding the id of each input
        label, in input order, and the label -> id dictionary.
    """
    vocabulary = list(set(labels))
    vocabulary.sort()
    label_to_int = dict(zip(vocabulary, range(len(vocabulary))))

    ids = []
    for label in labels:
        ids.append(label_to_int[label])
    return np.array(ids), label_to_int


In [None]:
# Check what's in the dataset
print("Sample counts per class:")
counts = count_samples_per_class(dataset_path)
for class_name, count in counts.items():
    print(f"  {class_name}: {count} files")

# Load all the audio files
print("\nLoading audio files...")
audio_files, labels = load_audio_data(dataset_path)
if not audio_files:
    # Fail here with an actionable message instead of letting an empty dataset
    # surface as an error further down in the split.
    raise ValueError(
        f"No audio files could be loaded from {dataset_path!r}. "
        "Expected one sub-folder per class containing .wav or .mp3 files."
    )
y_encoded, label_mapping = make_label_encoder(labels)

# Pad/trim audio to same length and reshape for CNN
max_len = sample_rate  # 1 second of audio
X = []
for audio in audio_files:
    audio = audio[:max_len]  # trim; a no-op for clips that are already short enough
    audio = np.pad(audio, (0, max_len - len(audio)))  # zero-pad the tail up to max_len
    X.append(audio)

# Add a trailing channel axis: (n_samples, max_len, 1) is what Conv1D consumes.
X = np.array(X).reshape(-1, max_len, 1)
print(f"Data shape: {X.shape}")
print(f"Classes: {list(label_mapping.keys())}")

# Train/test split
# NOTE(review): the split is not stratified, so small classes may be
# under-represented in the test set. Consider stratify=y_encoded once every
# class is known to have enough samples.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=test_size, random_state=42)
print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")


In [None]:
def create_cnn_model(activation_fn, input_shape, num_classes):
    """Build and compile the 1-D CNN used for every activation experiment.

    Architecture: two Conv1D -> MaxPooling1D(3) -> Dropout(0.3) stages
    (16 filters of width 13, then 32 filters of width 11), then Flatten, two
    Dense layers (128 and 64 units, each followed by Dropout(0.3)) and a
    softmax output. ``activation_fn`` is used by every hidden layer, so the
    activation is the only thing that differs between the models built here.

    Args:
        activation_fn: Value given to the hidden layers' ``activation``
            argument (a Keras activation name or a callable).
        input_shape: Shape of one input sample, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, accuracy metric).
    """
    model = keras.Sequential([
        # Declare the input explicitly. Passing `input_shape=` to the first
        # layer is deprecated in Keras 3 and raises a UserWarning there.
        keras.Input(shape=input_shape),
        keras.layers.Conv1D(16, 13, activation=activation_fn),
        keras.layers.MaxPooling1D(3),
        keras.layers.Dropout(0.3),

        keras.layers.Conv1D(32, 11, activation=activation_fn),
        keras.layers.MaxPooling1D(3),
        keras.layers.Dropout(0.3),

        keras.layers.Flatten(),
        keras.layers.Dense(128, activation=activation_fn),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation=activation_fn),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        # "sparse" variant: targets are integer class ids, not one-hot vectors.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [None]:
def train_model(model, X_train, y_train, X_val, y_val, activation_name, num_epochs=None):
    """Fit ``model`` and score it on the validation data.

    Args:
        model: Compiled Keras model to train.
        X_train, y_train: Training inputs and integer class labels.
        X_val, y_val: Data used both as Keras ``validation_data`` during
            fitting and for the final metrics computed here.
        activation_name: Label for this run; printed and stored in the result.
        num_epochs: Number of training epochs. ``None`` (the default) uses the
            module-level ``epochs`` setting.

    Returns:
        dict with keys ``activation``, ``accuracy``, ``precision``, ``recall``,
        ``f1_score`` (the last three are weighted averages), ``training_time``
        (seconds spent inside ``model.fit``) and ``history`` (the
        ``History.history`` dict returned by Keras).
    """
    if num_epochs is None:
        num_epochs = epochs

    print(f"\nTraining with {activation_name} activation...")

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=num_epochs,
        verbose=1
    )
    training_time = time.time() - start_time

    # Get predictions and calculate metrics
    y_pred = model.predict(X_val)
    y_pred_classes = np.argmax(y_pred, axis=1)  # most probable class per sample

    # zero_division=0: a class that is never predicted (or never present) has
    # an undefined precision/recall. Score it as 0 explicitly; the default
    # yields the same value but emits an UndefinedMetricWarning each time.
    # float(): store plain Python floats so the results serialise with json
    # whatever scalar type the metric functions return.
    accuracy = float(accuracy_score(y_val, y_pred_classes))
    precision = float(precision_score(y_val, y_pred_classes, average='weighted', zero_division=0))
    recall = float(recall_score(y_val, y_pred_classes, average='weighted', zero_division=0))
    f1 = float(f1_score(y_val, y_pred_classes, average='weighted', zero_division=0))

    results = {
        'activation': activation_name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'training_time': training_time,
        'history': history.history
    }

    print(f"Final accuracy: {accuracy:.3f}")
    print(f"Training took {training_time:.1f} seconds")

    return results


In [None]:
# Activations under comparison, keyed by the name used in the reports.
# String values are Keras activation names; leaky_relu is passed as a callable.
activation_functions = dict(
    tanh='tanh',
    relu='relu',
    leaky_relu=tf.nn.leaky_relu,
)

# One input sample is `sample_rate` time steps with a single channel.
input_shape = (sample_rate, 1)
num_classes = len(label_mapping)

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")
print(f"Testing activations: {list(activation_functions)}")


In [None]:
# Train models with each activation function
experiment_results = []
experiment_seed = 42  # same value the train/test split uses as random_state

for activation_name, activation_fn in activation_functions.items():
    print(f"\n{'='*60}")
    print(f"Testing {activation_name.upper()} activation")
    print(f"{'='*60}")

    # Release the previous model's Keras state and re-seed the Python, NumPy
    # and TensorFlow generators before each run. Every activation then starts
    # from the same random state, so differences in the metrics are not just
    # the result of a different initialisation or shuffle order.
    keras.backend.clear_session()
    keras.utils.set_random_seed(experiment_seed)

    # Create and train model
    model = create_cnn_model(activation_fn, input_shape, num_classes)
    results = train_model(model, X_train, y_train, X_test, y_test, activation_name)
    experiment_results.append(results)

print("\n" + "="*60)
print("All experiments finished!")
print("="*60)


In [None]:
# Compare the results from different activation functions
print("\nRESULTS COMPARISON")
print("-" * 70)
print(f"{'Activation':<12} {'Accuracy':<10} {'F1 Score':<10} {'Time':<10}")
print("-" * 70)

for result in experiment_results:
    activation = result['activation']
    accuracy = result['accuracy']
    f1 = result['f1_score']
    time_s = result['training_time']

    # Attach the unit before padding so "s" follows the number directly
    # instead of trailing the column's padding.
    time_label = f"{time_s:.1f}s"
    print(f"{activation:<12} {accuracy:<10.3f} {f1:<10.3f} {time_label:<10}")

print("-" * 70)

# Select the winner with max() rather than comparing against an initial 0.
# The previous approach left best_result as None, and crashed on the summary
# line, when no run scored above 0 or when there were no results at all.
# max() returns the first of several equal maxima, so ties still go to the
# earliest run.
best_result = max(experiment_results, key=lambda r: r['accuracy'], default=None)
if best_result is None:
    best_accuracy = 0
    print("Best: n/a (no experiments were run)")
else:
    best_accuracy = best_result['accuracy']
    print(f"Best: {best_result['activation']} with {best_accuracy:.3f} accuracy")

# Save results
os.makedirs(results_dir, exist_ok=True)

# Remove history from results to keep file size reasonable
save_results = []
for r in experiment_results:
    save_r = {key: value for key, value in r.items() if key != 'history'}
    save_results.append(save_r)

with open(f'{results_dir}/experiment_results.json', 'w') as f:
    json.dump(save_results, f, indent=2)

print(f"\nResults saved to {results_dir}/experiment_results.json")
