In [None]:
# Testing Tanh Activation

This notebook focuses specifically on testing the tanh activation function for speech command classification.

Tanh (hyperbolic tangent) outputs values between -1 and 1. Because its outputs are zero-centered, it can sometimes help gradient flow compared to activations such as sigmoid, though it still saturates for large-magnitude inputs.

## Data setup
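Your audio files should be organized in one folder per class inside the directory that `data_path` points to (shown here as `speech_data/`); each folder name is used as the class label: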
```
speech_data/
├── left/
├── right/
├── stop/
└── go/
```

## What this does
1. Loads audio data and pads or trims each clip to one second of raw waveform (no spectrogram conversion is performed)
2. Trains a 1D CNN using tanh activations in every hidden layer (the output layer uses softmax)
3. Shows training progress and final results


In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Config - update the path to your data
# The dataset location can be overridden without editing the notebook by setting
# the SPEECH_DATA_PATH environment variable; otherwise the default below is used.
data_path = os.environ.get("SPEECH_DATA_PATH", "/Users/alex/speech_commands")  # <-- change this
sample_rate = 16000  # Hz; clips are loaded at this rate and padded/trimmed to this many samples
epochs = 15

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling are repeatable across runs.
# The value matches the random_state already used for the train/test split.
tf.keras.utils.set_random_seed(42)

print(f"TensorFlow {tf.__version__}")
print("Using tanh activation function")


In [None]:
# Load the audio data
#
# Every sub-directory of `data_path` is treated as one class. Each .wav file in it
# is loaded at `sample_rate` Hz and padded/trimmed to exactly `sample_rate` samples.
audio_data = []
labels = []

# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting
# fixes the sample order, so the seeded train/test split selects the same files
# regardless of the machine the notebook runs on.
for class_dir in sorted(os.listdir(data_path)):
    class_path = os.path.join(data_path, class_dir)
    if not os.path.isdir(class_path):
        continue

    print(f"Loading {class_dir} samples...")
    for audio_file in sorted(os.listdir(class_path)):
        # Case-insensitive match so files such as "clip.WAV" are not silently skipped.
        if not audio_file.lower().endswith('.wav'):
            continue

        file_path = os.path.join(class_path, audio_file)
        audio, _ = librosa.load(file_path, sr=sample_rate)

        # Pad or trim to 1 second
        if len(audio) > sample_rate:
            audio = audio[:sample_rate]
        else:
            audio = np.pad(audio, (0, sample_rate - len(audio)))

        audio_data.append(audio)
        labels.append(class_dir)

# Stop here with an explicit message instead of carrying an empty dataset
# into the split and training cells.
if not audio_data:
    raise FileNotFoundError(
        f"No .wav files found in the class folders under {data_path!r}"
    )

# Convert to arrays and encode labels
# X gets a trailing channel axis: (num_samples, sample_rate, 1).
X = np.array(audio_data).reshape(-1, sample_rate, 1)
# Class ids are assigned in alphabetical order of the folder names.
unique_labels = sorted(set(labels))
label_map = {label: i for i, label in enumerate(unique_labels)}
y = np.array([label_map[label] for label in labels])

print(f"Loaded {len(X)} samples")
print(f"Classes: {unique_labels}")
print(f"Data shape: {X.shape}")


In [None]:
# Split data for training
# 80/20 train/test partition; random_state pins the shuffle used by the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Training samples: {X_train.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")

# Build CNN model with tanh activations
#
# Architecture: two Conv1D -> MaxPooling1D -> Dropout stages applied to the
# (sample_rate, 1) input, then two tanh Dense layers (each followed by Dropout)
# and a softmax output with one unit per class.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input object rather than the
    # `input_shape` layer argument, which newer Keras releases warn against
    # in Sequential models. The resulting architecture is unchanged.
    tf.keras.Input(shape=(sample_rate, 1)),
    tf.keras.layers.Conv1D(16, 13, activation='tanh'),
    tf.keras.layers.MaxPooling1D(3),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Conv1D(32, 11, activation='tanh'),
    tf.keras.layers.MaxPooling1D(3),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='tanh'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='tanh'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(len(unique_labels), activation='softmax')
])

model.compile(
    optimizer='adam',
    # Targets are integer class ids (not one-hot vectors), hence the sparse loss.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("Model ready. Training...")

# Train the model
# NOTE: the held-out test split is also passed as validation data, so the
# per-epoch val_* metrics reported during training are computed on the test set.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Evaluate
# evaluate() returns the loss followed by the metrics passed to compile() (accuracy).
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nFinal test accuracy with tanh: {test_accuracy:.3f}")

# Get detailed results
y_pred = model.predict(X_test)  # per-class scores from the softmax output layer
y_pred_classes = np.argmax(y_pred, axis=1)  # most probable class id per sample
print("\nClassification Report:")
# Pass the full set of class ids explicitly. Without `labels`, scikit-learn
# infers the classes from y_test and the predictions, and raises ValueError
# when that count differs from len(target_names) -- which happens whenever a
# class is missing from both the test split and the predictions.
print(classification_report(
    y_test,
    y_pred_classes,
    labels=np.arange(len(unique_labels)),
    target_names=unique_labels,
))
