### Step 1: Environment Setup

In [1]:
import os
import pandas as pd
import librosa
import numpy as np
import soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tabulate

# Clone the GitHub repository to access the data locally
!git clone https://github.com/Rajveermathur/Wake-Word-Classifier-ASR.git

alexa_dir = 'Wake-Word-Classifier-ASR/data/alexa'
jarvis_dir = 'Wake-Word-Classifier-ASR/data/jarvis'

### Step 2: Dataset Creation

In [4]:
for filename in os.listdir(alexa_dir):
    if filename.endswith('.flac'):
        file_path = os.path.join(alexa_dir, filename)
        try:
            y, sr = librosa.load(file_path, sr=None)  # sr=None keeps the original sample rate
            print(f"✅ Loaded: {filename} | Sample Rate: {sr} | Duration: {len(y)/sr:.2f}s")
        except Exception as e:
            print(f"❌ Could not load {filename}: {e}")

Cloning into 'Wake-Word-Classifier-ASR'...
remote: Enumerating objects: 72, done.[K
remote: Counting objects: 100% (72/72), done.[K
remote: Compressing objects: 100% (69/69), done.[K
remote: Total 72 (delta 2), reused 72 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (72/72), 2.13 MiB | 7.06 MiB/s, done.
Resolving deltas: 100% (2/2), done.
✅ Loaded: 19.flac | Sample Rate: 16000 | Duration: 1.70s
✅ Loaded: 22.flac | Sample Rate: 16000 | Duration: 2.26s
✅ Loaded: 7.flac | Sample Rate: 16000 | Duration: 3.70s
✅ Loaded: 0.flac | Sample Rate: 16000 | Duration: 3.30s
✅ Loaded: 13.flac | Sample Rate: 16000 | Duration: 1.98s
✅ Loaded: 15.flac | Sample Rate: 16000 | Duration: 2.02s
✅ Loaded: 24.flac | Sample Rate: 16000 | Duration: 2.10s
✅ Loaded: 16.flac | Sample Rate: 16000 | Duration: 1.94s
✅ Loaded: 14.flac | Sample Rate: 16000 | Duration: 2.22s
✅ Loaded: 28.flac | Sample Rate: 16000 | Duration: 3.14s
✅ Loaded: 26.flac | Sample Rate: 16000 | Duration: 2.74s
✅ Loaded: 4.flac

In [5]:
for filename in os.listdir(jarvis_dir):
    if filename.endswith('.wav'):
        file_path = os.path.join(jarvis_dir, filename)
        try:
            y, sr = librosa.load(file_path, sr=None)  # sr=None preserves original sampling rate
            print(f"✅ Loaded: {filename} | Sample Rate: {sr} | Duration: {len(y)/sr:.2f}s")
        except Exception as e:
            print(f"❌ Could not load {filename}: {e}")


✅ Loaded: 0eb17b69-7f9f-4db5-b68b-2c365cbdd660.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 00a97647-55b9-4f62-be20-8e4b0ee510b0.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 1ddadbc1-4d52-4df8-88c2-8615587e2608.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 0d7cfa1f-6de3-42f4-97e1-324cda1d1271.wav | Sample Rate: 16000 | Duration: 2.30s
✅ Loaded: 0b601785-a157-4263-920c-7890e3564efc.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 1e128153-2111-4a6f-8792-6c24220ee5b0.wav | Sample Rate: 16000 | Duration: 2.53s
✅ Loaded: 1d3548b5-a5e3-407a-96bb-502037d3b2b8.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 00c476c2-9e01-40df-96c8-a5153db5b157.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 1a617bc6-f87c-43d7-a0af-d482476276c5.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 01f46f97-7ea8-474a-9285-13b038477eec.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 2a713afc-d208-4eab-9dc1-fcdd862d8db6.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 

In [9]:
base_dir = "Wake-Word-Classifier-ASR/data"
data = []

for wake_word in os.listdir(base_dir):
    folder_path = os.path.join(base_dir, wake_word)
    if os.path.isdir(folder_path):
        for filename in os.listdir(folder_path):
            if (wake_word == 'alexa' and filename.endswith('.flac')) or \
               (wake_word == 'jarvis' and filename.endswith('.wav')):
                file_path = os.path.join(folder_path, filename)
                data.append({'filepath': file_path, 'wake_word': wake_word})

# print(data)
df = pd.DataFrame(data)
print(df.head())


                                      filepath wake_word
0  Wake-Word-Classifier-ASR/data/alexa/19.flac     alexa
1  Wake-Word-Classifier-ASR/data/alexa/22.flac     alexa
2   Wake-Word-Classifier-ASR/data/alexa/7.flac     alexa
3   Wake-Word-Classifier-ASR/data/alexa/0.flac     alexa
4  Wake-Word-Classifier-ASR/data/alexa/13.flac     alexa


In [10]:
# Define 2 labels
df['wake_word'].value_counts()

Unnamed: 0_level_0,count
wake_word,Unnamed: 1_level_1
alexa,30
jarvis,30


In [11]:
df['label'] = df['wake_word'].apply(lambda x: 0 if x == 'alexa' else 1)
print(df['label'].value_counts())
df.head()

label
0    30
1    30
Name: count, dtype: int64


Unnamed: 0,filepath,wake_word,label
0,Wake-Word-Classifier-ASR/data/alexa/19.flac,alexa,0
1,Wake-Word-Classifier-ASR/data/alexa/22.flac,alexa,0
2,Wake-Word-Classifier-ASR/data/alexa/7.flac,alexa,0
3,Wake-Word-Classifier-ASR/data/alexa/0.flac,alexa,0
4,Wake-Word-Classifier-ASR/data/alexa/13.flac,alexa,0


### Step 3: Feature Extraction


In [12]:
# Parameters
max_duration = 4.0
sample_rate = 16000
n_mfcc = 13

# Function to extract MFCC features
def extract_features(path):
    y, sr = librosa.load(path, sr=sample_rate, duration=max_duration)
    expected_len = int(max_duration * sample_rate)
    y = librosa.util.fix_length(y, size=expected_len)  # ← Fixed this line
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1)

# Extract features for each file
features = []
valid_labels = []

for i, row in df.iterrows():
    try:
        mfcc_feat = extract_features(row['filepath'])
        features.append(mfcc_feat)
        valid_labels.append(row['label'])
    except Exception as e:
        print(f"❌ Error loading {row['filepath']}: {e}")

# Convert to arrays
X = np.array(features)
y = np.array(valid_labels)

# Normalize
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train and test sets (70/30)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

### Train Wake Word Classifier

In [13]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

clf = SVC(kernel='linear', C=1.0, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

### Evaluate the Classifier

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score
from tabulate import tabulate


print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Classification Report:\n", classification_report(y_test, y_pred, target_names=['alexa', 'jarvis']))
print("🧾 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Predict on test set
y_pred = clf.predict(X_test)

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Accuracy
accuracy = accuracy_score(y_test, y_pred) * 100

# Create table for tabulate
headers = ["Predicted \\ Actual", "Alexa", "Jarvis"]
table = [
    ["Alexa", cm[0][0], cm[0][1]],
    ["Jarvis", cm[1][0], cm[1][1]]
]

# Display
print(tabulate(table, headers=headers, tablefmt="grid"))
print(f"\n✅ Overall Accuracy: {accuracy:.2f}%")


✅ Accuracy: 1.0
📊 Classification Report:
               precision    recall  f1-score   support

       alexa       1.00      1.00      1.00         9
      jarvis       1.00      1.00      1.00         9

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

🧾 Confusion Matrix:
 [[9 0]
 [0 9]]
+----------------------+---------+----------+
| Predicted \ Actual   |   Alexa |   Jarvis |
| Alexa                |       9 |        0 |
+----------------------+---------+----------+
| Jarvis               |       0 |        9 |
+----------------------+---------+----------+

✅ Overall Accuracy: 100.00%
