### Import Dependencies

In [None]:
import os
import pandas as pd
import librosa
import numpy as np
import soundfile as sf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tabulate

In [4]:
alexa_dir = 'data/alexa'

# Smoke-test the Alexa recordings: try to decode every FLAC file in the folder
# and report its sample rate and duration, or the reason it could not be read.
for filename in os.listdir(alexa_dir):
    if not filename.endswith('.flac'):
        continue  # skip anything that is not a FLAC recording
    file_path = os.path.join(alexa_dir, filename)
    try:
        # sr=None keeps the file's original sample rate (no resampling)
        y, sr = librosa.load(file_path, sr=None)
        print(f"✅ Loaded: {filename} | Sample Rate: {sr} | Duration: {len(y)/sr:.2f}s")
    except Exception as err:
        print(f"❌ Could not load {filename}: {err}")


✅ Loaded: 0.flac | Sample Rate: 16000 | Duration: 3.30s
✅ Loaded: 1.flac | Sample Rate: 16000 | Duration: 3.66s
✅ Loaded: 10.flac | Sample Rate: 16000 | Duration: 2.02s
✅ Loaded: 11.flac | Sample Rate: 16000 | Duration: 2.02s
✅ Loaded: 12.flac | Sample Rate: 16000 | Duration: 1.78s
✅ Loaded: 13.flac | Sample Rate: 16000 | Duration: 1.98s
✅ Loaded: 14.flac | Sample Rate: 16000 | Duration: 2.22s
✅ Loaded: 15.flac | Sample Rate: 16000 | Duration: 2.02s
✅ Loaded: 16.flac | Sample Rate: 16000 | Duration: 1.94s
✅ Loaded: 17.flac | Sample Rate: 16000 | Duration: 2.38s
✅ Loaded: 18.flac | Sample Rate: 16000 | Duration: 3.78s
✅ Loaded: 19.flac | Sample Rate: 16000 | Duration: 1.70s
✅ Loaded: 2.flac | Sample Rate: 16000 | Duration: 2.42s
✅ Loaded: 20.flac | Sample Rate: 16000 | Duration: 1.74s
✅ Loaded: 21.flac | Sample Rate: 16000 | Duration: 2.22s
✅ Loaded: 22.flac | Sample Rate: 16000 | Duration: 2.26s
✅ Loaded: 23.flac | Sample Rate: 16000 | Duration: 1.82s
✅ Loaded: 24.flac | Sample Rate: 1

In [5]:
jarvis_dir = 'data/jarvis'

# Smoke-test the Jarvis recordings: try to decode every WAV file in the folder
# and report its sample rate and duration, or the reason it could not be read.
for filename in os.listdir(jarvis_dir):
    if not filename.endswith('.wav'):
        continue  # skip anything that is not a WAV recording
    file_path = os.path.join(jarvis_dir, filename)
    try:
        # sr=None preserves the file's original sampling rate (no resampling)
        y, sr = librosa.load(file_path, sr=None)
        print(f"✅ Loaded: {filename} | Sample Rate: {sr} | Duration: {len(y)/sr:.2f}s")
    except Exception as err:
        print(f"❌ Could not load {filename}: {err}")


✅ Loaded: 00a97647-55b9-4f62-be20-8e4b0ee510b0.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 00aba123-ae3a-4e0a-8603-9f7277b7d41f.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 00af045b-ead8-4379-9110-c038e0bdd855.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 00c476c2-9e01-40df-96c8-a5153db5b157.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 01f13214-d1eb-48b6-8f58-e397731158fd.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 01f46f97-7ea8-474a-9285-13b038477eec.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 0b601785-a157-4263-920c-7890e3564efc.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 0c20da21-b168-470d-9b99-98e2161ed91a.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 0c7910b3-4414-4d48-9e6f-292a1d099f6f.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 0d7cfa1f-6de3-42f4-97e1-324cda1d1271.wav | Sample Rate: 16000 | Duration: 2.30s
✅ Loaded: 0df7b7ff-d667-488d-b4c2-ea78b4b847fe.wav | Sample Rate: 16000 | Duration: 3.07s
✅ Loaded: 

### Dataset Creation

In [None]:
base_dir = "data"
# Audio file extension expected inside each wake-word folder.
audio_ext = {'alexa': '.flac', 'jarvis': '.wav'}
data = []

# os.listdir() returns entries in arbitrary order. Sort both levels so the row
# order of `df` is the same on every machine; the seeded train/test split further
# down picks rows by position, so an unstable order means an unstable split.
for wake_word in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, wake_word)
    ext = audio_ext.get(wake_word)
    if ext is None or not os.path.isdir(folder_path):
        continue  # not one of the known wake-word folders
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(ext):
            file_path = os.path.join(folder_path, filename)
            data.append({'filepath': file_path, 'wake_word': wake_word})

# An empty list would produce a DataFrame without a 'wake_word' column and a
# confusing KeyError in the next cell; fail here with an explicit message instead.
if not data:
    raise FileNotFoundError(f"No wake-word audio files found under '{base_dir}'")

# One row per recording: path on disk plus the wake word it contains.
df = pd.DataFrame(data)
print(df.head())


[{'filepath': 'data\\alexa\\0.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\1.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\10.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\11.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\12.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\13.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\14.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\15.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\16.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\17.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\18.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\19.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\2.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\20.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\21.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\22.flac', 'wake_word': 'alexa'}, {'filepath': 'data\\alexa\\23.flac', 'wake

In [13]:
# Number of recordings collected per wake word
class_counts = df['wake_word'].value_counts()
class_counts

wake_word
alexa     30
jarvis    30
Name: count, dtype: int64

In [14]:
# Binary target column: 'alexa' -> 0, every other wake word -> 1
df['label'] = df['wake_word'].ne('alexa').astype('int64')
print(df['label'].value_counts())
df.head()

label
0    30
1    30
Name: count, dtype: int64


Unnamed: 0,filepath,wake_word,label
0,data\alexa\0.flac,alexa,0
1,data\alexa\1.flac,alexa,0
2,data\alexa\10.flac,alexa,0
3,data\alexa\11.flac,alexa,0
4,data\alexa\12.flac,alexa,0


### Feature Extraction


In [18]:
# Parameters shared by extract_features() below
# Clip length: passed to librosa.load() as `duration`; multiplied by sample_rate
# it also gives the fixed number of samples every clip is padded/trimmed to.
max_duration = 2.0
# Rate (Hz) passed to librosa.load() as `sr` for every file.
sample_rate = 16000
# Number of MFCC coefficients requested from librosa.feature.mfcc().
n_mfcc = 13

# MFCC feature extractor
def extract_features(path):
    """Return one fixed-size MFCC summary vector for the audio file at ``path``.

    The file is loaded at ``sample_rate`` with at most ``max_duration`` of audio,
    then padded or trimmed to exactly ``int(max_duration * sample_rate)`` samples
    so every clip yields the same number of frames. ``n_mfcc`` coefficients are
    computed and the resulting matrix is averaged along axis 1.

    Reads the module-level settings ``max_duration``, ``sample_rate`` and ``n_mfcc``.
    Any error raised by librosa while loading the file propagates to the caller.
    """
    target_len = int(max_duration * sample_rate)
    signal, rate = librosa.load(path, sr=sample_rate, duration=max_duration)
    signal = librosa.util.fix_length(signal, size=target_len)
    coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    return coeffs.mean(axis=1)

# Extract features for each file. A file that fails to load is reported and
# skipped; its label is skipped too, so `features` and `valid_labels` stay aligned.
features = []
valid_labels = []

for filepath, label in zip(df['filepath'], df['label']):
    try:
        features.append(extract_features(filepath))
        valid_labels.append(label)
    except Exception as e:
        print(f"❌ Error loading {filepath}: {e}")

# Convert to arrays (X is intentionally left unscaled, see below)
X = np.array(features)
y = np.array(valid_labels)

# Split into train and test sets (70/30) BEFORE any scaling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Normalize: fit the scaler on the training split only, then apply the same
# transform to the test split. Fitting on all of X would leak the test set's
# mean/variance into training and make the reported accuracy optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Train Wake Word Classifier 

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fit a linear-kernel SVM on the training split (fit() returns the estimator itself)
clf = SVC(kernel='linear', C=1.0, random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out test split
y_pred = clf.predict(X_test)

✅ Accuracy: 0.9444444444444444
📊 Classification Report:
               precision    recall  f1-score   support

       alexa       0.90      1.00      0.95         9
      jarvis       1.00      0.89      0.94         9

    accuracy                           0.94        18
   macro avg       0.95      0.94      0.94        18
weighted avg       0.95      0.94      0.94        18

🧾 Confusion Matrix:
 [[9 0]
 [1 8]]


### Evaluate the Classifier

In [21]:
# classification_report is imported here as well so this cell does not depend
# on the import made in the training cell.
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from tabulate import tabulate

# Label encoding used when the dataset was built: 0 = alexa, 1 = jarvis.
class_ids = [0, 1]

# Predict on test set first, so every metric below uses these predictions
y_pred = clf.predict(X_test)

# Confusion matrix. scikit-learn puts the ACTUAL class on the rows and the
# PREDICTED class on the columns. labels= pins the shape to 2x2 even if one
# class is missing from y_test/y_pred, so the cm[i][j] lookups below are safe.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Accuracy as a percentage for the summary line
accuracy = accuracy_score(y_test, y_pred) * 100

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("📊 Classification Report:\n", classification_report(y_test, y_pred, labels=class_ids, target_names=['alexa', 'jarvis']))
print("🧾 Confusion Matrix:\n", cm)

# Create table for tabulate. The corner header reads "row \ column"; it was
# previously "Predicted \ Actual", which is the transpose of what cm contains.
headers = ["Actual \\ Predicted", "Alexa", "Jarvis"]
table = [
    ["Alexa", cm[0][0], cm[0][1]],
    ["Jarvis", cm[1][0], cm[1][1]]
]

# Display
print(tabulate(table, headers=headers, tablefmt="grid"))
print(f"\n✅ Overall Accuracy: {accuracy:.2f}%")


✅ Accuracy: 0.9444444444444444
📊 Classification Report:
               precision    recall  f1-score   support

       alexa       0.90      1.00      0.95         9
      jarvis       1.00      0.89      0.94         9

    accuracy                           0.94        18
   macro avg       0.95      0.94      0.94        18
weighted avg       0.95      0.94      0.94        18

🧾 Confusion Matrix:
 [[9 0]
 [1 8]]
+----------------------+---------+----------+
| Predicted \ Actual   |   Alexa |   Jarvis |
| Alexa                |       9 |        0 |
+----------------------+---------+----------+
| Jarvis               |       1 |        8 |
+----------------------+---------+----------+

✅ Overall Accuracy: 94.44%
