<a href="https://colab.research.google.com/github/Tai2970/Accent-Recognition-Project/blob/main/scripts/full_pipeline_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages this notebook relies on.
# NOTE(review): joblib is imported by the prediction cell but is not listed
# here; presumably it arrives as a scikit-learn dependency — confirm.
!pip install librosa scikit-learn pandas numpy soundfile

In [None]:
from google.colab import files
import os
import shutil

# STEP 1: Upload model
# The names yielded by iterating `uploaded` are used below as paths of the
# uploaded files relative to the current working directory.
uploaded = files.upload()  # Upload final_model.pkl or any renamed version

# STEP 2: Ensure 'models' directory exists
os.makedirs("models", exist_ok=True)

# STEP 3: Store exactly one uploaded file as 'final_model.pkl'.
# Moving every upload to the same destination would make the files silently
# overwrite one another, so a single file is chosen explicitly and the user
# is told when the upload was empty or ambiguous.
new_path = os.path.join("models", "final_model.pkl")
uploaded_names = list(uploaded)

if not uploaded_names:
    print(f"No file was uploaded; {new_path} was not updated.")
else:
    # The last upload wins, matching what the overwrite-in-a-loop used to
    # produce; the remaining files are left in place instead of being lost.
    chosen_name = uploaded_names[-1]
    if len(uploaded_names) > 1:
        print(
            f"Warning: {len(uploaded_names)} files were uploaded; "
            f"only '{chosen_name}' is used as the model."
        )
    shutil.move(chosen_name, new_path)
    print(f"Saved model as: {new_path}")

In [None]:
# STEP 1: Delete any existing test_samples folder to avoid duplicates
# `os` is imported here too so this cell also runs on a fresh kernel without
# relying on an earlier cell having imported it.
import os
import shutil
from google.colab import files

samples_dir = "data/test_samples"
shutil.rmtree(samples_dir, ignore_errors=True)

# STEP 2: Recreate the folder clean
os.makedirs(samples_dir, exist_ok=True)

# STEP 3: Upload your new .wav files for prediction
uploaded = files.upload()

# STEP 4: Move uploaded .wav files to the 'test_samples' folder
# The extension check is case-insensitive, and the stored name always gets a
# lower-case '.wav' suffix so that a later case-sensitive '.wav' filter still
# picks the file up. Anything that is not a wav file is reported, not
# silently ignored.
skipped_names = []
for filename in uploaded:
    stem, extension = os.path.splitext(filename)
    if extension.lower() == ".wav":
        shutil.move(filename, os.path.join(samples_dir, stem + ".wav"))
    else:
        skipped_names.append(filename)

if not uploaded:
    print("No files were uploaded.")
elif skipped_names:
    print("Skipped (not .wav): " + ", ".join(skipped_names))

In [None]:
import os
import librosa
import numpy as np
import joblib
import pandas as pd
import warnings
# Ignore every warning category from here on so that library warnings do not
# clutter the prediction printout.
warnings.simplefilter("ignore")

# Load trained final model.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code. Only load a model file that comes from a trusted source.
model = joblib.load("models/final_model.pkl")

# Maps the class value returned by the model to a human-readable accent name.
# Presumably this order matches the label encoding used at training time —
# confirm against the training script.
accent_labels = dict(enumerate(("American", "British", "Vietnamese")))

# Folder with test .wav files
test_folder = "data/test_samples/"

# Feature extraction: MFCCs, delta MFCCs, spectral contrast
def extract_features(file_path):
    """Return one flat feature vector describing the audio file.

    The file is loaded with ``sr=16000``. Three per-frame feature matrices
    are computed (13 MFCCs, their deltas, and spectral contrast), each is
    averaged along axis 1, and the three means are concatenated in that
    order.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array: 13 MFCC means, 13 delta-MFCC means, then the
        spectral-contrast means (7 values with librosa's default band
        count, i.e. 33 in total — confirm against the training pipeline).
    """
    signal, sample_rate = librosa.load(file_path, sr=16000)

    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    per_frame_features = (
        mfcc_matrix,
        librosa.feature.delta(mfcc_matrix),
        librosa.feature.spectral_contrast(y=signal, sr=sample_rate),
    )

    # Collapse each matrix to one mean per row, then join the row means.
    return np.concatenate(
        [matrix.mean(axis=1) for matrix in per_frame_features]
    )

# Run prediction on each .wav file
print("Accent Predictions:")
print("-" * 40)

# A missing folder is treated like an empty one so the cell reports the
# situation instead of stopping with a traceback.
try:
    folder_entries = os.listdir(test_folder)
except FileNotFoundError:
    folder_entries = []

# Case-insensitive match so that files named e.g. 'clip.WAV' are included.
wav_files = sorted(
    name for name in folder_entries if name.lower().endswith(".wav")
)

if not wav_files:
    print(f"No .wav files found in {test_folder}")

for filename in wav_files:
    file_path = os.path.join(test_folder, filename)
    try:
        features = extract_features(file_path).reshape(1, -1)
        prediction = model.predict(features)[0]
    except Exception as e:
        # Per-file boundary: one unreadable or failing clip must not stop
        # the remaining predictions, so the error is reported and skipped.
        print(f"{filename} ➜ Error: {e}")
        continue

    # Fall back to the raw model output when the class has no entry in
    # accent_labels, rather than reporting a bare KeyError as the "error".
    label = accent_labels.get(prediction, f"Unknown class ({prediction})")
    print(f"{filename} ➜ Predicted Accent: {label}")