In [None]:
# Install dependencies (if not already installed)
!pip install librosa scikit-learn joblib

import librosa
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
import os
import joblib

# 1️⃣ Feature Extraction
def extract_features(file_path):
    """Build a fixed-length acoustic feature vector for one audio file.

    The file is loaded with ``librosa.load(..., sr=16000)`` and summarised as:

    * the per-coefficient mean of 20 MFCCs (20 values),
    * mean spectral centroid,
    * median fundamental frequency from pYIN (search range 50-300 Hz),
    * mean zero-crossing rate,
    * mean RMS energy,
    * mean spectral bandwidth,
    * mean chroma energy (one scalar averaged over all bins and frames).

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D NumPy array of 26 values, or ``None`` if any step raised
        (the error is printed so the caller can skip the file).
    """
    try:
        y, sr = librosa.load(file_path, sr=16000)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20).mean(axis=1)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr).mean()

        f0, _, _ = librosa.pyin(y, fmin=50, fmax=300, sr=sr)
        # pyin reports unvoiced frames as NaN. If every frame is unvoiced,
        # np.nanmedian would return NaN and poison the feature matrix, so
        # take the median of the voiced frames only and fall back to 0.
        voiced_f0 = f0[np.isfinite(f0)] if f0 is not None else np.array([])
        median_pitch = float(np.median(voiced_f0)) if voiced_f0.size else 0.0

        zcr = librosa.feature.zero_crossing_rate(y=y).mean()
        rms = librosa.feature.rms(y=y).mean()
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
        chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean()

        return np.hstack([mfcc, spectral_centroid, median_pitch, zcr, rms, spectral_bandwidth, chroma])
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the
        # whole dataset scan; the caller treats None as "skip this file".
        print(f"Error extracting features from {file_path}: {e}")
        return None

# 2️⃣ Load Dataset
def load_dataset(dataset_path):
    """Collect feature vectors for every WAV file under a speaker-per-folder tree.

    Each immediate sub-directory of ``dataset_path`` is treated as one
    speaker; its name becomes the label for every ``.wav`` file directly
    inside it. Files for which ``extract_features`` returns ``None`` are
    skipped.

    Args:
        dataset_path: Root directory containing one folder per speaker.

    Returns:
        A list of ``{"features": <vector>, "label": <speaker folder name>}``
        dicts, ordered by speaker name and then by file name.

    Raises:
        ValueError: If no file produced a usable feature vector.
    """
    dataset = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for speaker in sorted(os.listdir(dataset_path)):
        speaker_path = os.path.join(dataset_path, speaker)
        if not os.path.isdir(speaker_path):
            continue

        for audio_file in sorted(os.listdir(speaker_path)):
            # Compare case-insensitively so "clip.WAV" is not silently dropped.
            if not audio_file.lower().endswith(".wav"):
                continue

            features = extract_features(os.path.join(speaker_path, audio_file))
            if features is not None:
                dataset.append({"features": features, "label": speaker})

    if not dataset:
        raise ValueError("No valid audio data found in the dataset path.")

    return dataset

# 3️⃣ Train Model
def train_model(dataset):
    """Train a gradient-boosting speaker classifier and report held-out results.

    Steps: encode the speaker labels, hold out 20% of the samples, fit a
    ``StandardScaler`` on the training part only, grid-search a
    ``GradientBoostingClassifier`` with cross-validation, then print the best
    parameters, confusion matrix and classification report for the held-out
    samples.

    Args:
        dataset: List of ``{"features": ..., "label": ...}`` dicts as
            produced by ``load_dataset``.

    Returns:
        Tuple ``(best_model, scaler, label_encoder)``: the refit best
        estimator, the scaler fitted on the training split, and the fitted
        label encoder.
    """
    X = np.asarray([data["features"] for data in dataset])
    labels = [data["label"] for data in dataset]

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labels)
    class_ids = np.arange(len(label_encoder.classes_))

    # Stratify so every speaker is represented in both splits, but only when
    # the data allows it: train_test_split raises if a class has fewer than
    # two samples or if either split is smaller than the number of classes.
    test_size = 0.2
    n_test = int(np.ceil(test_size * len(y)))
    n_train = len(y) - n_test
    can_stratify = (
        np.bincount(y).min() >= 2
        and n_test >= len(class_ids)
        and n_train >= len(class_ids)
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=42,
        stratify=y if can_stratify else None,
    )

    # Fit the scaler on the training rows only; fitting on the full matrix
    # would leak test-set statistics into training and inflate the scores.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5]
    }

    # Use 5 folds when every speaker has at least 5 training samples;
    # otherwise shrink to the smallest class size (never below 2 folds).
    min_train_count = np.bincount(y_train, minlength=len(class_ids)).min()
    n_splits = int(max(2, min(5, min_train_count)))

    gb = GradientBoostingClassifier(random_state=42)
    clf = GridSearchCV(gb, param_grid, cv=n_splits, scoring='accuracy', n_jobs=-1)
    clf.fit(X_train, y_train)

    best_model = clf.best_estimator_
    y_pred = best_model.predict(X_test)

    # Passing labels explicitly keeps the report aligned with target_names
    # even if some speaker happens to be absent from the held-out split.
    print("\n✅ Best Parameters:", clf.best_params_)
    print("\n✅ Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))
    print("\n✅ Classification Report:\n", classification_report(y_test, y_pred, labels=class_ids, target_names=label_encoder.classes_))

    return best_model, scaler, label_encoder


# 4️⃣ Run Training
# Location of the speaker-per-folder dataset inside the Colab runtime.
dataset_path = "/content/voices"  # ⚠️ Upload your dataset folder here in Colab

# Extract features for every recording, then fit and evaluate the classifier.
dataset = load_dataset(dataset_path)
model, scaler, label_encoder = train_model(dataset)

# 5️⃣ Save Model Files
# The three fitted objects are persisted side by side, in this order.
artifacts = (
    (model, '/content/speaker_model.pkl'),
    (scaler, '/content/scaler.pkl'),
    (label_encoder, '/content/label_encoder.pkl'),
)
for fitted_object, artifact_path in artifacts:
    joblib.dump(fitted_object, artifact_path)

print("\n✅ Model and files saved successfully!")

# 6️⃣ Download Model Files
# Imported here because google.colab exists only inside a Colab runtime.
from google.colab import files
for _, artifact_path in artifacts:
    files.download(artifact_path)



✅ Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100}

✅ Confusion Matrix:
 [[2 0 0]
 [0 2 0]
 [0 0 2]]

✅ Classification Report:
               precision    recall  f1-score   support

      Aateet       1.00      1.00      1.00         2
       mamta       1.00      1.00      1.00         2
       vibha       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


✅ Model and files saved successfully!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>