In [60]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [61]:
# Dataset root on Google Drive; the feature-extraction loop below lists its
# sub-folders and derives each label from the sub-folder name.
data_dir=r"/content/drive/MyDrive/audio file"

In [None]:
# Feature extraction
def extract_features(file_path, duration=3, offset=0.5, n_mfcc=13):
    """Return the time-averaged MFCC vector for one audio file.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.
        duration: Maximum number of seconds to read (default 3).
        offset: Seconds to skip at the start of the file (default 0.5).
        n_mfcc: Number of MFCC coefficients to compute (default 13).

    Returns:
        A 1-D array with one mean value per MFCC coefficient, or ``None``
        when the file cannot be processed, contains no samples in the
        requested window, or produces non-finite values.
    """
    try:
        audio, sr = librosa.load(file_path, duration=duration, offset=offset)
        if audio.size == 0:
            # Nothing left to analyse (e.g. the clip is shorter than `offset`).
            print("Error loading file:", file_path, "|", "no audio samples in the requested window")
            return None
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Average over the frame axis so every file yields a fixed-length vector.
        mean_mfcc = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        print("Error loading file:", file_path, "|", e)
        return None

    if not np.all(np.isfinite(mean_mfcc)):
        # NaN/inf features would make the downstream classifier reject the whole matrix.
        print("Error loading file:", file_path, "|", "non-finite MFCC values")
        return None
    return mean_mfcc


In [None]:
# Extract features and labels
features = []
labels = []

# Sort both listings: os.listdir order is arbitrary, and a stable sample order
# is needed for the seeded train/test split to be repeatable.
for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)

    # Stray files directly under data_dir would make os.listdir(folder_path) raise.
    if not os.path.isdir(folder_path):
        continue

    # The label is the text after the last underscore of the folder name, lower-cased.
    emotion = folder.split('_')[-1].lower()

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Nested directories are not audio files; skip them quietly.
        if not os.path.isfile(file_path):
            continue

        mfcc = extract_features(file_path)
        if mfcc is not None:
            features.append(mfcc)
            labels.append(emotion)


In [None]:
# Convert to numpy arrays
# Turn the collected per-file vectors and their labels into parallel arrays.
x, y = (np.array(collected) for collected in (features, labels))


In [None]:
# Sanity-check the dataset dimensions before training.
print("Feature matrix shape:", x.shape)
print("Labels shape:", y.shape)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified - consider stratify=y if every
# class has enough samples, so each class is represented in the test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Seed the forest as well: with only the split seeded, the fitted model (and
# therefore the reported accuracy) still changed from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

# Predict on the held-out samples.
y_pred = model.predict(x_test)

# Overall fraction of correctly classified test samples.
accuracy = accuracy_score(y_test, y_pred)

In [None]:
# Overall accuracy first, then the per-class precision/recall/F1 table.
print(
    "Accuracy",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)
print(
    "\nclassification_report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

In [None]:
# Colab-only upload helper.
from google.colab import files
# Ask the user for audio files; the prediction loop below iterates
# uploaded.keys() and treats each key as a file name.
uploaded=files.upload()
def predict_emotion(file_path):
    """Classify one audio file with the trained ``model``.

    Returns the predicted label, or the string "could not process audio"
    when ``extract_features`` returns ``None`` for the file.
    """
    mfcc = extract_features(file_path)
    if mfcc is None:
        return "could not process audio"
    # Present the single feature vector as a one-row batch for predict().
    single_row = mfcc.reshape(1, -1)
    return model.predict(single_row)[0]

In [None]:
# Print "<file name>:<predicted label>" for every uploaded clip.
for fname in uploaded:
    print(fname, predict_emotion(fname), sep=":")

In [None]:
# Mount Google Drive at /content/drive so the dataset folders are reachable.
# NOTE(review): earlier cells already read a data_dir located under
# /content/drive, so on a fresh runtime this cell has to be run before them.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Dataset root on the mounted Drive; the training cell below assigns this same path again.
data_dir = "/content/drive/MyDrive/team audio"


In [66]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Dataset root on Google Drive (update if needed). Each sub-folder is one class:
# the loop below uses the lower-cased sub-folder name as the label.
data_dir = "/content/drive/MyDrive/team audio"

# ✅ Function to extract MFCC features from an audio file
def extract_features(file_path, duration=3, offset=0.5, n_mfcc=13):
    """Return the time-averaged MFCC vector for one audio file.

    Args:
        file_path: Path of the audio file passed to ``librosa.load``.
        duration: Maximum number of seconds to read (default 3).
        offset: Seconds to skip at the start of the file (default 0.5).
        n_mfcc: Number of MFCC coefficients to compute (default 13).

    Returns:
        A 1-D array with one mean value per MFCC coefficient, or ``None``
        when the file cannot be processed, contains no samples in the
        requested window, or produces non-finite values.
    """
    try:
        audio, sr = librosa.load(file_path, duration=duration, offset=offset)
        if audio.size == 0:
            # Nothing left to analyse (e.g. the clip is shorter than `offset`).
            print(f"❌ Error loading: {file_path} | no audio samples in the requested window")
            return None
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Average over the frame axis so every file yields a fixed-length vector.
        mean_mfcc = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        print(f"❌ Error loading: {file_path} | {e}")
        return None

    if not np.all(np.isfinite(mean_mfcc)):
        # NaN/inf features would make the downstream classifier reject the whole matrix.
        print(f"❌ Error loading: {file_path} | non-finite MFCC values")
        return None
    return mean_mfcc

# ✅ Feature extraction
features = []
labels = []

# Sort both listings: os.listdir order is arbitrary, and a stable sample order
# is needed for the seeded train/test split to be repeatable.
for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)

    # Only sub-folders represent classes; ignore stray files in data_dir.
    if not os.path.isdir(folder_path):
        continue

    emotion = folder.lower()  # Label is the folder name

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)

        # Skip anything that is not a regular file (e.g. nested folders).
        if not os.path.isfile(file_path):
            continue

        mfcc = extract_features(file_path)
        if mfcc is not None:
            features.append(mfcc)
            labels.append(emotion)

# Turn the collected per-file vectors and their labels into parallel arrays.
X, y = (np.array(collected) for collected in (features, labels))

# Quick dataset summary: matrix dimensions, distinct labels, sample count.
print("✅ Feature shape:", X.shape)
print("✅ Labels:", set(y))
print("✅ Total samples:", len(y))

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified - with a test set this small,
# consider stratify=y so every class is guaranteed to appear in it.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well: with only the split seeded, the fitted model (and
# therefore the reported metrics) still changed from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out samples and score them.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("🎯 Accuracy:", accuracy)
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))


✅ Feature shape: (20, 13)
✅ Labels: {np.str_('vishali'), np.str_('bhuvana')}
✅ Total samples: 20
🎯 Accuracy: 1.0
📊 Classification Report:
              precision    recall  f1-score   support

     bhuvana       1.00      1.00      1.00         2
     vishali       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



In [67]:
# Colab upload helper plus the IPython widgets used to play clips inline.
from google.colab import files
from IPython.display import Audio, display

# Ask the user for audio files; the loop below iterates uploaded.keys()
# and treats each key as a file name.
uploaded = files.upload()

def predict_emotion(file_path):
    """Classify one audio file with the trained ``model``.

    Returns the predicted label, or the string "Could not process audio"
    when ``extract_features`` returns ``None`` for the file.
    """
    mfcc = extract_features(file_path)
    if mfcc is None:
        return "Could not process audio"
    # Present the single feature vector as a one-row batch for predict().
    single_row = mfcc.reshape(1, -1)
    return model.predict(single_row)[0]

# For each uploaded clip: announce it, embed an audio player, then show the prediction.
for fname in uploaded:
    print(f"🎧 Playing: {fname}")
    display(Audio(fname))

    # Kept in `emotion` so the last prediction stays inspectable after the cell runs.
    emotion = predict_emotion(fname)
    print(f"🔍 Predicted Emotion: {emotion}")

Saving WhatsApp Audio 2025-07-18 at 12.04.12_64934e8e.waptt.opus to WhatsApp Audio 2025-07-18 at 12.04.12_64934e8e.waptt (2).opus
🎧 Playing: WhatsApp Audio 2025-07-18 at 12.04.12_64934e8e.waptt (2).opus


🔍 Predicted Emotion: bhuvana


In [64]:
# Print "<file name>:<predicted label>" for every uploaded clip.
for fname in uploaded:
    print(fname, predict_emotion(fname), sep=":")

WhatsApp Audio 2025-07-18 at 12.04.12_64934e8e.waptt (1).opus:bhuvana
