<a href="https://colab.research.google.com/github/Srilekha-03/deep-learning/blob/main/environmental_audio_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Load the data
df = pd.read_csv('/content/sample_data/extracted_features_final.csv')  # Replace with your actual file path

# Encode the target column (Class Name)
label_encoder = LabelEncoder()
df['Encoded Class'] = label_encoder.fit_transform(df['Class Name'])  # Add encoded column

# Drop 'Class ID' and select features/target
df = df.drop(columns=['Class ID'])  # Drop 'Class ID' column
X = df.iloc[:, 1:-1].values  # Features (exclude 'Class Name' and 'Encoded Class')
y = df['Encoded Class'].values  # Encoded target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Dictionary to store results
results = {}

# 1. Random Forest
print("Training Random Forest...")
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
results['Random Forest'] = accuracy_score(y_test, y_pred_rf)

# 2. XGBoost
print("Training XGBoost...")
xgb = XGBClassifier(random_state=42)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
results['XGBoost'] = accuracy_score(y_test, y_pred_xgb)

# 3. Support Vector Machine (SVM)
print("Training SVM...")
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
results['SVM'] = accuracy_score(y_test, y_pred_svm)

# 4. Neural Network (MLP Classifier)
print("Training MLP...")
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)
results['MLP'] = accuracy_score(y_test, y_pred_mlp)

# Print results
print("\nModel Performance:")
for model, acc in results.items():
    print(f"{model}: Accuracy = {acc:.4f}")

# Hyperparameter Tuning for Random Forest
print("\nHyperparameter Tuning for Random Forest...")
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None]
}
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=3)
grid_search.fit(X_train, y_train)

# Best Model
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)
print("Best Score (CV):", grid_search.best_score_)

# Evaluate the best model
y_pred_best = best_model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_best))

joblib.dump(best_model, 'best_model.pkl')
print("\nBest model saved as 'best_model.pkl'!")



Training Random Forest...
Training XGBoost...
Training SVM...
Training MLP...

Model Performance:
Random Forest: Accuracy = 0.5309
XGBoost: Accuracy = 0.5235
SVM: Accuracy = 0.0889
MLP: Accuracy = 0.2395

Hyperparameter Tuning for Random Forest...
Best Parameters: {'max_depth': 20, 'n_estimators': 200}
Best Score (CV): 0.49259259259259264

Classification Report:
               precision    recall  f1-score   support

          Axe       0.36      0.36      0.36        14
 BirdChirping       0.79      0.69      0.73        16
     Chainsaw       0.69      0.73      0.71        15
     Clapping       0.83      0.25      0.38        20
         Fire       0.78      0.88      0.82        16
     Firework       0.25      0.38      0.30        13
    Footsteps       0.33      0.44      0.38        16
         Frog       0.83      0.29      0.43        17
    Generator       0.40      0.20      0.27        10
      Gunshot       0.55      0.35      0.43        17
      Handsaw       0.67     

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install scikit-learn==1.0.2




In [None]:
pip install --upgrade xgboost




In [None]:
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# Define individual classifiers
rf = RandomForestClassifier(random_state=42)
xgb = XGBClassifier(random_state=42)
svm = SVC(kernel='rbf', random_state=42, probability=True)  # Set probability=True for soft voting
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)

# Create a voting classifier (soft voting)
voting_clf = VotingClassifier(estimators=[
    ('rf', rf), ('xgb', xgb), ('svm', svm), ('mlp', mlp)
], voting='soft')

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Evaluate on test set
y_pred_voting = voting_clf.predict(X_test)
results['Voting Classifier'] = accuracy_score(y_test, y_pred_voting)

# Print results
print("\nVoting Classifier Performance:")
print(f"Voting Classifier Accuracy: {results['Voting Classifier']:.4f}")



Voting Classifier Performance:
Voting Classifier Accuracy: 0.4741


In [None]:
from sklearn.ensemble import VotingClassifier

# Define individual classifiers
rf = RandomForestClassifier(random_state=42)
xgb = XGBClassifier(random_state=42)
svm = SVC(kernel='rbf', random_state=42, probability=True)  # Set probability=True for soft voting
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)

# Create a voting classifier (soft voting)
voting_clf = VotingClassifier(estimators=[
    ('rf', rf), ('xgb', xgb), ('svm', svm), ('mlp', mlp)
], voting='soft')

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Evaluate on test set
y_pred_voting = voting_clf.predict(X_test)
results['Voting Classifier'] = accuracy_score(y_test, y_pred_voting)

# Print results
print("\nVoting Classifier Performance:")
print(f"Voting Classifier Accuracy: {results['Voting Classifier']:.4f}")



Voting Classifier Performance:
Voting Classifier Accuracy: 0.4741


In [None]:
from sklearn.ensemble import BaggingClassifier

# Bagging for SVM
bagging_svm = BaggingClassifier(base_estimator=SVC(kernel='rbf', random_state=42), n_estimators=50, random_state=42)
bagging_svm.fit(X_train, y_train)
y_pred_bagging_svm = bagging_svm.predict(X_test)
results['Bagging SVM'] = accuracy_score(y_test, y_pred_bagging_svm)

# Print results
print("\nBagging SVM Performance:")
print(f"Bagging SVM Accuracy: {results['Bagging SVM']:.4f}")



Bagging SVM Performance:
Bagging SVM Accuracy: 0.0889


In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with Random Forest as base estimator
ada_boost = AdaBoostClassifier(base_estimator=RandomForestClassifier(random_state=42), n_estimators=50, random_state=42)
ada_boost.fit(X_train, y_train)
y_pred_ada = ada_boost.predict(X_test)
results['AdaBoost'] = accuracy_score(y_test, y_pred_ada)

# Print results
print("\nAdaBoost Performance:")
print(f"AdaBoost Accuracy: {results['AdaBoost']:.4f}")



AdaBoost Performance:
AdaBoost Accuracy: 0.5111


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient Boosting
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)
results['Gradient Boosting'] = accuracy_score(y_test, y_pred_gb)

# Print results
print("\nGradient Boosting Performance:")
print(f"Gradient Boosting Accuracy: {results['Gradient Boosting']:.4f}")



Gradient Boosting Performance:
Gradient Boosting Accuracy: 0.4914


In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Define base models
base_learners = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('xgb', XGBClassifier(random_state=42)),
    ('svm', SVC(kernel='rbf', random_state=42)),
    ('mlp', MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42))
]

# Define meta-model
meta_model = LogisticRegression()

# Create a stacking classifier
stacking_clf = StackingClassifier(estimators=base_learners, final_estimator=meta_model)
stacking_clf.fit(X_train, y_train)

# Evaluate on test set
y_pred_stacking = stacking_clf.predict(X_test)
results['Stacking Classifier'] = accuracy_score(y_test, y_pred_stacking)

# Print results
print("\nStacking Classifier Performance:")
print(f"Stacking Classifier Accuracy: {results['Stacking Classifier']:.4f}")


Stacking Classifier Performance:
Stacking Classifier Accuracy: 0.2988


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras import layers, models
import joblib

# Load the data
df = pd.read_csv('/content/extracted_features_final.csv')  # Replace with your actual file path

# Encode the target column (Class Name)
label_encoder = LabelEncoder()
df['Encoded Class'] = label_encoder.fit_transform(df['Class Name'])  # Add encoded column

# Drop 'Class ID' and select features/target
df = df.drop(columns=['Class ID'])  # Drop 'Class ID' column
X = df.iloc[:, 1:-1].values  # Features (exclude 'Class Name' and 'Encoded Class')
y = df['Encoded Class'].values  # Encoded target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features for ANN
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the ANN Model using Keras
def create_ann_model(input_shape, num_classes):
    model = models.Sequential()

    # Input layer
    model.add(layers.InputLayer(input_shape=input_shape))

    # First hidden layer
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.5))  # Dropout to prevent overfitting

    # Second hidden layer
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))  # Dropout

    # Output layer
    model.add(layers.Dense(num_classes, activation='softmax'))  # Softmax for multi-class classification

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Define input shape and number of output classes
input_shape = (X_train.shape[1],)  # Input shape is the number of features
num_classes = len(np.unique(y))    # Number of target classes (should match your data)

# Create the model
model = create_ann_model(input_shape, num_classes)

# Train the ANN model
print("Training ANN...")
model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
y_pred_ann = np.argmax(model.predict(X_test), axis=-1)

# Print model accuracy
print("\nANN Model Performance:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_ann):.4f}")

# Print classification report and confusion matrix
print("\nClassification Report:")
print(classification_report(y_test, y_pred_ann, target_names=label_encoder.classes_))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_ann))

# Save the model
model.save('best_ann_model.h5')
print("\nBest ANN model saved as 'best_ann_model.h5'!")




Training ANN...
Epoch 1/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.0691 - loss: 3.3290 - val_accuracy: 0.1679 - val_loss: 2.9303
Epoch 2/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1343 - loss: 2.9649 - val_accuracy: 0.2321 - val_loss: 2.6682
Epoch 3/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1813 - loss: 2.7876 - val_accuracy: 0.2914 - val_loss: 2.4671
Epoch 4/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2259 - loss: 2.6006 - val_accuracy: 0.3062 - val_loss: 2.3164
Epoch 5/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2815 - loss: 2.4205 - val_accuracy: 0.3284 - val_loss: 2.2174
Epoch 6/1000
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2803 - loss: 2.3913 - val_accuracy: 0.3259 - val_loss: 2.1440
Epoch 7/100



               precision    recall  f1-score   support

          Axe       0.12      0.14      0.13        14
 BirdChirping       0.75      0.56      0.64        16
     Chainsaw       0.80      0.80      0.80        15
     Clapping       0.94      0.75      0.83        20
         Fire       0.75      0.94      0.83        16
     Firework       0.55      0.46      0.50        13
    Footsteps       0.58      0.44      0.50        16
         Frog       0.67      0.35      0.46        17
    Generator       0.33      0.40      0.36        10
      Gunshot       0.56      0.59      0.57        17
      Handsaw       0.75      1.00      0.86        12
   Helicopter       0.81      0.71      0.76        24
       Insect       0.72      0.65      0.68        20
         Lion       0.67      0.50      0.57        12
         Rain       0.27      0.25      0.26        12
      Silence       0.87      0.93      0.90        14
     Speaking       0.71      0.53      0.61        19
     Squi

In [4]:
pip install librosa numpy




In [7]:
# Reshape data for CNN (reshape to 2D if treating it like an image)
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the CNN Model
def create_cnn_model(input_shape, num_classes):
    model = models.Sequential()

    # Convolutional Layer
    model.add(layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling1D(pool_size=2))

    # Second Convolutional Layer
    model.add(layers.Conv1D(64, kernel_size=3, activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))

    # Flatten and Fully Connected Layers
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Define input shape for CNN
input_shape_cnn = (X_train_cnn.shape[1], X_train_cnn.shape[2])

# Create and train the CNN model
cnn_model = create_cnn_model(input_shape_cnn, num_classes)
print("Training CNN...")
cnn_model.fit(X_train_cnn, y_train, epochs=500, batch_size=32, validation_data=(X_test_cnn, y_test))

# Evaluate the CNN model
y_pred_cnn = np.argmax(cnn_model.predict(X_test_cnn), axis=-1)

# Print CNN accuracy
print("\nCNN Model Performance:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_cnn):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_cnn, target_names=label_encoder.classes_))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_cnn))

# Save the CNN model
cnn_model.save('best_cnn_model.h5')
print("\nBest CNN model saved as 'best_cnn_model.h5'!")


Training CNN...
Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.0701 - loss: 3.2666 - val_accuracy: 0.1358 - val_loss: 2.9930
Epoch 2/500
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.1338 - loss: 2.9597 - val_accuracy: 0.2173 - val_loss: 2.6261
Epoch 3/500
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1875 - loss: 2.7111 - val_accuracy: 0.2617 - val_loss: 2.4677
Epoch 4/500
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2158 - loss: 2.5463 - val_accuracy: 0.2914 - val_loss: 2.3607
Epoch 5/500
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.2814 - loss: 2.3756 - val_accuracy: 0.3012 - val_loss: 2.2717
Epoch 6/500
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.3062 - loss: 2.3128 - val_accuracy: 0.3259 - val_loss: 2.1890
Epoch 7/500
[1m51/51[0m [32m━━━━━━━━━




CNN Model Performance:
Test Accuracy: 0.5457

Classification Report:
               precision    recall  f1-score   support

          Axe       0.46      0.43      0.44        14
 BirdChirping       0.75      0.75      0.75        16
     Chainsaw       0.48      0.73      0.58        15
     Clapping       0.91      0.50      0.65        20
         Fire       0.76      1.00      0.86        16
     Firework       0.27      0.31      0.29        13
    Footsteps       0.55      0.38      0.44        16
         Frog       0.71      0.29      0.42        17
    Generator       0.38      0.50      0.43        10
      Gunshot       0.53      0.47      0.50        17
      Handsaw       0.69      0.75      0.72        12
   Helicopter       0.71      0.50      0.59        24
       Insect       0.72      0.65      0.68        20
         Lion       0.30      0.25      0.27        12
         Rain       0.36      0.33      0.35        12
      Silence       0.85      0.79      0.81     

In [8]:
# Reshape data for RNN (reshape to 3D as required for LSTM input)
X_train_rnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_rnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the RNN Model using LSTM
def create_rnn_model(input_shape, num_classes):
    model = models.Sequential()

    # LSTM Layer
    model.add(layers.LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(layers.Dropout(0.5))

    # Second LSTM Layer
    model.add(layers.LSTM(64))
    model.add(layers.Dropout(0.5))

    # Fully Connected Output Layer
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Define input shape for RNN
input_shape_rnn = (X_train_rnn.shape[1], X_train_rnn.shape[2])

# Create and train the RNN model
rnn_model = create_rnn_model(input_shape_rnn, num_classes)
print("Training RNN...")
rnn_model.fit(X_train_rnn, y_train, epochs=100, batch_size=32, validation_data=(X_test_rnn, y_test))

# Evaluate the RNN model
y_pred_rnn = np.argmax(rnn_model.predict(X_test_rnn), axis=-1)

# Print RNN accuracy
print("\nRNN Model Performance:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_rnn):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rnn, target_names=label_encoder.classes_))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rnn))

# Save the RNN model
rnn_model.save('best_rnn_model.h5')
print("\nBest RNN model saved as 'best_rnn_model.h5'!")


  super().__init__(**kwargs)


Training RNN...
Epoch 1/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 149ms/step - accuracy: 0.0629 - loss: 3.2459 - val_accuracy: 0.0840 - val_loss: 3.0985
Epoch 2/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 101ms/step - accuracy: 0.0987 - loss: 3.0757 - val_accuracy: 0.1333 - val_loss: 2.9408
Epoch 3/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 106ms/step - accuracy: 0.1306 - loss: 2.9397 - val_accuracy: 0.1481 - val_loss: 2.8408
Epoch 4/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 128ms/step - accuracy: 0.1812 - loss: 2.7773 - val_accuracy: 0.1728 - val_loss: 2.7428
Epoch 5/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 79ms/step - accuracy: 0.1793 - loss: 2.7377 - val_accuracy: 0.1827 - val_loss: 2.7194
Epoch 6/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 157ms/step - accuracy: 0.2022 - loss: 2.6935 - val_accuracy: 0.2099 - val_loss: 2.6305
Epoc




RNN Model Performance:
Test Accuracy: 0.4395

Classification Report:
               precision    recall  f1-score   support

          Axe       0.50      0.43      0.46        14
 BirdChirping       0.62      0.81      0.70        16
     Chainsaw       0.27      0.27      0.27        15
     Clapping       0.82      0.45      0.58        20
         Fire       0.65      0.81      0.72        16
     Firework       0.31      0.31      0.31        13
    Footsteps       0.46      0.38      0.41        16
         Frog       0.44      0.24      0.31        17
    Generator       0.25      0.30      0.27        10
      Gunshot       0.33      0.12      0.17        17
      Handsaw       0.53      0.67      0.59        12
   Helicopter       0.62      0.42      0.50        24
       Insect       0.71      0.75      0.73        20
         Lion       0.29      0.17      0.21        12
         Rain       0.23      0.25      0.24        12
      Silence       0.64      0.64      0.64     

In [5]:
import librosa
import numpy as np

# Load your audio file
file_path = '/content/drive/MyDrive/Audio Files/21_12104.wav'  # Replace with the path to your audio file
y, sr = librosa.load(file_path, sr=None)

# Extract features
features = []

# 1. MFCCs (13 features)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
mfccs_mean = np.mean(mfccs, axis=1)
features.extend(mfccs_mean)

# 2. Chroma Features (1 feature)
chroma = librosa.feature.chroma_stft(y=y, sr=sr)
chroma_mean = np.mean(chroma)
features.append(chroma_mean)

# 3. Spectral Contrast (6 features)
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
features.extend(spectral_contrast_mean)

# 4. Spectral Bandwidth (1 feature)
spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
spectral_bandwidth_mean = np.mean(spectral_bandwidth)
features.append(spectral_bandwidth_mean)

# 5. Spectral Rolloff (1 feature)
spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
spectral_rolloff_mean = np.mean(spectral_rolloff)
features.append(spectral_rolloff_mean)

# 6. Zero-Crossing Rate (1 feature)
zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
zero_crossing_rate_mean = np.mean(zero_crossing_rate)
features.append(zero_crossing_rate_mean)

# Final feature vector (29 features)
audio_features = np.array(features)
print("Extracted Features:", audio_features)


Extracted Features: [-3.41216217e+02  2.37820328e+02 -7.58395233e+01  3.00383415e+01
  4.63048820e+01 -5.31961966e+00  2.13348999e+01 -6.41554880e+00
 -5.12659454e+00  1.60339432e+01  3.17877889e+00  3.73776340e+00
 -1.76511586e+00  6.29004717e-01  1.25360154e+01  7.84837768e+00
  1.10629346e+01  1.31765242e+01  1.76815033e+01  2.62041708e+01
  3.46152252e+01  1.75817916e+03  2.96563667e+03  4.92008501e-02]


In [None]:
# Reshape and scale the features
audio_features_scaled = scaler.transform(audio_features.reshape(1, -1))  # Shape must be (1, 29)

# Predict the class
prediction = np.argmax(model.predict(audio_features_scaled), axis=-1)

# Map the prediction to the original class name
predicted_class = label_encoder.inverse_transform(prediction)
print(f"Predicted Class: {predicted_class[0]}")
