In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def extract_features(audio_path=None, y=None, sr=22050, duration=5.0):
    """Extracts MFCC and other features from an audio file or audio data."""
    try:
      if y is None:
        y, sr = librosa.load(audio_path, sr=sr, duration=duration)

      if len(y) == 0:  # Handle empty audio files
          return None

      mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
      mfccs_delta = librosa.feature.delta(mfccs)
      mfccs_delta2 = librosa.feature.delta(mfccs, order=2)

      chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
      rms = librosa.feature.rms(y=y)
      spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
      spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
      spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
      zero_crossing_rate = librosa.feature.zero_crossing_rate(y=y)

      # Ensure all features have the same number of dimensions
      features = np.concatenate([
          np.mean(mfccs, axis=1),
          np.mean(mfccs_delta, axis=1),
          np.mean(mfccs_delta2, axis=1),
          np.mean(chroma_stft, axis=1),
          np.mean(rms, axis=1),
          np.mean(spectral_centroid, axis=1),
          np.mean(spectral_bandwidth, axis=1),
          np.mean(spectral_rolloff, axis=1),
          np.mean(zero_crossing_rate, axis=1)
      ])
      return features

    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

def augment_audio(y, sr):
    """Applies audio augmentations."""
    augmented_data = []

    # Noise addition
    noise = np.random.randn(len(y))
    y_noise = y + 0.005 * noise
    augmented_data.append(y_noise)

    # Time stretching
    stretch_rate = np.random.uniform(0.8, 1.2)
    y_stretch = librosa.effects.time_stretch(y, rate=stretch_rate)
    augmented_data.append(y_stretch)

    # Pitch shifting
    n_steps = np.random.randint(-3, 3)
    y_pitch = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps) # Corrected line
    augmented_data.append(y_pitch)

    return augmented_data

def process_bird_calls(data_dir="birdcalls", duration=5.0, augment=True):
    """Processes bird call audio files and creates a dataset."""
    data = []
    labels = []

    for species_folder in os.listdir(data_dir):
        species_path = os.path.join(data_dir, species_folder)
        if os.path.isdir(species_path):
            species_name = species_folder.split(".", 1)[1]
            for audio_file in os.listdir(species_path):
                if audio_file.endswith((".wav", ".mp3")):
                    audio_path = os.path.join(species_path, audio_file)
                    features = extract_features(audio_path, duration=duration) # Corrected line
                    if features is not None:
                        data.append(features)
                        labels.append(species_name)

                        if augment:
                            y, sr = librosa.load(audio_path, sr=22050, duration=duration)
                            augmented_audios = augment_audio(y, sr)
                            for aug_audio in augmented_audios:
                                aug_features = extract_features(audio_path = None, y=aug_audio, sr=sr, duration=duration) # This line has been modified
                                if aug_features is not None:
                                    data.append(aug_features)
                                    labels.append(species_name)

    return np.array(data), np.array(labels)

# Main execution
data, labels = process_bird_calls()

# Label Encoding
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, encoded_labels, test_size=0.2, random_state=42)

# Create Pandas DataFrames
train_df = pd.DataFrame(X_train)
train_df['label'] = y_train
test_df = pd.DataFrame(X_test)
test_df['label'] = y_test

# Save DataFrames to CSV (optional)
train_df.to_csv("train_bird_calls.csv", index=False)
test_df.to_csv("test_bird_calls.csv", index=False)

print("Dataset created and saved to CSV files.")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

  return pitch_tuning(


Dataset created and saved to CSV files.
X_train shape: (636, 77), y_train shape: (636,)
X_test shape: (160, 77), y_test shape: (160,)


In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the training and testing datasets
train_df = pd.read_csv("train_bird_calls.csv")
test_df = pd.read_csv("test_bird_calls.csv")

# Separate features (X) and labels (y)
X_train = train_df.drop("label", axis=1).values
y_train = train_df["label"].values
X_test = test_df.drop("label", axis=1).values
y_test = test_df["label"].values

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler to a .pkl file
joblib.dump(scaler, "scaler.pkl")
print("🔥 Scaler saved as scaler.pkl")

# Initialize Models
models = {
    "SVM": SVC(kernel='rbf', C=10, gamma='scale', random_state=42),
    "XGBoost": XGBClassifier(objective='multi:softmax', random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42)
}

# Train and Evaluate Models
best_model = None
best_accuracy = 0.0
best_model_name = ""

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, predictions)

    print(f"\n{name} Results:")
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(y_test, predictions))

    # Save the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_model_name = name

# Save the best model to a .pkl file
print(f"\n✅ Best Model: {best_model_name} with Accuracy: {best_accuracy}")
joblib.dump(best_model, "best_model.pkl")
print("🔥 Model saved as best_model.pkl")

🔥 Scaler saved as scaler.pkl

SVM Results:
Accuracy: 0.775
Classification Report:
               precision    recall  f1-score   support

           1       0.50      1.00      0.67         1
           2       1.00      0.71      0.83         7
           3       0.67      0.33      0.44         6
           4       1.00      1.00      1.00         3
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         6
           7       1.00      1.00      1.00         1
           8       0.50      0.67      0.57         3
           9       0.25      1.00      0.40         1
          10       1.00      1.00      1.00         3
          11       0.67      1.00      0.80         2
          12       0.57      0.80      0.67         5
          13       0.67      1.00      0.80         2
          14       0.33      1.00      0.50         1
          15       0.00      0.00      0.00         0
          16       1.00      1.00      1.00         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Results:
Accuracy: 0.64375
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      1.00      1.00         1
           2       1.00      0.29      0.44         7
           3       1.00      0.33      0.50         6
           4       0.60      1.00      0.75         3
           5       0.80      1.00      0.89         4
           6       0.80      0.67      0.73         6
           7       0.00      0.00      0.00         1
           8       0.40      0.67      0.50         3
           9       0.00      0.00      0.00         1
          10       0.75      1.00      0.86         3
          11       1.00      1.00      1.00         2
          12       1.00      0.40      0.57         5
          13       0.29      1.00      0.44         2
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         0
          16       0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



RandomForest Results:
Accuracy: 0.79375
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.50      1.00      0.67         1
           2       1.00      0.71      0.83         7
           3       1.00      0.33      0.50         6
           4       0.75      1.00      0.86         3
           5       0.50      1.00      0.67         4
           6       1.00      0.83      0.91         6
           7       1.00      1.00      1.00         1
           8       1.00      0.67      0.80         3
           9       0.20      1.00      0.33         1
          10       0.75      1.00      0.86         3
          11       0.67      1.00      0.80         2
          12       1.00      0.80      0.89         5
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         0
          16    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
# from flask import Flask, request, jsonify
# import pandas as pd
# import joblib
# from sklearn.preprocessing import StandardScaler

# app = Flask(__name__)

# # Load the best model and scaler
# model = joblib.load("best_model.pkl")

# @app.route('/')
# def home():
#     return "Bird Call Classification Flask API 🚀"

# @app.route('/predict', methods=['POST'])
# def predict():
#     try:
#         # Get uploaded CSV file
#         file = request.files['file']
#         data = pd.read_csv(file)

#         # Preprocessing
#         X = data.values
#         scaler = StandardScaler()
#         X_scaled = scaler.fit_transform(X)

#         # Make predictions
#         predictions = model.predict(X_scaled)

#         # Return predictions as JSON
#         result = {"predictions": predictions.tolist()}
#         return jsonify(result)

#     except Exception as e:
#         return jsonify({"error": str(e)})

# if __name__ == '__main__':
#     app.run(debug=True)


In [4]:
aaaaaaaaaaaaaaaaaaaaaaaaaaaa

NameError: name 'aaaaaaaaaaaaaaaaaaaaaaaaaaaa' is not defined

---------------------------------------------------

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the training and testing datasets
train_df = pd.read_csv("train_bird_calls.csv")
test_df = pd.read_csv("test_bird_calls.csv")

# Separate features (X) and labels (y)
X_train = train_df.drop("label", axis=1).values
y_train = train_df["label"].values
X_test = test_df.drop("label", axis=1).values
y_test = test_df["label"].values

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Hyperparameter Grids
param_grids = {
    "SVM": {
        'C': [1, 10, 100],
        'gamma': ['scale', 0.1, 0.01],
        'kernel': ['rbf']
    },
    "XGBoost": {
        'n_estimators': [100, 200],
        'learning_rate': [0.1, 0.01],
        'max_depth': [3, 5]
    },
    "RandomForest": {
        'n_estimators': [100, 200],
        'max_depth': [None, 10],
        'min_samples_split': [2, 5]
    }
}

# Initialize Models
models = {
    "SVM": SVC(random_state=42),
    "XGBoost": XGBClassifier(objective='multi:softmax', random_state=42, use_label_encoder=False, eval_metric='mlogloss'),
    "RandomForest": RandomForestClassifier(random_state=42)
}

# Perform Grid Search & Model Comparison
best_model = None
best_accuracy = 0.0
best_model_name = ""

for name, model in models.items():
    print(f"\n🔥 Tuning {name}...")
    grid = GridSearchCV(model, param_grids[name], cv=3, verbose=1, n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    
    best_estimator = grid.best_estimator_
    predictions = best_estimator.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, predictions)

    print(f"\n{name} Results (Tuned):")
    print("Best Parameters:", grid.best_params_)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(y_test, predictions))

    # Save the best model based on accuracy
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = best_estimator
        best_model_name = name

# Save the best model to a .pkl file
print(f"\n✅ Best Model: {best_model_name} with Accuracy: {best_accuracy}")
joblib.dump(best_model, "best_model_tuned.pkl")
print("🔥 Model saved as best_model.pkl")



🔥 Tuning SVM...
Fitting 3 folds for each of 9 candidates, totalling 27 fits

SVM Results (Tuned):
Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Accuracy: 0.8125
Classification Report:
               precision    recall  f1-score   support

           1       0.33      1.00      0.50         1
           2       1.00      0.71      0.83         7
           3       1.00      0.50      0.67         6
           4       1.00      1.00      1.00         3
           5       0.67      1.00      0.80         4
           6       0.86      1.00      0.92         6
           7       1.00      1.00      1.00         1
           8       0.50      0.67      0.57         3
           9       0.50      1.00      0.67         1
          10       1.00      1.00      1.00         3
          11       0.50      1.00      0.67         2
          12       0.67      0.80      0.73         5
          13       0.50      1.00      0.67         2
          14       0.50      1.00      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.




XGBoost Results (Tuned):
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Accuracy: 0.65625
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.33      1.00      0.50         1
           2       1.00      0.43      0.60         7
           3       0.67      0.33      0.44         6
           4       0.50      0.67      0.57         3
           5       0.67      1.00      0.80         4
           6       0.75      1.00      0.86         6
           7       0.50      1.00      0.67         1
           8       0.00      0.00      0.00         3
           9       0.33      1.00      0.50         1
          10       1.00      1.00      1.00         3
          11       0.67      1.00      0.80         2
          12       1.00      0.80      0.89         5
          13       0.67      1.00      0.80         2
          14       1.00      1.00      1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



RandomForest Results (Tuned):
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}
Accuracy: 0.83125
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.50      1.00      0.67         1
           2       1.00      0.71      0.83         7
           3       1.00      0.50      0.67         6
           4       1.00      1.00      1.00         3
           5       0.67      1.00      0.80         4
           6       1.00      1.00      1.00         6
           7       0.50      1.00      0.67         1
           8       1.00      0.67      0.80         3
           9       1.00      1.00      1.00         1
          10       1.00      1.00      1.00         3
          11       0.67      1.00      0.80         2
          12       0.67      0.40      0.50         5
          13       0.67      1.00      0.80         2
          14       1.00      1.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# from flask import Flask, request, jsonify
# import pandas as pd
# import joblib
# from sklearn.preprocessing import StandardScaler

# app = Flask(__name__)

# # Load the best model and scaler
# model = joblib.load("best_model.pkl")

# @app.route('/')
# def home():
#     return "Bird Call Classification Flask API 🚀"

# @app.route('/predict', methods=['POST'])
# def predict():
#     try:
#         # Get uploaded CSV file
#         file = request.files['file']
#         data = pd.read_csv(file)

#         # Preprocessing
#         X = data.values
#         scaler = StandardScaler()
#         X_scaled = scaler.fit_transform(X)

#         # Make predictions
#         predictions = model.predict(X_scaled)

#         # Return predictions as JSON
#         result = {"predictions": predictions.tolist()}
#         return jsonify(result)

#     except Exception as e:
#         return jsonify({"error": str(e)})

# if __name__ == '__main__':
#     app.run(debug=True)
