<a href="https://colab.research.google.com/github/SnehalChhodavadiya/Marine_Audio_Analysis/blob/main/Noise_detention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Libraries

In [None]:
!pip install librosa soundfile numpy matplotlib scikit-learn


Data Source: [MBARI Passive Acoustic Data (AWS)](https://registry.opendata.aws/pacific-sound/)

Preprocessing code

In [4]:
import os
import librosa
import numpy as np
import soundfile as sf

# Source recording; everything below works on the 16 kHz resampled signal.
audio_file = 'MBARI_sample.wav'
audio, sr = librosa.load(audio_file, sr=16000)

# Drop silence, then normalize the remaining signal.
trimmed, _ = librosa.effects.trim(audio)
normalized = librosa.util.normalize(trimmed)

# Fixed clip length: 5 seconds, converted to a sample count.
chunk_duration = 5  # seconds
chunk_samples = sr * chunk_duration

# Walk the signal in clip-sized strides. A trailing piece shorter than a
# full clip is skipped, so every saved file has the same length.
chunks = []
for i in range(0, len(normalized), chunk_samples):
    chunk = normalized[i:i + chunk_samples]
    if len(chunk) != chunk_samples:
        continue
    chunks.append(chunk)
    # File index is the clip's position in the signal (offset // clip size).
    sf.write(f"chunk_{i//chunk_samples}.wav", chunk, sr)

print(f"Total Chunks Created: {len(chunks)}")


KeyboardInterrupt: 

Extract MFCC Features

In [None]:
import librosa
import os

def extract_mfcc(file_path, sr=16000, n_mfcc=13):
    """Return a single MFCC feature vector for one audio file.

    The file is read with ``librosa.load``, MFCCs are computed with
    ``librosa.feature.mfcc``, and the coefficients are averaged over the
    frame axis so that each clip is summarized by one vector.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read.
    sr : int or None, optional
        Sample rate forwarded to ``librosa.load`` (default 16000).
        Per the librosa documentation, ``None`` keeps the file's native rate.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 13).

    Returns
    -------
    numpy.ndarray
        The per-coefficient mean across frames, one value per coefficient.
    """
    # `signal` rather than `y`: the notebook uses `y` for the label vector.
    signal, sample_rate = librosa.load(file_path, sr=sr)
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose to (frames, coefficients) and average over the frames.
    return np.mean(mfcc.T, axis=0)

# Process the saved chunks into a feature matrix X and a label vector y.
N_CLIPS = 10    # number of chunk files to use for now
N_NATURAL = 5   # clips [0, N_NATURAL) get label 0, the remaining clips label 1

X = []
y = []

# Example labeling only: the first N_NATURAL clips are treated as natural and
# the rest as polluted, purely by position.
for i in range(N_CLIPS):
    file = f"chunk_{i}.wav"
    # Fail early with an actionable message rather than an audio-decoder error.
    if not os.path.isfile(file):
        raise FileNotFoundError(
            f"{file} not found: expected {N_CLIPS} chunk files in the working "
            "directory; run the preprocessing cell first."
        )
    mfcc_feat = extract_mfcc(file)
    X.append(mfcc_feat)
    y.append(0 if i < N_NATURAL else 1)  # 0 = natural, 1 = polluted

X = np.array(X)
y = np.array(y)


ML Model Code

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Split data. stratify=y keeps roughly the same class ratio in the train and
# test sets, so the very small held-out set cannot end up with a single class
# by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Train model (fixed random_state so repeated runs give the same forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

print("Model trained successfully.")


 Evaluation Code

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Class ids and their display names, in the order used for every table/plot.
class_ids = [0, 1]
class_names = ["Natural", "Polluted"]

# Predict
y_pred = model.predict(X_test)

# Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)

# Confusion Matrix
# labels=class_ids pins the matrix to 2x2 even when one class is absent from
# y_test / y_pred; otherwise the matrix can shrink and no longer match the
# two tick labels below.
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(cm, annot=True, cmap="Blues", fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
