<a href="https://colab.research.google.com/github/Champei/mine/blob/main/Copy_of_audo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import zipfile

# Path to my ZIP in Google Drive
zip_path = '/content/drive/MyDrive/the-frequency-quest.zip'

# Folder where files will be extracted
extract_path = '/content/the-frequency-quest_folder'

# Make sure extraction folder exists
os.makedirs(extract_path, exist_ok=True)


In [4]:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# List all files inside, including in subfolders
all_files = []
for root, dirs, files in os.walk(extract_path):
    for name in files:
        all_files.append(os.path.join(root, name))

# Print all files
for f in all_files:
    print(f)


/content/the-frequency-quest_folder/sample_submission.csv
/content/the-frequency-quest_folder/test/test/72579-3-0-0.wav
/content/the-frequency-quest_folder/test/test/144007-5-0-9.wav
/content/the-frequency-quest_folder/test/test/205874-4-3-0.wav
/content/the-frequency-quest_folder/test/test/79377-9-0-12.wav
/content/the-frequency-quest_folder/test/test/180052-3-0-0.wav
/content/the-frequency-quest_folder/test/test/180127-4-0-11.wav
/content/the-frequency-quest_folder/test/test/171406-9-0-23.wav
/content/the-frequency-quest_folder/test/test/157867-8-0-26.wav
/content/the-frequency-quest_folder/test/test/76086-4-0-3.wav
/content/the-frequency-quest_folder/test/test/154758-5-0-4.wav
/content/the-frequency-quest_folder/test/test/106015-5-0-15.wav
/content/the-frequency-quest_folder/test/test/172314-9-0-51.wav
/content/the-frequency-quest_folder/test/test/60605-9-0-52.wav
/content/the-frequency-quest_folder/test/test/77751-4-1-0.wav
/content/the-frequency-quest_folder/test/test/110868-9-0-1

In [None]:
train_dir = '/content/the-frequency-quest_folder/train/train'
test_dir = '/content/the-frequency-quest_folder/test/test'

print("Train folders found:", os.listdir(train_dir))
print("Test files:", os.listdir(test_dir)[:5])


Train folders found: ['dog_bark', 'engine_idling', 'street_music', 'drilling', 'siren']
Test files: ['98202-9-1-27.wav', '144068-5-0-6.wav', '103199-4-2-2.wav', '24347-8-0-94.wav', '128240-3-0-42.wav']


In [None]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

# Set paths
train_dir = '/content/the-frequency-quest_folder/train/train'
test_dir = '/content/the-frequency-quest_folder/test/test'

# Audio categories
categories = ['dog_bark', 'drilling', 'engine_idling', 'siren', 'street_music']

# Function to extract features from an audio file
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)  # Load audio
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    return mfccs_mean

# Prepare training data
X = []
y = []

for label in categories:
    folder = os.path.join(train_dir, label)
    for file in tqdm(os.listdir(folder), desc=f'Processing {label}'):
        file_path = os.path.join(folder, file)
        features = extract_features(file_path)
        X.append(features)
        y.append(label)

X = np.array(X)
y = np.array(y)

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Train a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)

# Check validation accuracy
val_acc = clf.score(X_val, y_val)
print(f'Validation Accuracy: {val_acc*100:.2f}%')

# Prepare test data
test_files = os.listdir(test_dir)
test_features = []

for file in tqdm(test_files, desc='Processing test files'):
    file_path = os.path.join(test_dir, file)
    features = extract_features(file_path)
    test_features.append(features)

test_features = np.array(test_features)

# Predict test labels
preds = clf.predict(test_features)
pred_labels = le.inverse_transform(preds)

# Save to CSV
submission = pd.DataFrame({
    'ID': test_files,
    'Class': pred_labels
})

submission.to_csv('submission.csv', index=False)
print('Submission saved as submission.csv')


Processing dog_bark: 100%|██████████| 700/700 [00:42<00:00, 16.36it/s]
  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
Processing drilling: 100%|██████████| 700/700 [00:30<00:00, 22.62it/s]
Processing engine_idling: 100%|██████████| 700/700 [00:37<00:00, 18.88it/s]
Processing siren: 100%|██████████| 650/650 [00:26<00:00, 24.53it/s]
Processing street_music: 100%|██████████| 700/700 [00:26<00:00, 26.28it/s]


Validation Accuracy: 92.61%


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
Processing test files: 100%|██████████| 740/740 [00:31<00:00, 23.26it/s]


Submission saved as submission.csv


In [None]:
!pip install tensorflow --upgrade


Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m620.7/620.7 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m84.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboard, tensorflow
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.19.0
    Uninstalling tensorboard-2.19.0:
      Successfully uninstalled tensorboard-2.19.0
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.19.0
    Uninstalling tensorflow-2.

In [None]:
import os
import numpy as np
import pandas as pd
import librosa, librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tqdm import tqdm

# Set paths

train_dir = '/content/the-frequency-quest_folder/train/train'
test_dir = '/content/the-frequency-quest_folder/test/test'

categories = ['dog_bark', 'drilling', 'engine_idling', 'siren', 'street_music']

# Helper - Extract Mel Spectrogram with fixed shape
n_mels = 128
duration = 3
sr = 22050
n_fft = 2048
hop_length = 512
expected_frames = int(np.ceil(sr * duration / hop_length))

def extract_mel_spec(file_path, n_mels=n_mels, duration=duration, sr=sr, n_fft=n_fft, hop_length=hop_length):
    try:
        y, sr = librosa.load(file_path, sr=sr, duration=duration)
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        # Pad or truncate to fixed number of frames
        if mel_db.shape[1] < expected_frames:
            pad_width = expected_frames - mel_db.shape[1]
            mel_db = np.pad(mel_db, ((0,0),(0,pad_width)), mode='constant')
        else:
            mel_db = mel_db[:, :expected_frames]
        return mel_db
    except Exception as e:
        print("Error:", e)
        return np.zeros((n_mels, expected_frames))

# Load and Augment data

X, y = [], []

for label in categories:
    folder = os.path.join(train_dir, label)
    for file in tqdm(os.listdir(folder), desc=f'Processing {label}'):
        path = os.path.join(folder, file)

        # Original
        mel = extract_mel_spec(path)
        X.append(mel)
        y.append(label)

        # Load full audio for augmentation
        y_audio, _ = librosa.load(path, sr=sr, duration=duration)

        # Add noise
        noise_audio = y_audio + 0.005 * np.random.randn(len(y_audio))
        mel_noise = librosa.feature.melspectrogram(y=noise_audio, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        mel_noise_db = librosa.power_to_db(mel_noise, ref=np.max)
        if mel_noise_db.shape[1] < expected_frames:
            mel_noise_db = np.pad(mel_noise_db, ((0,0),(0,expected_frames - mel_noise_db.shape[1])), mode='constant')
        else:
            mel_noise_db = mel_noise_db[:, :expected_frames]
        X.append(mel_noise_db)
        y.append(label)

        # Pitch shift
        pitched_audio = librosa.effects.pitch_shift(y_audio, sr=sr, n_steps=2)
        mel_pitch = librosa.feature.melspectrogram(y=pitched_audio, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        mel_pitch_db = librosa.power_to_db(mel_pitch, ref=np.max)
        if mel_pitch_db.shape[1] < expected_frames:
            mel_pitch_db = np.pad(mel_pitch_db, ((0,0),(0,expected_frames - mel_pitch_db.shape[1])), mode='constant')
        else:
            mel_pitch_db = mel_pitch_db[:, :expected_frames]
        X.append(mel_pitch_db)
        y.append(label)

X = np.array(X)
y = np.array(y)

print("Dataset shape:", X.shape, "Labels:", y.shape)

# Encode labels + prepare input

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# CNNs need 4D input (samples, height, width, channels)

X = np.expand_dims(X, -1)

# Split for validation

X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Build CNN Model

model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=X_train.shape[1:]),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(categories), activation='softmax')
])

# Compile with tuning

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train model

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
    verbose=1
)

# Evaluate

val_acc = model.evaluate(X_val, y_val, verbose=0)[1]
print(f' Validation Accuracy: {val_acc*100:.2f}%')

# Prepare test data + predict

test_files = os.listdir(test_dir)
X_test = []

for file in tqdm(test_files, desc='Processing test files'):
    mel = extract_mel_spec(os.path.join(test_dir, file))
    X_test.append(mel)

X_test = np.array(X_test)
X_test = np.expand_dims(X_test, -1)

preds = model.predict(X_test)
pred_labels = le.inverse_transform(np.argmax(preds, axis=1))

# Saving submission
submission = pd.DataFrame({
    'ID': test_files,
    'Class': pred_labels
})

submission.to_csv('submission1.csv', index=False)
print('Submission saved as submission1.csv')


Processing dog_bark: 100%|██████████| 700/700 [01:26<00:00,  8.06it/s]
Processing drilling: 100%|██████████| 700/700 [01:39<00:00,  7.06it/s]
Processing engine_idling: 100%|██████████| 700/700 [01:36<00:00,  7.27it/s]
Processing siren: 100%|██████████| 650/650 [01:28<00:00,  7.35it/s]
Processing street_music: 100%|██████████| 700/700 [01:35<00:00,  7.33it/s]


Dataset shape: (10350, 128, 130) Labels: (10350,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m486s[0m 2s/step - accuracy: 0.6753 - loss: 1.1845 - val_accuracy: 0.8498 - val_loss: 0.4722
Epoch 2/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 2s/step - accuracy: 0.8734 - loss: 0.3698 - val_accuracy: 0.8841 - val_loss: 0.3688
Epoch 3/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m472s[0m 2s/step - accuracy: 0.9341 - loss: 0.1991 - val_accuracy: 0.9382 - val_loss: 0.2186
Epoch 4/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m508s[0m 2s/step - accuracy: 0.9647 - loss: 0.1132 - val_accuracy: 0.9546 - val_loss: 0.1415
Epoch 5/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 2s/step - accuracy: 0.9825 - loss: 0.0591 - val_accuracy: 0.9551 - val_loss: 0.1574
Epoch 6/30
[1m259/259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m468s[0m 2s/step - accuracy: 0.9844 - loss: 0.0518 - val_accuracy: 0.9502 - val_loss: 0.1734
Epoch 7/30
[1m259/259

Processing test files: 100%|██████████| 740/740 [00:25<00:00, 28.80it/s]


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 553ms/step
Submission saved as submission1.csv


In [5]:
import os
import numpy as np
import pandas as pd
import librosa, librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tqdm import tqdm

# STEP 2: Set paths
train_dir = '/content/the-frequency-quest_folder/train/train'
test_dir = '/content/the-frequency-quest_folder/test/test'

categories = ['dog_bark', 'drilling', 'engine_idling', 'siren', 'street_music']

# Helper - Extract Mel Spectrogram with fixed shape
n_mels = 128
duration = 3
sr = 22050
n_fft = 2048
hop_length = 512
expected_frames = int(np.ceil(sr * duration / hop_length))

def extract_mel_spec(file_path, n_mels=n_mels, duration=duration, sr=sr, n_fft=n_fft, hop_length=hop_length):
    try:
        y, sr = librosa.load(file_path, sr=sr, duration=duration)
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        # Pad/truncate
        if mel_db.shape[1] < expected_frames:
            mel_db = np.pad(mel_db, ((0,0),(0,expected_frames - mel_db.shape[1])), mode='constant')
        else:
            mel_db = mel_db[:, :expected_frames]
        return mel_db
    except Exception as e:
        print("Error:", e)
        return np.zeros((n_mels, expected_frames))

# Augmentation functions
def add_noise(y, noise_factor=0.005):
    return y + noise_factor * np.random.randn(len(y))

def pitch_shift(y, sr, n_steps=2):
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

def time_stretch(y, rate=0.9):
    try:
        if len(y) < 2 * 512:  # too short for stretching
            return y
        return librosa.effects.time_stretch(y, rate=rate)
    except Exception:
              return y


def time_shift(y, shift_max=0.2):
    shift = int(np.random.uniform(-shift_max, shift_max) * len(y))
    return np.roll(y, shift)

# Load and Augment data
X, y_labels = [], []

for label in categories:
    folder = os.path.join(train_dir, label)
    for file in tqdm(os.listdir(folder), desc=f'Processing {label}'):
        path = os.path.join(folder, file)
        y_audio, _ = librosa.load(path, sr=sr, duration=duration)

        # Original
        X.append(extract_mel_spec(path))
        y_labels.append(label)

        # Augmentations
        aug_audios = [
            add_noise(y_audio),
            pitch_shift(y_audio, sr=sr, n_steps=2),
            time_stretch(y_audio, rate=0.9),
            time_shift(y_audio, shift_max=0.2)
        ]

        for aug in aug_audios:
            mel_aug = librosa.feature.melspectrogram(y=aug, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
            mel_aug_db = librosa.power_to_db(mel_aug, ref=np.max)
            if mel_aug_db.shape[1] < expected_frames:
                mel_aug_db = np.pad(mel_aug_db, ((0,0),(0,expected_frames - mel_aug_db.shape[1])), mode='constant')
            else:
                mel_aug_db = mel_aug_db[:, :expected_frames]
            X.append(mel_aug_db)
            y_labels.append(label)

X = np.array(X)
y_labels = np.array(y_labels)

print("Dataset shape:", X.shape, "Labels:", y_labels.shape)

# Encode labels + prepare input
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y_labels)

# CNNs need 4D input (samples, height, width, channels)
X = np.expand_dims(X, -1)

# Split for validation
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Build stronger CNN Model
def conv_block(x, filters):
    x = layers.Conv2D(filters, (3,3), padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2,2))(x)
    return x

inputs = layers.Input(shape=X_train.shape[1:])
x = conv_block(inputs, 32)
x = conv_block(x, 64)
x = conv_block(x, 128)
x = conv_block(x, 256)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(len(categories), activation='softmax')(x)
model = models.Model(inputs, outputs)

# Compile with optimizer and learning rate scheduler
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Callbacks
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True)
lr_reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Train model
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stop, lr_reduce],
    verbose=1
)

# Evaluate
val_acc = model.evaluate(X_val, y_val, verbose=0)[1]
print(f'Validation Accuracy: {val_acc*100:.2f}%')

# Prepare test data + predict
test_files = os.listdir(test_dir)
X_test = []

for file in tqdm(test_files, desc='Processing test files'):
    mel = extract_mel_spec(os.path.join(test_dir, file))
    X_test.append(mel)

X_test = np.array(X_test)
X_test = np.expand_dims(X_test, -1)

preds = model.predict(X_test)
pred_labels = le.inverse_transform(np.argmax(preds, axis=1))

# Saving submission
submission = pd.DataFrame({
    'ID': test_files,
    'Class': pred_labels
})
submission.to_csv('submission_improved.csv', index=False)
print('submission_improved.csv')


Processing dog_bark: 100%|██████████| 700/700 [01:20<00:00,  8.72it/s]
Processing drilling: 100%|██████████| 700/700 [01:32<00:00,  7.61it/s]
Processing engine_idling: 100%|██████████| 700/700 [01:36<00:00,  7.26it/s]
Processing siren: 100%|██████████| 650/650 [01:21<00:00,  7.95it/s]
Processing street_music: 100%|██████████| 700/700 [01:33<00:00,  7.47it/s]


Dataset shape: (17250, 128, 130) Labels: (17250,)


Epoch 1/50
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 43ms/step - accuracy: 0.6197 - loss: 1.0103 - val_accuracy: 0.8388 - val_loss: 0.4996 - learning_rate: 1.0000e-04
Epoch 2/50
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.8557 - loss: 0.4307 - val_accuracy: 0.8774 - val_loss: 0.3373 - learning_rate: 1.0000e-04
Epoch 3/50
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 24ms/step - accuracy: 0.8921 - loss: 0.3235 - val_accuracy: 0.8693 - val_loss: 0.3661 - learning_rate: 1.0000e-04
Epoch 4/50
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.9118 - loss: 0.2531 - val_accuracy: 0.9357 - val_loss: 0.1944 - learning_rate: 1.0000e-04
Epoch 5/50
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.9363 - loss: 0.1896 - val_accuracy: 0.9603 - val_loss: 0.1296 - learning_rate: 1.0000e-04
Epoch 6/50
[1m432/432[0m [32m━━━━━━━━

Processing test files: 100%|██████████| 740/740 [00:15<00:00, 47.19it/s]


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step
submission_improved.csv
