In [1]:
import numpy as np
import os
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import pandas as pd
# Load the BNREEL text/metadata table.
# - index_col=0: the CSV's first column is a saved row index (it appears as
#   "Unnamed: 0" when read naively); use it as the index instead of carrying
#   it around as a junk data column.
# - dtype for file_id: IDs mix digits-only values with values containing
#   underscores, so read them all as strings to keep every ID verbatim for
#   building "<file_id>.wav" paths later.
df = pd.read_csv(
    "/kaggle/input/bnreel/BNREEL_text.csv",
    index_col=0,
    dtype={"file_id": str},
)
df

Unnamed: 0,file_id,class,text,source,category
0,1000188341415358,Safe,Me on my way to watch Oppenheimer After Berbie,Facebook,Safe
1,1006146964533997,Safe,[NULL],Facebook,Safe
2,1007123741459823,Adult,বান্ধবী আমার লাগানির পিনিকে আছে Love in love,Facebook,Unsafe
3,1007420607202244,Safe,প্রত্যেকটা অফিসে এমন একজন কলিগ থাকবেই Laptop,Facebook,Safe
4,1008519870870927,Safe,হ্যালো শার্ক ট্যাঙ্ক বাংলাদেশ আই এম কামিং,Facebook,Safe
...,...,...,...,...,...
1916,3356285208118215619_62841927222,Safe,The amount of comfort this scene gives >>,Instagram,Safe
1917,3363424170779680862_62841927222_2,Safe,Anupam Roy's voice is line medicine for all br...,Instagram,Safe
1918,3363424170779680862_62841927222_3,Safe,Anupam Roy's voice is line medicine for all br...,Instagram,Safe
1919,3363424170779680862_62841927222_,Safe,Anupam Roy's voice is line medicine for all br...,Instagram,Safe


In [3]:
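# Disabled experiment: keep only the rows whose category is 'Unsafe'.
# If re-enabled, it must run before the factorize cell below, which replaces
# the category strings with integer codes.
# df = df[df['category']=='Unsafe']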

In [4]:
# Encode the category strings as integer codes.
# - sort=True makes the mapping alphabetical ('Safe' -> 0, 'Unsafe' -> 1)
#   instead of depending on which label happens to appear first in the CSV.
# - The dtype guard keeps the cell idempotent: re-running it on the already
#   encoded column would factorize the integers and overwrite `uniques` with
#   the codes themselves, losing the code -> name mapping.
if df['category'].dtype.kind not in "iu":
    df['category'], uniques = pd.factorize(df['category'], sort=True)
uniques

Index(['Safe', 'Unsafe'], dtype='object')

In [5]:
# Directories that are searched for each reel's "<file_id>.wav" audio file.
dir1 = "/kaggle/input/bnreel/Audio_V1/Audio_V1"
dir2 = "/kaggle/input/bnreel/Audio_V2/Audio_V2"
dir3 = "/kaggle/input/bnreel/UNBER_IG/UNBER_IG/Audio_IG"

In [30]:
pip install noisereduce resampy --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hNote: you may need to restart the kernel to use updated packages.


In [31]:
import os
import numpy as np
import pandas as pd
import librosa
import noisereduce as nr
import soundfile as sf
import resampy

In [32]:
def process_audio_for_vggish(file_path, target_window_duration=0.96, sample_rate=16000,
                             n_fft=400, hop_length=160, n_mels=64, expected_frames=96,
                             pre_emphasis=0.96):
    """Convert the first window of an audio file into a fixed-size log-mel patch.

    Pipeline: load the file at its native rate, resample to ``sample_rate`` if
    needed, trim / zero-pad to exactly one window, apply a first-order
    pre-emphasis filter, compute a power mel spectrogram, convert it to dB
    relative to the patch's own peak, and finally force the frame axis to
    ``expected_frames``.

    Only the beginning of the recording is used; audio after
    ``target_window_duration`` seconds is discarded.

    Args:
        file_path: Path of the audio file to read.
        target_window_duration: Window length in seconds.
        sample_rate: Sample rate (Hz) the audio is resampled to.
        n_fft: FFT size passed to the mel spectrogram.
        hop_length: Hop (in samples) between spectrogram frames.
        n_mels: Number of mel bins.
        expected_frames: Number of frames in the returned patch.
        pre_emphasis: Coefficient ``a`` of the filter ``y[t] = x[t] - a * x[t-1]``.

    Returns:
        ``np.ndarray`` of shape ``(expected_frames, n_mels)`` holding dB values
        relative to the loudest bin of the patch (``ref=np.max``).

    Raises:
        ValueError: If the requested window is shorter than one sample.
    """
    # Load at the file's native rate, then resample only when it differs.
    audio, orig_sr = librosa.load(file_path, sr=None)
    if orig_sr != sample_rate:
        audio = resampy.resample(audio, orig_sr, sample_rate)

    # round() instead of plain int(): duration * rate is a float product, and
    # truncation could silently drop a sample for some duration/rate pairs.
    target_samples = int(round(target_window_duration * sample_rate))
    if target_samples < 1:
        raise ValueError("target_window_duration * sample_rate must be at least one sample")

    # Trim or zero-pad (at the end) to exactly one window.
    if len(audio) > target_samples:
        audio = audio[:target_samples]
    else:
        audio = np.pad(audio, (0, target_samples - len(audio)), mode='constant')

    # Pre-emphasis: the first sample is kept as is, every later sample has a
    # scaled copy of its predecessor subtracted.
    audio = np.append(audio[0], audio[1:] - pre_emphasis * audio[:-1])

    # Power mel spectrogram, shape (n_mels, num_frames).
    mel_spec = librosa.feature.melspectrogram(
        y=audio,
        sr=sample_rate,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        power=2.0
    )

    # dB relative to the patch maximum: the loudest bin maps to 0 dB and every
    # other bin is negative.
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # Transpose to (num_frames, n_mels).
    log_mel_spec = log_mel_spec.T

    # Force the frame axis to expected_frames. With the default arguments the
    # spectrogram typically comes out one frame longer than expected_frames,
    # so the trimming branch is the usual path.
    n_frames = log_mel_spec.shape[0]
    if n_frames < expected_frames:
        # Pad with the patch's quietest value. Padding with 0 would insert
        # frames at 0 dB, i.e. at the maximum level, which reads as peak
        # energy rather than silence.
        log_mel_spec = np.pad(
            log_mel_spec,
            ((0, expected_frames - n_frames), (0, 0)),
            mode='constant',
            constant_values=log_mel_spec.min(),
        )
    elif n_frames > expected_frames:
        log_mel_spec = log_mel_spec[:expected_frames, :]

    return log_mel_spec  # shape: (expected_frames, n_mels)

In [33]:
# Accumulators for the loading loop: X collects one log-mel patch per audio
# file found, y collects the matching label.
X, y = [], []

In [34]:
# Start from empty accumulators so re-running this cell never appends the same
# clips twice (and still works after X / y have been converted to arrays).
X, y = [], []

audio_dirs = (dir1, dir2, dir3)

# Only file_id and category are needed, so iterate over those two columns
# directly instead of materialising a Series per row with iterrows().
for reel_id, label in zip(df["file_id"], df["category"]):
    # A reel contributes one sample per directory that holds "<file_id>.wav";
    # rows with no matching file in any directory are skipped.
    for audio_dir in audio_dirs:  # named audio_dir to avoid shadowing builtin dir()
        audio_file = os.path.join(audio_dir, f"{reel_id}.wav")
        if os.path.exists(audio_file):
            X.append(process_audio_for_vggish(audio_file))
            y.append(label)

In [35]:
# Stack the per-clip patches into a single float32 array; the leading axis
# indexes clips.
X = np.array(X, dtype=np.float32)

In [36]:
# Sanity check: (number of clips loaded, frames per patch, mel bins).
X.shape

(1905, 96, 64)

In [37]:
# Labels as an array; its length should equal X.shape[0] (one label per clip).
y = np.array(y)
y.shape

(1905,)

In [39]:
from sklearn.preprocessing import StandardScaler

# Standardise each mel bin: every frame of every clip becomes one row, so the
# scaler learns one mean / std per bin (column).
# NOTE(review): the scaler is fit on the whole dataset before the
# train/val/test split, so statistics of the held-out clips leak into
# training. Ideally fit on the training split only and transform the others.
X_reshaped = X.reshape(-1, X.shape[-1])  # (n_samples * n_frames, n_mels)
scaler = StandardScaler()
# Restore the layout from X.shape rather than a hard-coded (96, 64), so the
# cell keeps working if the patch size produced upstream changes.
X_scaled = scaler.fit_transform(X_reshaped).reshape(X.shape)

In [42]:
import numpy as np

# This cell used to shuffle (X_scaled, y) pairs with the unseeded global NumPy
# RNG. That had two problems:
#   * every run produced a different sample order, so results were not
#     reproducible;
#   * re-running the cell paired the still-unshuffled X_scaled with the
#     already-shuffled y, silently assigning labels to the wrong clips.
# The stratified train_test_split that consumes X and y already shuffles with a
# fixed random_state, so no extra shuffle is needed. Handing over the scaled
# features and the labels in their original, aligned order keeps this cell
# deterministic and safe to re-run.
X = X_scaled
y = np.array(y)

In [43]:
# Stage 1: keep 80% for training and hold out 20% (stratified by label).
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Stage 2: split the held-out 20% again 80/20 into validation and test,
# i.e. roughly 16% / 4% of the full dataset.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.2,
    stratify=y_temp,
    random_state=42,
)

In [44]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers, Model

In [47]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_vggish_like_model(input_shape=(96, 64, 1), num_classes=1, dropout_rate=0.3):
    """Assemble a small VGG-style 2-D CNN classifier.

    The network is four convolutional blocks (3x3 'same' ReLU convolutions
    followed by 2x2 max pooling), then Flatten, two Dense + Dropout stages
    (128 and 64 units) and a final Dense output layer.

    Args:
        input_shape: Shape of one input example, without the batch axis.
        num_classes: Number of output units. 1 gives a sigmoid output,
            any other value gives a softmax output.
        dropout_rate: Dropout rate applied after each hidden Dense layer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # (filters, number of stacked convolutions) for each block, in order.
    conv_blocks = ((64, 1), (128, 1), (256, 2), (512, 2))

    inputs = tf.keras.Input(shape=input_shape)

    features = inputs
    for n_filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            features = layers.Conv2D(n_filters, (3, 3), padding='same', activation='relu')(features)
        # Each block ends by halving both spatial dimensions.
        features = layers.MaxPooling2D((2, 2), strides=(2, 2))(features)

    # Classifier head: flatten, then two Dense + Dropout stages.
    features = layers.Flatten()(features)
    for n_units in (128, 64):
        features = layers.Dense(n_units, activation='relu')(features)
        features = layers.Dropout(dropout_rate)(features)

    # A single unit is treated as binary (sigmoid); otherwise use softmax.
    if num_classes == 1:
        head_activation = 'sigmoid'
    else:
        head_activation = 'softmax'
    outputs = layers.Dense(num_classes, activation=head_activation)(features)

    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [48]:
# Append a trailing channel axis so each split becomes
# (n_samples, 96, 64, 1), matching the model's input_shape of (96, 64, 1).
# Only 3-D arrays are expanded, so re-running this cell does not keep stacking
# extra singleton axes onto arrays that already have the channel dimension.
if X_train.ndim == 3:
    X_train = np.expand_dims(X_train, axis=-1)
if X_test.ndim == 3:
    X_test = np.expand_dims(X_test, axis=-1)
if X_val.ndim == 3:
    X_val = np.expand_dims(X_val, axis=-1)

In [49]:
# Confirm the training array has the (n_samples, 96, 64, 1) layout after the
# channel axis was added.
print(X_train.shape)

(1524, 96, 64, 1)


In [50]:
# num_classes=1 selects the single sigmoid output unit (binary classifier).
model = build_vggish_like_model(input_shape=(96, 64, 1), num_classes=1)

In [55]:
# Binary cross-entropy pairs with the model's single sigmoid output; accuracy
# is reported alongside the loss during training.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [56]:
# Print the model's layer-by-layer summary.
model.summary()

In [57]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training once val_loss has gone 5 epochs without improving, and restore
# the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Halve the learning rate after 3 epochs without val_loss improvement.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

In [None]:
# Train the model; `history` keeps the per-epoch metrics returned by fit().
history = model.fit(
    X_train, y_train,  # training features and labels
    epochs=50,  # upper bound; early stopping may end training sooner
    batch_size=16,  # samples per gradient update
    validation_data=(X_val, y_val),  # monitored by both callbacks via val_loss
    callbacks=[early_stopping, lr_scheduler]  # early stopping + LR reduction on plateau
)

Epoch 1/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 852ms/step - accuracy: 0.5374 - loss: 0.7277 - val_accuracy: 0.5526 - val_loss: 0.6896 - learning_rate: 0.0010
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 816ms/step - accuracy: 0.5394 - loss: 0.6928 - val_accuracy: 0.5526 - val_loss: 0.6885 - learning_rate: 0.0010
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 830ms/step - accuracy: 0.5264 - loss: 0.6950 - val_accuracy: 0.5526 - val_loss: 0.6880 - learning_rate: 0.0010
Epoch 4/50
[1m19/96[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m57s[0m 752ms/step - accuracy: 0.5338 - loss: 0.6929

In [None]:
# Sigmoid outputs for the test split: one probability per sample.
y_pred = model.predict(X_test)
# Threshold at 0.5 to obtain hard 0/1 class labels.
y_pred_classes = (y_pred >= 0.5).astype(int)
# Multi-class alternative, left disabled: with the single output column used
# here, argmax over axis 1 would always return 0.
# y_pred_classes = np.argmax(y_pred, axis=1)

In [None]:
# Per-class precision / recall / F1 on the test split; classes appear as the
# integer codes produced when the category column was factorized.
print(classification_report(y_test, y_pred_classes))

In [None]:
# labels=[0, 1] pins the matrix to 2x2 in code order (0 = Safe, 1 = Unsafe).
# Without it, a class that is missing from both y_test and the predictions
# would shrink the matrix and misalign it with the hard-coded tick labels.
cm = confusion_matrix(y_test, y_pred_classes, labels=[0, 1])

# Plot confusion matrix (rows = true label, columns = predicted label)
plt.figure(figsize=(6, 4))

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["Safe","Unsafe"], yticklabels=["Safe","Unsafe"])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")

plt.show()