In [54]:
import os
import librosa
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from pydub import AudioSegment
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

In [84]:
# Directory whose .mp3 clips are classified by this notebook.
TEST_DIR = '/content/drive/MyDrive/all_splited_obf'
# Checkpoint file holding the trained model weights.
MODEL_PATH = '/content/drive/MyDrive/best_model.pth'
# Class labels "0" .. "10"; a label's position in the list is its class index.
CLASSES = [str(class_index) for class_index in range(11)]

In [85]:
def convert_to_wav(file_path):
    """Return a path to a WAV version of ``file_path``.

    Paths that do not end in ``.mp3`` (compared case-insensitively) are
    returned unchanged. For an MP3, the audio is decoded with pydub and
    exported as a ``.wav`` file next to the source, overwriting any file
    already at that location, and the new path is returned.

    Args:
        file_path: Path of the audio file.

    Returns:
        The path of the exported WAV file, or ``file_path`` itself when it
        is not an MP3.
    """
    if not file_path.lower().endswith(".mp3"):
        return file_path

    # Swap the trailing ".mp3" (in whatever letter case) for ".wav".
    wav_path = file_path[:-len(".mp3")] + ".wav"
    AudioSegment.from_mp3(file_path).export(wav_path, format="wav")
    return wav_path

In [86]:
def audio_to_melspectrogram(file_path, max_len=128, augment=False):
    """Load an audio file and return a fixed-width log-mel spectrogram.

    The audio is loaded at 16 kHz, turned into a 128-band mel spectrogram,
    converted to dB relative to its own maximum, and then cut or
    right-padded along the second axis to exactly ``max_len`` columns.

    Args:
        file_path: Path of the audio file to load.
        max_len: Number of columns (second axis) in the returned array.
        augment: When True, add random Gaussian noise scaled by 0.005 and
            time-stretch the signal with rate 1.1 before the spectrogram is
            computed. The noise is drawn from NumPy's global RNG, so the
            result is not reproducible unless that RNG is seeded.

    Returns:
        A 2-D array with 128 rows and ``max_len`` columns.
    """
    waveform, sample_rate = librosa.load(file_path, sr=16000)

    if augment:
        # Data augmentation: additive noise followed by a speed change.
        waveform = waveform + 0.005 * np.random.randn(len(waveform))
        waveform = librosa.effects.time_stretch(waveform, rate=1.1)

    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=128)
    spec_db = librosa.power_to_db(power_spec, ref=np.max)

    n_frames = spec_db.shape[1]
    if n_frames > max_len:
        # Too long: keep only the first max_len columns.
        return spec_db[:, :max_len]

    # Short (or exact) input: append zero-valued columns on the right.
    missing = max_len - n_frames
    return np.pad(spec_db, ((0, 0), (0, missing)), mode='constant')


In [87]:
class TestDataset(Dataset):
    """Dataset over the MP3 files located directly inside one directory.

    Indexing yields ``(mel_spec, file_name)``: the (optionally transformed)
    log-mel spectrogram of the clip and the base name of its source file.

    Args:
        data_dir: Directory to scan (not recursive).
        max_len: Spectrogram width forwarded to ``audio_to_melspectrogram``.
        augment: Augmentation flag forwarded to ``audio_to_melspectrogram``.
        transform: Optional callable applied to the spectrogram before it
            is returned.
    """

    def __init__(self, data_dir, max_len=128, augment=False, transform=None):
        # os.listdir returns entries in arbitrary order, so sort them to make
        # the item order (and any output written in that order) reproducible.
        # The extension is matched case-insensitively, the same way
        # convert_to_wav recognises MP3 files.
        self.file_paths = [
            os.path.join(data_dir, name)
            for name in sorted(os.listdir(data_dir))
            if name.lower().endswith(".mp3")
        ]
        self.max_len = max_len
        self.augment = augment
        self.transform = transform

    def __len__(self):
        """Return the number of MP3 files found in the directory."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(mel_spec, file_name)`` for the clip at position ``idx``."""
        file_path = self.file_paths[idx]
        # Side effect: the MP3 is decoded and written as a .wav next to it.
        wav_path = convert_to_wav(file_path)
        mel_spec = audio_to_melspectrogram(wav_path, max_len=self.max_len, augment=self.augment)

        if self.transform:
            mel_spec = self.transform(mel_spec)

        return mel_spec, os.path.basename(file_path)

In [88]:
# Inference must see the clips as they are: augmentation (random noise plus
# time-stretch) is left off so that predictions are deterministic and are not
# computed on distorted audio.
test_dataset = TestDataset(TEST_DIR, augment=False, transform=ToTensor())
# shuffle=False keeps batches in dataset order, so collected file names and
# predictions stay aligned with the dataset's file list.
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [89]:
class AudioClassifier(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages and a two-layer classifier head.

    Every convolution is 3x3 with padding 1 (spatial size preserved) and
    every pooling step halves height and width. ``fc1`` expects
    ``64 * 16 * 16`` flattened features, which corresponds to inputs of
    shape ``(N, 1, 128, 128)``.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Attribute names and creation order are kept stable so checkpoints
        # saved from this architecture still match its state_dict keys.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return unnormalised class scores of shape ``(N, num_classes)``."""
        features = x
        # Three identical stages: convolution -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(self.relu(conv(features)))

        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

In [90]:
# Use the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AudioClassifier(num_classes=len(CLASSES)).to(device)

# map_location lets a checkpoint saved from a CUDA device load on a CPU-only
# runtime; weights_only=True restricts unpickling to tensors and plain
# containers instead of executing arbitrary pickled objects.
state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Switch to evaluation mode (disables dropout); as the last expression of the
# cell this also displays the model summary.
model.eval()

  model.load_state_dict(torch.load(MODEL_PATH))


AudioClassifier(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=11, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)

In [91]:
# Accumulators for the results: predicted class indices and the matching
# file names, kept in the same order.
predictions, file_names = [], []

In [92]:
# Start from empty result lists so that re-running this cell cannot append a
# second copy of every row to results left over from an earlier run.
predictions = []
file_names = []

# A prediction whose highest softmax probability is below `threshold` is
# replaced by `fallback_index`, i.e. the class at CLASSES[10].
threshold = 0.5
fallback_index = 10

with torch.no_grad():
    for data, files in test_loader:
        # Insert a dimension at position 1 and drop dimension 2 if it has
        # size 1; presumably batches arrive as (N, 1, H, W) from the ToTensor
        # transform, in which case the shape is unchanged - TODO confirm.
        data = data.to(device).unsqueeze(1).squeeze(2).float()
        outputs = model(data)

        probabilities = torch.softmax(outputs, dim=1)
        # One reduction yields both the top probability and its class index.
        confidence, predicted = probabilities.max(1)
        predicted[confidence < threshold] = fallback_index

        predictions.extend(predicted.cpu().numpy())
        file_names.extend(files)


In [93]:
# Translate each predicted class index into its label, then write one
# "file name,label" row per clip with neither a header row nor an index column.
predicted_labels = [CLASSES[class_index] for class_index in predictions]
output_df = pd.DataFrame({0: file_names, 1: predicted_labels})
output_df.to_csv("all_splited_obf_output_2.csv", header=False, index=False)