In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torchaudio
import librosa
import random
import matplotlib.pyplot as plt
from IPython.display import Audio
from IPython.display import FileLink
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import math

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
def collect_file_info(root_dir):
    file_info = []

    for main_class in os.listdir(root_dir):
        main_class_path = os.path.join(root_dir, main_class)
        if os.path.isdir(main_class_path):
            for sub_class in os.listdir(main_class_path):
                sub_class_path = os.path.join(main_class_path, sub_class)
                if os.path.isdir(sub_class_path):
                    for file_name in os.listdir(sub_class_path):
                        file_path = os.path.join(sub_class_path, file_name)
                        if os.path.isfile(file_path):
                            file_info.append({
                                'path': file_path,
                                'label': main_class,
                                'sub_class': sub_class
                            })
    
    return file_info

In [4]:
def collect_file_info_no_sub_class(root_dir):
    file_info = []

    for main_class in os.listdir(root_dir):
        main_class_path = os.path.join(root_dir, main_class)
        if os.path.isdir(main_class_path):
            for file_name in os.listdir(main_class_path):
                file_path = os.path.join(main_class_path, file_name)
                if os.path.isfile(file_path):
                    file_info.append({
                        'path': file_path,
                        'label': main_class,
                        'sub_class': main_class
                    })
    
    return file_info

In [15]:
root_dir = 'charaNet/bird'
file_info_1 = collect_file_info_no_sub_class(root_dir)
root_dir = 'Other_mixed_neg'
file_info_2 = collect_file_info_no_sub_class(root_dir)
df_1 = pd.DataFrame(file_info_1)
df_2 = pd.DataFrame(file_info_2)
df = pd.concat([df_1, df_2], ignore_index=True)
df.loc[df['label'] != 'prikol', 'label'] = 'not_dog'
df['label'].unique()
df

Unnamed: 0,path,label,sub_class
0,charaNet/bird\Asian Koel\XC161198 0.mp3,not_dog,Asian Koel
1,charaNet/bird\Asian Koel\XC161199 0.mp3,not_dog,Asian Koel
2,charaNet/bird\Asian Koel\XC161199 1.mp3,not_dog,Asian Koel
3,charaNet/bird\Asian Koel\XC161199 2.mp3,not_dog,Asian Koel
4,charaNet/bird\Asian Koel\XC161431 2.mp3,not_dog,Asian Koel
...,...,...,...
7352,Other_mixed_neg\data\9_10971.wav,not_dog,data
7353,Other_mixed_neg\data\9_10972.wav,not_dog,data
7354,Other_mixed_neg\data\9_10973.wav,not_dog,data
7355,Other_mixed_neg\data\9_10974.wav,not_dog,data


In [16]:
df.to_csv('file_info.csv', index=False)

In [17]:
added = True
df = pd.read_csv("file_info.csv")
df

Unnamed: 0,path,label,sub_class
0,charaNet/bird\Asian Koel\XC161198 0.mp3,not_dog,Asian Koel
1,charaNet/bird\Asian Koel\XC161199 0.mp3,not_dog,Asian Koel
2,charaNet/bird\Asian Koel\XC161199 1.mp3,not_dog,Asian Koel
3,charaNet/bird\Asian Koel\XC161199 2.mp3,not_dog,Asian Koel
4,charaNet/bird\Asian Koel\XC161431 2.mp3,not_dog,Asian Koel
...,...,...,...
7352,Other_mixed_neg\data\9_10971.wav,not_dog,data
7353,Other_mixed_neg\data\9_10972.wav,not_dog,data
7354,Other_mixed_neg\data\9_10973.wav,not_dog,data
7355,Other_mixed_neg\data\9_10974.wav,not_dog,data


In [18]:
class_counts = df['label'].value_counts()
print("Количество элементов в каждом классе:")
print(class_counts)

Количество элементов в каждом классе:
label
not_dog    7357
Name: count, dtype: int64


In [19]:
class_names = df['label'].unique()
class_dict = {class_name: idx for idx, class_name in enumerate(class_names)}
class_dict = {'wolf': 2, 'dog': 1, 'not_dog': 0}
class_name_reverse = {idx: class_name for class_name, idx in class_dict.items()}
print("\nClass dictionary:")
print(class_dict)


Class dictionary:
{'wolf': 2, 'dog': 1, 'not_dog': 0}


In [20]:
data = df
data['label'] = df['label'].map(class_dict)

print(data['label'].value_counts())
data.head(5)

label
0    7357
Name: count, dtype: int64


Unnamed: 0,path,label,sub_class
0,charaNet/bird\Asian Koel\XC161198 0.mp3,0,Asian Koel
1,charaNet/bird\Asian Koel\XC161199 0.mp3,0,Asian Koel
2,charaNet/bird\Asian Koel\XC161199 1.mp3,0,Asian Koel
3,charaNet/bird\Asian Koel\XC161199 2.mp3,0,Asian Koel
4,charaNet/bird\Asian Koel\XC161431 2.mp3,0,Asian Koel


In [21]:
df_used = df
df_used['used'] = 0
df_used

Unnamed: 0,path,label,sub_class,used
0,charaNet/bird\Asian Koel\XC161198 0.mp3,0,Asian Koel,0
1,charaNet/bird\Asian Koel\XC161199 0.mp3,0,Asian Koel,0
2,charaNet/bird\Asian Koel\XC161199 1.mp3,0,Asian Koel,0
3,charaNet/bird\Asian Koel\XC161199 2.mp3,0,Asian Koel,0
4,charaNet/bird\Asian Koel\XC161431 2.mp3,0,Asian Koel,0
...,...,...,...,...
7352,Other_mixed_neg\data\9_10971.wav,0,data,0
7353,Other_mixed_neg\data\9_10972.wav,0,data,0
7354,Other_mixed_neg\data\9_10973.wav,0,data,0
7355,Other_mixed_neg\data\9_10974.wav,0,data,0


In [27]:
class Process(): #Передавать нужно зашафленый датафрейм!!!!
    def __init__(self, paths, labels, used, rate=16000, target_sec_length=30, min_sec_length=25, max_sec_length=40):
        self.paths = paths
        self.labels = labels
        self.used = used
        self.rate = rate
        self.target_sec_length = target_sec_length
        self.target_sample_length = self.rate * self.target_sec_length
        self.min_sec_length = min_sec_length
        self.max_sec_length = max_sec_length
        self.df = pd.DataFrame(columns=['paths', 'label', 'more', 'interval'])
        
    def get_df(self):
        return self.df
    
    def process_pathes(self):
        qwerty = len(self.paths)
        for i in range(qwerty):
            print(f"{i}/{qwerty}")
            return_value = self.length_processing(i)
            if return_value != 0:
                self.df.loc[len(self.df)] = return_value
        return self.get_df()
    def length_processing(self, idx): # Сделать отдельную функцию для обработки всех путей, переписать return
        try:
            return_dict = {}
            if self.used[idx] == 1:
                #print('1 ', end='')
                return 0
            waveform, sample_rate = torchaudio.load(self.paths[idx])
            waveform = waveform.mean(dim=0, keepdim=True)
            if sample_rate != self.rate:
                waveform = torchaudio.functional.resample(waveform, sample_rate, self.rate)
            current_seconds = waveform.shape[1] / self.rate
            if current_seconds <= self.target_sec_length: # Если меньше нужного
                paths_str = f'{self.paths[idx]}, '
                self.used[idx] = 1
                i = 1
                while current_seconds < self.min_sec_length:
                    if i % (len(self.paths)) == 0: #Закоментить после тестирования
                        print('infinite cycle!')
                        return self.length_processing(idx+1)
                    if self.used[(idx + i) % len(self.paths)] == 1:
                        i += 1
                        continue
                    if self.labels[idx] != 0 and (self.labels[(idx + i) % len(self.paths)] == 0 or self.labels[(idx + i) % len(self.paths)] == self.labels[idx]):
                        new_waveform, sample_rate = torchaudio.load(self.paths[(idx + i) % len(self.paths)])
                        new_waveform = new_waveform.mean(dim=0, keepdim=True)
                        if current_seconds + new_waveform.shape[1] / sample_rate <= self.max_sec_length: # Проверка на длительность, чтобы не обрезали слишком много
                            paths_str = paths_str + f'{self.paths[(idx + i) % len(self.paths)]}, '
                        self.used[(idx + i) % len(self.paths)] = 1
                        current_seconds += new_waveform.shape[1] / sample_rate
                        i += 1
                    if self.labels[idx] == 0 and self.labels[(idx + i) % len(self.paths)] == 0: # Проверка на совпадение лейбла
                        new_waveform, sample_rate = torchaudio.load(self.paths[(idx + i) % len(self.paths)])
                        new_waveform = new_waveform.mean(dim=0, keepdim=True)
                        if current_seconds + new_waveform.shape[1] / sample_rate <= self.max_sec_length: # Проверка на длительность, чтобы не обрезали слишком много
                            paths_str = paths_str + f'{self.paths[(idx + i) % len(self.paths)]}, ' 
                        self.used[(idx + i) % len(self.paths)] = 1
                        current_seconds += new_waveform.shape[1] / sample_rate
                        i+=1
                    else:
                        i += 1
                return_dict = {'paths': paths_str[:-2],
                            'label': self.labels[idx],
                            'more': 0,
                            'interval': ''}
            else: #Если больше нужного
                print('4 ', end='')
                interval_str = ''
                self.used[idx] = 1
                if waveform.shape[1] > self.target_sample_length:
                    count_of_targets = int(math.floor(waveform.shape[1] / (self.rate * (self.min_sec_length + 3))))
                    random_deviation = random.randrange(-3.0, 3.0)
                    start = int((waveform.shape[1] / self.rate) / 2 - (self.target_sec_length * count_of_targets / 2) + random_deviation)
                    end = int((waveform.shape[1] / self.rate) / 2 + (self.target_sec_length * count_of_targets / 2) + random_deviation)
                    for i in range(count_of_targets):
                        interval_str = interval_str + f'{start + i * (self.min_sec_length + 3)}-{start + (i+1) * (self.min_sec_length + 3)}, '
                    return_dict = {'paths': self.paths[idx],
                                'label': self.labels[idx],
                                'more': 1,
                                'interval': interval_str[:-2]}
            return return_dict
        except:
            print("ERROR!")

In [28]:
df_shuffled = df.sample(frac=1, random_state=52).reset_index(drop=True)
df_shuffled.head(5)

Unnamed: 0,path,label,sub_class,used
0,Other_mixed_neg\data\6_10629.wav,0,data,0
1,Other_mixed_neg\data\7_10757.wav,0,data,0
2,charaNet/bird\Greater Spotted Eagle\XC417961 3...,0,Greater Spotted Eagle,0
3,Other_mixed_neg\data\18_11853.wav,0,data,0
4,charaNet/bird\Greater Spotted Eagle\XC417961 1...,0,Greater Spotted Eagle,0


In [29]:
processor = Process(df_shuffled['path'], df_shuffled['label'], df_shuffled['used'])

In [30]:
new_df = processor.process_pathes()

0/7357
1/7357
2/7357
3/7357
4/7357
5/7357
6/7357
7/7357
8/7357
9/7357
10/7357
11/7357
12/7357
13/7357
14/7357
15/7357
16/7357
17/7357
18/7357
19/7357
20/7357
21/7357
22/7357
23/7357
24/7357
25/7357
26/7357
27/7357
28/7357
29/7357
30/7357
31/7357
32/7357
33/7357
34/7357
35/7357
36/7357
37/7357
38/7357
39/7357
40/7357
41/7357
42/7357
43/7357
44/7357
45/7357
46/7357
47/7357
48/7357
49/7357
50/7357
51/7357
52/7357
53/7357
54/7357
55/7357
56/7357
57/7357
58/7357
59/7357
60/7357
61/7357
62/7357
63/7357
64/7357
65/7357
66/7357
67/7357
68/7357
69/7357
70/7357
71/7357
72/7357
73/7357
74/7357
75/7357
76/7357
77/7357
78/7357
79/7357
80/7357
81/7357
82/7357
83/7357
84/7357
85/7357
86/7357
87/7357
88/7357
89/7357
90/7357
91/7357
92/7357
93/7357
94/7357
95/7357
96/7357
97/7357
98/7357
99/7357
100/7357
101/7357
102/7357
103/7357
104/7357
105/7357
106/7357
107/7357
108/7357
109/7357
110/7357
111/7357
112/7357
113/7357
114/7357
115/7357
116/7357
117/7357
118/7357
119/7357
120/7357
121/7357
122/7357
123

In [37]:
new_df = new_df.iloc[:-1]

In [42]:
new_df.to_csv("cut_file_info.csv", index=False)

In [44]:
new_df = pd.read_csv("cut_file_info.csv")
new_df

Unnamed: 0,paths,label,more,interval
0,"Other_mixed_neg\data\6_10629.wav, Other_mixed_...",0.0,0.0,
1,charaNet/bird\Greater Spotted Eagle\XC417961 1...,0.0,0.0,
2,charaNet/bird\Greater Spotted Eagle\XC417961 6...,0.0,0.0,
3,"charaNet/bird\Grey Treepie\XC716930 4.mp3, Oth...",0.0,0.0,
4,charaNet/bird\Large-billed Crow\XC547823 4.mp3...,0.0,0.0,
...,...,...,...,...
2189,"charaNet/bird\Grey Treepie\XC406884 1.mp3, Oth...",0.0,0.0,
2190,"charaNet/bird\Himalayan Monal\XC317228 2.mp3, ...",0.0,0.0,
2191,"charaNet/bird\House Sparrow\XC636549 2.mp3, ch...",0.0,0.0,
2192,"charaNet/bird\House Sparrow\XC642162 0.mp3, ch...",0.0,0.0,


# Получить аудио из полученного DataFrame

In [46]:
def get_sample(row, rate=16000):
    path_list, label, more, interval_list = row
    paths = path_list.replace('"', '').split(', ')
    waveform, sample_rate = torchaudio.load(paths[0])
    waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, rate)
    if not more: #Не нужно делить
        for path in paths[1:]:
            new_waveform, sample_rate = torchaudio.load(path)
            new_waveform = new_waveform.mean(dim=0, keepdim=True)
            if sample_rate != rate:
                new_waveform = torchaudio.functional.resample(new_waveform, sample_rate, rate)
            waveform = torch.cat((waveform, new_waveform), dim=1)
        return [waveform]
    else: # Нужно делить
        wave_list = []
        intervals = interval_list.replace('"', '').split(', ')
        for interval in intervals:
            try:
                if interval[0] == '-':
                        interval = interval[1:].split('-')
                        interval = list(map(int, interval))
                        interval[1] += interval[0] * 2
                else:
                    interval = list(map(int, interval.split('-')))
                interval = list(map(lambda x: x * rate, interval))
                wave_list.append(waveform[:, interval[0]:interval[1]])
            except:
                print("ERROR!")
        return wave_list
    return None

In [48]:
test = new_df.loc[0]
print(test, '\n\n')
check = get_sample(test)
check[0].shape

paths       Other_mixed_neg\data\6_10629.wav, Other_mixed_...
label                                                     0.0
more                                                      0.0
interval                                                  NaN
Name: 0, dtype: object 




torch.Size([1, 400000])

In [49]:
Audio(check[0].numpy(), rate=16000)

In [52]:
neg_count = 1
dog_count = 1
wolf_count= 1
sample_rate = 16000
for i, row in new_df.iterrows():
    waveforms = get_sample(row, sample_rate)
    if row['label'] == 0:
        save_path = f'new_dataset/negative/negative_new_{neg_count}.wav'
        neg_count += 1
    elif row['label'] == 1:
        save_path = f'new_dataset/dog/dog_{dog_count}.wav'
        dog_count += 1
    elif row['label'] == 2:
        save_path = f'new_dataset/wolf/wolf_{wolf_count}.wav'
        wolf_count += 1
        
    for waveform in waveforms:
        print(waveform.shape)
        torchaudio.save(save_path, waveform, sample_rate, format = "wav")
    print(f'{i}/{len(new_df)}')

torch.Size([1, 400000])
0/2194
torch.Size([1, 480000])
1/2194
torch.Size([1, 400000])
2/2194
torch.Size([1, 400000])
3/2194
torch.Size([1, 416000])
4/2194
torch.Size([1, 494136])
5/2194
torch.Size([1, 480000])
6/2194
torch.Size([1, 400000])
7/2194
torch.Size([1, 480000])
8/2194
torch.Size([1, 400000])
9/2194
torch.Size([1, 480000])
10/2194
torch.Size([1, 544000])
11/2194
torch.Size([1, 480000])
12/2194
torch.Size([1, 480000])
13/2194
torch.Size([1, 464000])
14/2194
torch.Size([1, 400000])
15/2194
torch.Size([1, 400000])
16/2194
torch.Size([1, 480000])
17/2194
torch.Size([1, 463233])
18/2194
torch.Size([1, 480000])
19/2194
torch.Size([1, 528000])
20/2194
torch.Size([1, 528000])
21/2194
torch.Size([1, 480000])
22/2194
torch.Size([1, 400000])
23/2194
torch.Size([1, 480000])
24/2194
torch.Size([1, 416000])
25/2194
torch.Size([1, 400000])
26/2194
torch.Size([1, 400000])
27/2194
torch.Size([1, 480000])
28/2194
torch.Size([1, 460560])
29/2194
torch.Size([1, 400000])
30/2194
torch.Size([1, 480