# Apresentação:

O propósito deste código é colocar em prática a metodologia de clusterização dos dados de forma automatizada, de modo a checar se é uma boa abordagem.

In [26]:
# Bibliotecas:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import signal, fftpack
from scipy.fftpack import fft, ifft
from numpy.fft import rfft, rfftfreq

# Extraindo paths

In [2]:
def map_directory_to_files(root_dir):
    directory_files_map = {}

    for root, dirs, files in tqdm(os.walk(root_dir),desc="Extraindo paths"):
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            file_list = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, file_name))]
            directory_files_map[dir_name] = file_list

    return directory_files_map

In [3]:
# Gerando dicionário com os dados:
arqs = map_directory_to_files(r"D:\BirdCLEF\birdclef_2024\train_audio")

Extraindo paths: 183it [00:11, 15.35it/s]


In [4]:
arqs.keys()

dict_keys(['asbfly', 'ashdro1', 'ashpri1', 'ashwoo2', 'asikoe2', 'asiope1', 'aspfly1', 'aspswi1', 'barfly1', 'barswa', 'bcnher', 'bkcbul1', 'bkrfla1', 'bkskit1', 'bkwsti', 'bladro1', 'blaeag1', 'blakit1', 'blhori1', 'blnmon1', 'blrwar1', 'bncwoo3', 'brakit1', 'brasta1', 'brcful1', 'brfowl1', 'brnhao1', 'brnshr', 'brodro1', 'brwjac1', 'brwowl1', 'btbeat1', 'bwfshr1', 'categr', 'chbeat1', 'cohcuc1', 'comfla1', 'comgre', 'comior1', 'comkin1', 'commoo3', 'commyn', 'compea', 'comros', 'comsan', 'comtai1', 'copbar1', 'crbsun2', 'cregos1', 'crfbar1', 'crseag1', 'dafbab1', 'darter2', 'eaywag1', 'emedov2', 'eucdov', 'eurbla2', 'eurcoo', 'forwag1', 'gargan', 'gloibi', 'goflea1', 'graher1', 'grbeat1', 'grecou1', 'greegr', 'grefla1', 'grehor1', 'grejun2', 'grenig1', 'grewar3', 'grnsan', 'grnwar1', 'grtdro1', 'gryfra', 'grynig2', 'grywag', 'gybpri1', 'gyhcaf1', 'heswoo1', 'hoopoe', 'houcro1', 'houspa', 'inbrob1', 'indpit1', 'indrob1', 'indrol2', 'indtit1', 'ingori1', 'inpher1', 'insbab1', 'insowl1'

In [5]:
qtd = [len(arqs[key]) for key in arqs.keys()]
print(sum(qtd))

24459


# Gerando arquivos:

In [6]:
import psutil

In [7]:
# Obter informações de memória
memory_info = psutil.virtual_memory()

# Imprimir memória total, usada e disponível
print(f"Total: {memory_info.total / (1024 ** 3):.2f} GB")
print(f"Usada: {memory_info.used / (1024 ** 3):.2f} GB")
print(f"Disponível: {memory_info.available / (1024 ** 3):.2f} GB")

Total: 15.91 GB
Usada: 5.90 GB
Disponível: 10.01 GB


In [57]:
dict_maximos = {}
for key in tqdm(arqs.keys(), desc="Extraindo Informações"):
    dict_maximos[key] = []  # Inicializar uma lista vazia para cada chave
    for sound in arqs[key]:
        try:
            y, sr = librosa.load(sound)
            duracao = librosa.get_duration(y=y, sr=sr)
    
            # Calcular a FFT do sinal
            fft_result = np.fft.fft(y)
            fft_magnitude = np.abs(fft_result)
    
            # Frequências correspondentes
            freqs = np.fft.fftfreq(len(fft_result), 1/sr)
    
            # Criar uma série do pandas com as magnitudes da FFT e as frequências correspondentes
            fft_series = pd.Series(fft_magnitude, index=freqs)
    
            # Encontrar a frequência com a maior magnitude
            max_magnitude = fft_series.max()
            max_frequency = fft_series.idxmax()
    
            par_ordenado = [max_magnitude, max_frequency]
    
            # Adicionar o par ordenado ao vetor de valores na chave correspondente
            dict_maximos[key].append(par_ordenado)
        except Exception as e:
            print(f'{key}:{sound}\nErro no processamento: {e}')

Extraindo Informações:  11%|██████▏                                                 | 20/182 [14:55<1:59:28, 44.25s/it]

blrwar1:D:\BirdCLEF\birdclef_2024\train_audio\blrwar1\XC826766.ogg
Erro no processamento: 


Extraindo Informações: 100%|███████████████████████████████████████████████████████| 182/182 [2:17:30<00:00, 45.33s/it]


# Salvando informações:

In [10]:
dict_maximos.keys()

dict_keys(['asbfly', 'ashdro1', 'ashpri1', 'ashwoo2', 'asikoe2', 'asiope1', 'aspfly1', 'aspswi1', 'barfly1', 'barswa', 'bcnher', 'bkcbul1', 'bkrfla1', 'bkskit1', 'bkwsti', 'bladro1', 'blaeag1', 'blakit1', 'blhori1', 'blnmon1', 'blrwar1', 'bncwoo3', 'brakit1', 'brasta1', 'brcful1', 'brfowl1', 'brnhao1', 'brnshr', 'brodro1', 'brwjac1', 'brwowl1', 'btbeat1', 'bwfshr1', 'categr', 'chbeat1', 'cohcuc1', 'comfla1', 'comgre', 'comior1', 'comkin1', 'commoo3', 'commyn', 'compea', 'comros', 'comsan', 'comtai1', 'copbar1', 'crbsun2', 'cregos1', 'crfbar1', 'crseag1', 'dafbab1', 'darter2', 'eaywag1', 'emedov2', 'eucdov', 'eurbla2', 'eurcoo', 'forwag1', 'gargan', 'gloibi', 'goflea1', 'graher1', 'grbeat1', 'grecou1', 'greegr', 'grefla1', 'grehor1', 'grejun2', 'grenig1', 'grewar3', 'grnsan', 'grnwar1', 'grtdro1', 'gryfra', 'grynig2', 'grywag', 'gybpri1', 'gyhcaf1', 'heswoo1', 'hoopoe', 'houcro1', 'houspa', 'inbrob1', 'indpit1', 'indrob1', 'indrol2', 'indtit1', 'ingori1', 'inpher1', 'insbab1', 'insowl1'

In [17]:
import json

In [58]:
# exportando dicionário original
# Nome do arquivo onde o dicionário será salvo
file_name = 'maximum_data.json'

# Abrir um arquivo em modo de escrita e salvar o dicionário como JSON
with open(file_name, 'w') as json_file:
    json.dump(dict_maximos, json_file)

In [22]:
data = dict_maximos.copy()

In [16]:
# Encontrar o comprimento máximo das listas
max_length = max(len(lst) for lst in data.values())

# Preencher cada lista com NaN até que todas tenham o mesmo comprimento
for key in data:
    while len(data[key]) < max_length:
        data[key].append(np.nan)

# Criar o DataFrame
df = pd.DataFrame(data);df

Unnamed: 0,asbfly,ashdro1,ashpri1,ashwoo2,asikoe2,asiope1,aspfly1,aspswi1,barfly1,barswa,...,whbwoo2,whcbar1,whiter2,whrmun,whtkin2,woosan,wynlau1,yebbab1,yebbul3,zitcis1
0,"[62.63154930825944, 7616.237669403206]","[581.4713380209458, -3106.0089404728797]","[583.29180432136, 1171.7198744990792]","[411.89118667493807, 4396.330979719823]","[701.7222252932513, 1431.5490081680277]","[258.18789021125684, 2461.0574725776964]","[829.5876316710334, 53.45377016028775]","[244.44370492638075, 1169.9958881578948]","[241.26899207054825, 6133.703374777975]","[223.52828239620106, 925.7838094206442]",...,"[3418.8408843505135, 17.34619140625]","[227.51776130823356, 1195.1819569408738]","[82.10256164413107, 1984.5982142857142]","[221.27904412846544, 575.3272804054054]","[522.3543343780275, -2832.8068729204947]","[391.2591622503642, -3874.1171881666037]","[652.3053786479752, 4291.625426890969]","[285.8828517839271, 3863.306442521453]","[3512.0413373896836, 9.664200457815566]","[29.816714500345473, -9833.953962308917]"
1,"[32.182617088722594, 0.0]","[667.1716950571001, 595.0515322214146]","[377.6107537713034, -3665.985113127578]","[94.18562931514143, 328.75586854460096]","[419.59607578099593, 1515.0852274639026]","[134.57836140752013, 139.3830128205128]","[822.7398809935665, 52.81394675925926]","[160.7731460751597, -5053.769155649038]","[131.24885654562786, 10.599380065971589]","[1428.3937254836167, 292.4425787361655]",...,"[2972.6852591236775, 21.533203125000004]","[407.9535580390919, -1310.8431842565653]","[150.6149070573063, 3523.701870932755]","[349.0760413651145, 5047.466801385681]","[776.0305364198301, -2958.2764365623716]","[1130.978520315939, -20.25505337968803]","[653.9694598088055, 0.0]","[1494.2348833272265, 6019.826561965368]","[1097.9677472549802, 16.500489402270706]","[397.5572341201643, 7277.583218349376]"
2,"[373.1795431899132, 7751.9069290877205]","[490.07175586922517, 310.66870144284127]","[469.999030921335, 3060.9492549189813]","[374.39856663332887, 269.15678879310343]","[1519.9469995590111, 92.49632461040869]","[130.2976189412213, 183.1840034965035]","[74.91757328824124, 4581.797497155859]","[155.18865860597714, -59.42908504358217]","[303.40138137872975, 9.708252950018345]","[150.2354975749183, 5498.336046789762]",...,"[101.45475186777433, 2287.245611496913]","[1985.9223084755813, 59.670421450772025]","[337.3117177220866, -3350.266231686216]","[954.0612957041137, 269.3013251582279]","[500.16590249477815, -200.06694863943434]","[995.5509563237304, 103.06122448979592]","[155.57691269295339, 4376.765516868512]","[468.56847975961074, 3113.6845071517414]","[145.75986600990953, 46.88744363152667]","[2200.5427964190776, 9.745416826391864]"
3,"[52.51063754139899, 1653.504562882576]","[751.0377785087493, -2641.9055706521735]","[160.65342658791738, 894.9603391684901]","[360.57824046422513, 121.3649603590512]","[4928.156268147541, 90.00224759008397]","[604.7462974768612, 161.73828124999997]","[144.25233421891141, 2774.356378482031]","[1395.5576726148706, -48.60779972752044]","[2012.8756887484442, 0.0]","[304.62708578422723, 3945.987134145255]",...,"[235.22917699039888, -2812.209498590512]","[166.59328429201244, 1243.5140131662295]","[300.9685168410473, -80.68057080131723]","[122.32157364528925, 449.11466544722686]","[1599.0119223955073, 2808.8541666666665]","[24.083233775946645, -4045.7996927126214]","[90.1361910436983, 98.15124317250796]","[715.9365544534618, -3689.6501375201087]","[126.31085997172853, 2320.1711554276317]","[94.48989885917867, 7093.581152737299]"
4,"[5070.62998063275, 200.34480701036554]","[673.8627297111926, 3155.4789911048692]","[1963.3111710558976, 123.81749341527654]","[175.32984260749967, -4482.0725029205605]","[1512.6745693929265, -99.89310189236906]","[613.244260335554, 17.868276991999096]","[88.14103849820502, 5005.844799440298]","[67.22535520102839, 689.258443311652]","[79.11585892100611, 3891.293597588507]","[332.60515868494707, 3817.799673507463]",...,"[112.7502773612859, -1494.5419520547944]","[1413.1135430732916, 1189.0167459736456]","[88.53039792960433, 2126.4843352812873]","[146.65597912697862, -354.67169610526645]","[30.170756896343608, 3041.4795660486543]","[23.243885341075593, 19.94654605263158]","[225.72261770866234, -3.2088674371505834]","[76.00923854516753, 4920.966323128549]","[1353.7546878081857, 2330.588138065888]","[386.15214846984196, 7254.583697552447]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,,,,,,,,,,"[62.26611434444408, 3068.7478611128226]",...,,,,,,"[165.51847166905472, 114.67614580609239]",,,,"[1866.803778028125, 13.456470144433174]"
496,,,,,,,,,,"[57.43445022236318, 3338.468433469269]",...,,,,,,"[69.35593141803393, 9987.358893171806]",,,,"[1330.7209059801164, 7.777008340036044]"
497,,,,,,,,,,"[54.83161061076759, 269.76497672954935]",...,,,,,,"[246.25032983358557, 2828.7955377374938]",,,,"[396.7883542939248, 68.96530397674562]"
498,,,,,,,,,,"[132.84564991612734, 149.0]",...,,,,,,"[31.777715728529138, -641.0571734605202]",,,,"[33.55708098494209, 549.6229891666558]"


In [23]:
df.to_csv('birdclef_magnitudes.csv',index=False)