In [8]:
import torch
from s3prl.nn import S3PRLUpstream

torch.cuda.set_device(1)
used_device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = S3PRLUpstream("hubert").to(used_device)
model = S3PRLUpstream("wav2vec2").to(used_device)
# model = S3PRLUpstream("tera_100hr").to(used_device)
model.eval()

def print_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total Parameters: {total_params}")
    print(f"Trainable Parameters: {trainable_params}")

# print(model)
print_model_parameters(model)

Total Parameters: 95044608
Trainable Parameters: 95044608


In [16]:
import torch.nn as nn
print(model)
num_layers = 0

# # 遍历模型的所有模块和子模块
# for layer in model.modules():
#     # 检查当前模块是否是层的实例
#     if isinstance(layer, nn.Module):
#         num_layers += 1
# # 打印层数
# print(f"Total number of layers in the model: {num_layers}")

S3PRLUpstream(
  (upstream): UpstreamExpert(
    (model): Wav2Vec2Model(
      (feature_extractor): ConvFeatureExtractionModel(
        (conv_layers): ModuleList(
          (0): Sequential(
            (0): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
            (1): Dropout(p=0.0, inplace=False)
            (2): Fp32GroupNorm(512, 512, eps=1e-05, affine=True)
            (3): GELU(approximate='none')
          )
          (1-4): 4 x Sequential(
            (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
            (1): Dropout(p=0.0, inplace=False)
            (2): GELU(approximate='none')
          )
          (5-6): 2 x Sequential(
            (0): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False)
            (1): Dropout(p=0.0, inplace=False)
            (2): GELU(approximate='none')
          )
        )
      )
      (post_extract_proj): Linear(in_features=512, out_features=768, bias=True)
      (dropout_input): Dropout(p=0.1, inplace=Fa

In [10]:
with torch.no_grad():
    wavs = torch.randn(2, 16000 * 2).to(used_device)
    wavs_len = torch.LongTensor([16000 * 1, 16000 * 2]).to(used_device)
    all_hs, all_hs_len = model(wavs, wavs_len)
    num_layers = len(all_hs)

for hs, hs_len in zip(all_hs, all_hs_len):
    # assert isinstance(hs, torch.FloatTensor)
    # assert isinstance(hs_len, torch.LongTensor)

    batch_size, max_seq_len, hidden_size = hs.shape
    assert hs_len.dim() == 1   
    
print(all_hs[0].shape,all_hs_len[0].shape)
print('num_layers',num_layers)

torch.Size([2, 100, 768]) torch.Size([2])
num_layers 13


In [51]:
######################################################wav2vec2######################################################
import json
import os
import torch
import numpy as np
from tqdm import tqdm
import pandas as pd
from s3prl.nn import S3PRLUpstream

torch.cuda.set_device(1)
used_device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = S3PRLUpstream("hubert").to(used_device)
model = S3PRLUpstream("wav2vec2").to(used_device)
model.eval()

def print_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total Parameters: {total_params}")
    print(f"Trainable Parameters: {trainable_params}")

# print(model)
print_model_parameters(model)

refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_test_info.json'
# refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_data_info_caller.json'


wav_data_path = '/mnt/work/dataset/preprocessed/great_bird/original_data'

def process_subset_file(data_path):     
    # total data list
    data_path_list = [] # npy file path
    c_id_index_list = []
    cID_list = []
    cID_type_list = []
    with open(data_path, 'r') as f:
        data_info = json.load(f)

    for caller_ID in data_info:
        for caller_type in data_info[caller_ID]:
            for item in data_info[caller_ID][caller_type]:
                
                # reduce too len
                # before 240214 
                # if item['len_mel'] >= 199 or item['len_mel'] <= 99:
                #     continue
                
                # after 240214
                if item['len_mel'] >= 399 or item['len_mel'] <= 99:
                    continue
                
                class_id_index = item['class_id_index']

                # wav file path
                wav_path = item['original_wav']
                
                data_path_list.append(wav_path)
                # class_id_index eg 20221W67_0_0
                # c_id_index_list.append(class_id_index)
                # caller id
                cID_list.append(caller_ID)
                # caller type 
                cID_type_list.append(caller_type)
                
    return data_path_list, cID_list, cID_type_list


data_path_list, cID_list, cID_type_list = process_subset_file(refer_info_path)


def unpad(hs, hs_len, index, layer):
    if layer == -1:
        hs_unpadded = []
        for h, lens in zip(hs, hs_len):
            l = lens[index]
            hs_unpadded.append(h[index, :l])
    else:
        l = hs_len[index]
        hs_unpadded = hs[index, :l]
    return hs_unpadded

def get_batch(wav_paths, wav_ids, start, end):
    def getitem(index):
        import soundfile as sf

        path = wav_paths[index]
        wav, curr_sample_rate = sf.read(path, dtype="float32")
        wav /= np.max(np.abs(wav))

        feats = torch.from_numpy(wav).float()
        return feats

    def collate(wavs, padding_value: int = 0):
        from torch.nn.utils.rnn import pad_sequence

        padded_wavs = pad_sequence(
            wavs, batch_first=True, padding_value=padding_value
        )
        return padded_wavs

    end_id = min(end, len(wav_paths))
    wavs = [getitem(index) for index in range(start, end_id)]
    ids = [wav_ids[index] for index in range(start, end_id)]
    wavs_len = [len(wav) for wav in wavs]
    padded_wavs = collate(wavs)
    
    return padded_wavs, wavs_len, ids

def get_features(model, wavs, wavs_len, layer=-1, device="cuda"):
    with torch.no_grad():
        wavs_len = torch.LongTensor(wavs_len).to(device)
        # print('wavs_len',wavs_len)
        all_hs, all_hs_len = model(wavs.to(device), wavs_len)
        # print('all_hs',all_hs[0].shape,'all_hs_len',all_hs_len)

    for layer_id, (hs, hs_len) in enumerate(zip(all_hs, all_hs_len)):
        hs = hs.to("cpu")
        hs_len = hs_len.to("cpu")
        assert isinstance(hs, torch.FloatTensor)
        assert isinstance(hs_len, torch.LongTensor)

        if layer == layer:
            hidden_states = hs
            hidden_states_len = hs_len

        assert hs_len.dim() == 1

    if layer == -1:
        hidden_states = [hs.to("cpu") for hs in all_hs]
        hidden_states_len = [hs_len.to("cpu") for hs_len in all_hs_len]
        
    return hidden_states, hidden_states_len

def pack_features(hidden_states, hidden_states_len, ids, layer, features_dict, features_list):
    num_inputs = len(ids)
    for input_id in range(num_inputs):
        wav_id = ids[input_id]
        
        # print(hidden_states_len, input_id, layer)
        hs = unpad(hidden_states, hidden_states_len, input_id, layer)
        features_dict[wav_id] = hs
        
        features_list.append(hs)
        # features_list_1D.append(hs)
        
    return features_dict, features_list

def extract_features(model, wav_paths, wav_ids, batch_size, layer_id, device="cuda"):
    num_wavs = len(wav_paths)
    features_dict = {}
    features_list = []
    
    means_total = []
    std_total = []
    mean_std_total = []
    uid_total = []
    
    i = 0
    for bid in tqdm(range(0, num_wavs, batch_size)):
        # if i >= 5:
        #     break
        i += 1
        end_id = min(bid + batch_size, num_wavs)
        
        padded_wavs, wavs_len, ids = get_batch(wav_paths, wav_ids, bid, end_id)
        hidden_states, hidden_states_len = get_features(model, padded_wavs, wavs_len, layer_id, used_device)
        
        # print('hidden_states',hidden_states.shape)
        # print(ids)
        
        hidden_states = hidden_states.squeeze(dim=0)
        
        means = torch.mean(hidden_states, dim=0)
        means_total.append(means.numpy())
        
        stds = torch.std(hidden_states, dim=0)
        std_total.append(stds.numpy())
        
        mean_std = torch.cat((means, stds), dim=0)
        mean_std_total.append(mean_std.numpy())
        
        uid_total.append(ids)
        
        features_dict, features_list = pack_features(hidden_states, hidden_states_len, ids, layer_id, features_dict, features_list)
        
    return features_dict, features_list, means_total, std_total, mean_std_total,  uid_total

# output [batch size, time length, 768]
features_dict, features_list, means_total, std_total, mean_std_total, uid_total = extract_features(
            model = model,
            wav_paths = data_path_list,
            wav_ids = cID_type_list,
            batch_size = 1,
            layer_id = 12, # -1 all layers
            device = used_device
        )

# means_total = []
# std_total = []
# mean_std_total = []
# uid_total = []

# for uid, rep in features_dict.items():
#     print(rep.shape)
    
#     # means and stds
#     means = torch.mean(rep, dim=0)
#     means_total.append(means.numpy())
    
#     stds = torch.std(rep, dim=0)
#     std_total.append(stds.numpy())
    
#     mean_std = torch.cat((means, stds), dim=0)
#     mean_std_total.append(mean_std)
    
#     uid_total.append(uid)
    
    # print('uid', uid)
    # print('means',means_total[0])
    # print('stds',std_total[0])
    
# print(len(means_total[1]))
# save the embeddings
# save path

#################################################wav2vec2#################################################
save_path = '/mnt/work/Animal/output/embeddings/wav2vec2/first_ex'

em_save = True
# em_save = False
if em_save:
    save_df = pd.DataFrame(means_total)
    save_df.to_csv(os.path.join(save_path, 'embedding.csv'), index=False, header=False)
    print(save_df.head())
    
    caller_type_df = pd.DataFrame(uid_total) 
    caller_type_df.to_csv(os.path.join(save_path, 'caller_type_label.csv'), index=False, header=False)
    print(caller_type_df.head())




Total Parameters: 95044608
Trainable Parameters: 95044608


100%|██████████| 20018/20018 [17:37<00:00, 18.94it/s]


In [52]:
######################################################hubert######################################################
import json
import os
import torch
import numpy as np
from tqdm import tqdm
import pandas as pd
from s3prl.nn import S3PRLUpstream

torch.cuda.set_device(1)
used_device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = S3PRLUpstream("hubert").to(used_device)
model.eval()

def print_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total Parameters: {total_params}")
    print(f"Trainable Parameters: {trainable_params}")

# print(model)
print_model_parameters(model)

refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_test_info.json'
# refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_data_info_caller.json'

wav_data_path = '/mnt/work/dataset/preprocessed/great_bird/original_data'

def process_subset_file(data_path):     
    # total data list
    data_path_list = [] # npy file path
    c_id_index_list = []
    cID_list = []
    cID_type_list = []
    with open(data_path, 'r') as f:
        data_info = json.load(f)

    for caller_ID in data_info:
        for caller_type in data_info[caller_ID]:
            for item in data_info[caller_ID][caller_type]:
                
                # reduce too len
                # before 240214 
                # if item['len_mel'] >= 199 or item['len_mel'] <= 99:
                #     continue
                
                # after 240214
                if item['len_mel'] >= 399 or item['len_mel'] <= 99:
                    continue
                
                class_id_index = item['class_id_index']

                # wav file path
                wav_path = item['original_wav']
                
                data_path_list.append(wav_path)
                # class_id_index eg 20221W67_0_0
                # c_id_index_list.append(class_id_index)
                # caller id
                cID_list.append(caller_ID)
                # caller type 
                cID_type_list.append(caller_type)
                
    return data_path_list, cID_list, cID_type_list


data_path_list, cID_list, cID_type_list = process_subset_file(refer_info_path)


def unpad(hs, hs_len, index, layer):
    if layer == -1:
        hs_unpadded = []
        for h, lens in zip(hs, hs_len):
            l = lens[index]
            hs_unpadded.append(h[index, :l])
    else:
        l = hs_len[index]
        hs_unpadded = hs[index, :l]
    return hs_unpadded

def get_batch(wav_paths, wav_ids, start, end):
    def getitem(index):
        import soundfile as sf

        path = wav_paths[index]
        wav, curr_sample_rate = sf.read(path, dtype="float32")
        wav /= np.max(np.abs(wav))

        feats = torch.from_numpy(wav).float()
        return feats

    def collate(wavs, padding_value: int = 0):
        from torch.nn.utils.rnn import pad_sequence

        padded_wavs = pad_sequence(
            wavs, batch_first=True, padding_value=padding_value
        )
        return padded_wavs

    end_id = min(end, len(wav_paths))
    wavs = [getitem(index) for index in range(start, end_id)]
    ids = [wav_ids[index] for index in range(start, end_id)]
    wavs_len = [len(wav) for wav in wavs]
    padded_wavs = collate(wavs)
    
    return padded_wavs, wavs_len, ids

def get_features(model, wavs, wavs_len, layer=-1, device="cuda"):
    with torch.no_grad():
        wavs_len = torch.LongTensor(wavs_len).to(device)
        # print('wavs_len',wavs_len)
        all_hs, all_hs_len = model(wavs.to(device), wavs_len)
        # print('all_hs',all_hs[0].shape,'all_hs_len',all_hs_len)

    for layer_id, (hs, hs_len) in enumerate(zip(all_hs, all_hs_len)):
        hs = hs.to("cpu")
        hs_len = hs_len.to("cpu")
        assert isinstance(hs, torch.FloatTensor)
        assert isinstance(hs_len, torch.LongTensor)

        if layer == layer:
            hidden_states = hs
            hidden_states_len = hs_len

        assert hs_len.dim() == 1

    if layer == -1:
        hidden_states = [hs.to("cpu") for hs in all_hs]
        hidden_states_len = [hs_len.to("cpu") for hs_len in all_hs_len]
        
    return hidden_states, hidden_states_len

def pack_features(hidden_states, hidden_states_len, ids, layer, features_dict, features_list):
    num_inputs = len(ids)
    for input_id in range(num_inputs):
        wav_id = ids[input_id]
        
        # print(hidden_states_len, input_id, layer)
        hs = unpad(hidden_states, hidden_states_len, input_id, layer)
        features_dict[wav_id] = hs
        
        features_list.append(hs)
        # features_list_1D.append(hs)
        
    return features_dict, features_list

def extract_features(model, wav_paths, wav_ids, batch_size, layer_id, device="cuda"):
    num_wavs = len(wav_paths)
    features_dict = {}
    features_list = []
    
    means_total = []
    std_total = []
    mean_std_total = []
    uid_total = []
    
    i = 0
    for bid in tqdm(range(0, num_wavs, batch_size)):
        # if i >= 5:
        #     break
        i += 1
        end_id = min(bid + batch_size, num_wavs)
        
        padded_wavs, wavs_len, ids = get_batch(wav_paths, wav_ids, bid, end_id)
        hidden_states, hidden_states_len = get_features(model, padded_wavs, wavs_len, layer_id, used_device)
        
        # print('hidden_states',hidden_states.shape)
        # print(ids)
        
        hidden_states = hidden_states.squeeze(dim=0)
        
        means = torch.mean(hidden_states, dim=0)
        means_total.append(means.numpy())
        
        stds = torch.std(hidden_states, dim=0)
        std_total.append(stds.numpy())
        
        mean_std = torch.cat((means, stds), dim=0)
        mean_std_total.append(mean_std.numpy())
        
        uid_total.append(ids)
        
        features_dict, features_list = pack_features(hidden_states, hidden_states_len, ids, layer_id, features_dict, features_list)
        
    return features_dict, features_list, means_total, std_total, mean_std_total,  uid_total

# output [batch size, time length, 768]
features_dict, features_list, means_total, std_total, mean_std_total, uid_total = extract_features(
            model = model,
            wav_paths = data_path_list,
            wav_ids = cID_type_list,
            batch_size = 1,
            layer_id = 12, # -1 all layers
            device = used_device
        )

# means_total = []
# std_total = []
# mean_std_total = []
# uid_total = []

# for uid, rep in features_dict.items():
#     print(rep.shape)
    
#     # means and stds
#     means = torch.mean(rep, dim=0)
#     means_total.append(means.numpy())
    
#     stds = torch.std(rep, dim=0)
#     std_total.append(stds.numpy())
    
#     mean_std = torch.cat((means, stds), dim=0)
#     mean_std_total.append(mean_std)
    
#     uid_total.append(uid)
    
    # print('uid', uid)
    # print('means',means_total[0])
    # print('stds',std_total[0])
    
# print(len(means_total[1]))
# save the embeddings
# save path
save_path = '/mnt/work/Animal/output/embeddings/hubert/first_ex'

em_save = True
# em_save = False
if em_save:
    save_df = pd.DataFrame(means_total)
    save_df.to_csv(os.path.join(save_path, 'embedding.csv'), index=False, header=False)
    print(save_df.head())
    
    caller_type_df = pd.DataFrame(uid_total) 
    caller_type_df.to_csv(os.path.join(save_path, 'caller_type_label.csv'), index=False, header=False)
    print(caller_type_df.head())



Total Parameters: 94697600
Trainable Parameters: 94697600


100%|██████████| 20018/20018 [13:26<00:00, 24.82it/s]


OSError: Cannot save file into a non-existent directory: '/mnt/work/Animal/output/embeddings/hubert/first_ex'

In [3]:
######################################################vq_apc######################################################
import json
import os
import torch
import numpy as np
from tqdm import tqdm
import pandas as pd
from s3prl.nn import S3PRLUpstream

# torch.cuda.set_device(1)
torch.cuda.set_device(0)

used_device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = S3PRLUpstream("vq_apc").to(used_device)
model.eval()

def print_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total Parameters: {total_params}")
    print(f"Trainable Parameters: {trainable_params}")

# print(model)
print_model_parameters(model)

refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_test_info.json'
# refer_info_path = '/mnt/work/dataset/preprocessed/great_bird/data_info'+'/'+'ori_data_info_caller.json'
wav_data_path = '/mnt/work/dataset/preprocessed/great_bird/original_data'

def process_subset_file(data_path):     
    # total data list
    data_path_list = [] # npy file path
    c_id_index_list = []
    cID_list = []
    cID_type_list = []
    with open(data_path, 'r') as f:
        data_info = json.load(f)

    for caller_ID in data_info:
        for caller_type in data_info[caller_ID]:
            for item in data_info[caller_ID][caller_type]:
                
                # reduce too len
                # before 240214 
                # if item['len_mel'] >= 199 or item['len_mel'] <= 99:
                #     continue
                
                # after 240214
                if item['len_mel'] >= 399 or item['len_mel'] <= 99:
                    continue
                
                class_id_index = item['class_id_index']

                # wav file path
                wav_path = item['original_wav']
                
                data_path_list.append(wav_path)
                # class_id_index eg 20221W67_0_0
                # c_id_index_list.append(class_id_index)
                # caller id
                cID_list.append(caller_ID)
                # caller type 
                cID_type_list.append(caller_type)
                
    return data_path_list, cID_list, cID_type_list


data_path_list, cID_list, cID_type_list = process_subset_file(refer_info_path)


def unpad(hs, hs_len, index, layer):
    if layer == -1:
        hs_unpadded = []
        for h, lens in zip(hs, hs_len):
            l = lens[index]
            hs_unpadded.append(h[index, :l])
    else:
        l = hs_len[index]
        hs_unpadded = hs[index, :l]
    return hs_unpadded

def get_batch(wav_paths, wav_ids, start, end):
    def getitem(index):
        import soundfile as sf

        path = wav_paths[index]
        wav, curr_sample_rate = sf.read(path, dtype="float32")
        wav /= np.max(np.abs(wav))

        feats = torch.from_numpy(wav).float()
        return feats

    def collate(wavs, padding_value: int = 0):
        from torch.nn.utils.rnn import pad_sequence

        padded_wavs = pad_sequence(
            wavs, batch_first=True, padding_value=padding_value
        )
        return padded_wavs

    end_id = min(end, len(wav_paths))
    wavs = [getitem(index) for index in range(start, end_id)]
    ids = [wav_ids[index] for index in range(start, end_id)]
    wavs_len = [len(wav) for wav in wavs]
    padded_wavs = collate(wavs)
    
    return padded_wavs, wavs_len, ids

def get_features(model, wavs, wavs_len, layer=-1, device="cuda"):
    with torch.no_grad():
        wavs_len = torch.LongTensor(wavs_len).to(device)
        # print('wavs_len',wavs_len)
        all_hs, all_hs_len = model(wavs.to(device), wavs_len)
        # print('all_hs',all_hs[0].shape,'all_hs_len',all_hs_len)

    for layer_id, (hs, hs_len) in enumerate(zip(all_hs, all_hs_len)):
        hs = hs.to("cpu")
        hs_len = hs_len.to("cpu")
        assert isinstance(hs, torch.FloatTensor)
        assert isinstance(hs_len, torch.LongTensor)

        if layer == layer:
            hidden_states = hs
            hidden_states_len = hs_len

        assert hs_len.dim() == 1

    if layer == -1:
        hidden_states = [hs.to("cpu") for hs in all_hs]
        hidden_states_len = [hs_len.to("cpu") for hs_len in all_hs_len]
        
    return hidden_states, hidden_states_len

def pack_features(hidden_states, hidden_states_len, ids, layer, features_dict, features_list):
    num_inputs = len(ids)
    for input_id in range(num_inputs):
        wav_id = ids[input_id]
        
        # print(hidden_states_len, input_id, layer)
        hs = unpad(hidden_states, hidden_states_len, input_id, layer)
        features_dict[wav_id] = hs
        
        features_list.append(hs)
        # features_list_1D.append(hs)
        
    return features_dict, features_list

def extract_features(model, wav_paths, wav_ids, batch_size, layer_id, device="cuda"):
    num_wavs = len(wav_paths)
    features_dict = {}
    features_list = []
    
    means_total = []
    std_total = []
    mean_std_total = []
    uid_total = []
    
    i = 0
    for bid in tqdm(range(0, num_wavs, batch_size)):
        # if i >= 5:
        #     break
        i += 1
        end_id = min(bid + batch_size, num_wavs)
        
        padded_wavs, wavs_len, ids = get_batch(wav_paths, wav_ids, bid, end_id)
        hidden_states, hidden_states_len = get_features(model, padded_wavs, wavs_len, layer_id, used_device)
        
        # print('hidden_states',hidden_states.shape)
        # print(ids)
        
        hidden_states = hidden_states.squeeze(dim=0)
        
        means = torch.mean(hidden_states, dim=0)
        means_total.append(means.numpy())
        
        stds = torch.std(hidden_states, dim=0)
        std_total.append(stds.numpy())
        
        mean_std = torch.cat((means, stds), dim=0)
        mean_std_total.append(mean_std.numpy())
        
        uid_total.append(ids)
        
        features_dict, features_list = pack_features(hidden_states, hidden_states_len, ids, layer_id, features_dict, features_list)
        
    return features_dict, features_list, means_total, std_total, mean_std_total,  uid_total

# output [batch size, time length, 768]
features_dict, features_list, means_total, std_total, mean_std_total, uid_total = extract_features(
            model = model,
            wav_paths = data_path_list,
            wav_ids = cID_type_list,
            batch_size = 1,
            layer_id = 12, # -1 all layers
            device = used_device
        )

# means_total = []
# std_total = []
# mean_std_total = []
# uid_total = []

# for uid, rep in features_dict.items():
#     print(rep.shape)
    
#     # means and stds
#     means = torch.mean(rep, dim=0)
#     means_total.append(means.numpy())
    
#     stds = torch.std(rep, dim=0)
#     std_total.append(stds.numpy())
    
#     mean_std = torch.cat((means, stds), dim=0)
#     mean_std_total.append(mean_std)
    
#     uid_total.append(uid)
    
    # print('uid', uid)
    # print('means',means_total[0])
    # print('stds',std_total[0])
    
# print(len(means_total[1]))
# save the embeddings
# save path
save_path = '/mnt/work/Animal/output/embeddings/vq_apc/first_ex'

em_save = True
# em_save = False
if em_save:
    save_df = pd.DataFrame(means_total)
    save_df.to_csv(os.path.join(save_path, 'embedding.csv'), index=False, header=False)
    print(save_df.head())
    
    caller_type_df = pd.DataFrame(uid_total) 
    caller_type_df.to_csv(os.path.join(save_path, 'caller_type_label.csv'), index=False, header=False)
    print(caller_type_df.head())



Total Parameters: 4630096
Trainable Parameters: 4630096


100%|██████████| 20018/20018 [11:40<00:00, 28.57it/s]


        0         1         2         3         4         5         6    \
0  0.671216  0.589576 -0.779488 -0.407928 -0.257567 -0.544122 -0.991560   
1  0.735563  0.751959 -0.653569 -0.427348 -0.463505 -0.302425 -1.059371   
2  0.711596  0.530423 -0.895867 -0.525112 -0.434542 -0.171986 -1.082592   
3  0.837570  0.883121 -0.830981 -0.392587 -0.226456 -0.279167 -1.066869   
4  0.563029  0.687522 -0.653995 -0.474979 -0.207672 -0.269855 -0.934507   

        7         8         9    ...       502       503       504       505  \
0  0.616165 -0.582342  0.992087  ... -1.142268 -0.842943  0.814530  0.189002   
1  0.360494 -0.849294  0.961290  ... -1.110213 -0.544218  0.813327  0.299149   
2  0.442133 -0.756097  0.593512  ... -1.144335 -0.644250  0.809140  0.529178   
3  0.640621 -0.738344  0.961360  ... -1.176123 -0.720220  0.897317  0.117179   
4  0.580482 -0.832433  1.138621  ... -1.246126 -0.677829  0.969136  0.369084   

        506       507       508       509       510       511  
0  0

In [6]:
length_of_first_row = save_df.shape[1]
print("Length of the first row:", length_of_first_row)

Length of the first row: 512


In [2]:
features_dict.keys()

for uid, rep in features_dict.items(): 
    print(uid,len(rep))
    for l in range(len(rep)):
        mean = torch.mean(rep[l], dim=0)
        print(mean.shape)

B101_0 270
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])

In [4]:

len(features_list)
# features_list

1

In [None]:
import soundfile as sf
import os

print(data_path_list[0])
print(len(data_path_list))
print(cID_type_list[0])

file_path = '/mnt/work/dataset/preprocessed/great_bird/original_data/20221B101_0_69.wav'
if os.path.exists(file_path):
    print(f"The file {file_path} exists.")
else:
    print(f"The file {file_path} does not exist.")

wav, curr_sample_rate = sf.read('/mnt/work/dataset/preprocessed/great_bird/original_data/20221B101_0_69.wav', dtype="float32")
print(len(wav))
