In [55]:
import torch
import numpy as np
import datetime
import glob
import os
import pandas

from torch.utils.data import DataLoader

from model.custom_model import CustomAudioCLIP
from prediction_scripts._utils import AudioList

In [56]:
def initModel(mpath):
    """Restore a trained ``CustomAudioCLIP`` from a checkpoint file.

    Args:
        mpath: Path to the checkpoint (``.ckpt``) to restore.

    Returns:
        The object returned by ``CustomAudioCLIP.load_from_checkpoint`` when
        called with ``num_target_classes=2``.
    """
    # Call load_from_checkpoint on the class itself. The previous form first
    # built a full CustomAudioCLIP instance only to discard it, and -- assuming
    # this is the PyTorch Lightning classmethod (TODO confirm) -- Lightning 2.x
    # refuses to run it on an instance.
    return CustomAudioCLIP.load_from_checkpoint(mpath, num_target_classes=2)

In [58]:
# Restore the trained checkpoint into memory.
m = initModel("/app/assets/ckpt-epoch=21-val_loss=0.12-lr=0.005.ckpt")
# Serialise the whole model object (not just a state_dict) with torch.save.
# NOTE(review): presumably this is the file later read back by torch.load via
# config["MODEL"] -- confirm the paths match.
torch.save(m, "/app/assets/anode.pth")

# OTHER SCRIPTS

In [81]:
import numpy as np
import torch
import yaml
import itertools
import glob
import pandas as pd

from torch.quantization import quantize_dynamic
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_recall_curve
from sklearn.preprocessing import LabelEncoder

from prediction_scripts._utils import openAudioFile, splitSignal

class AudioList():
    """Turn audio files into lists of ``(segment, label)`` pairs.

    NOTE(review): this definition shadows the ``AudioList`` imported from
    ``prediction_scripts._utils`` in the first cell of the notebook.
    """

    def __init__(self, length_segments = 3, sample_rate=44100):
        """Remember the segment length (handed to ``splitSignal`` as
        ``seconds``) and the sample rate used for reading and splitting."""
        self.length_segments = length_segments
        self.sample_rate = sample_rate

    def read_audio(self, audio_path):
        """Open ``audio_path`` through ``openAudioFile`` at ``self.sample_rate``.

        Only the first element of the returned pair (the signal) is kept.
        """
        signal, _discarded = openAudioFile(audio_path, sample_rate=self.sample_rate)
        return signal

    def split_segment(self, array):
        """Delegate to ``splitSignal`` with no overlap and ``minlen=3``."""
        return splitSignal(
            array,
            rate=self.sample_rate,
            seconds=self.length_segments,
            overlap=0,
            minlen=3,
        )

    def get_labels(self, splitted_list, label):
        """Pair every element of ``splitted_list`` with the same ``label``."""
        return [(segment, label) for segment in splitted_list]

    def get_processed_list(self, audio_path):
        """Return one list of ``(segment, label)`` pairs per path in ``audio_path``.

        The label of a file is the second-to-last ``/``-separated component of
        its path, i.e. the name of the directory that contains it.
        """

        def _labelled_segments(file_path):
            # Read first, then derive the label, then split -- same order as before.
            track = self.read_audio(file_path)
            folder_name = file_path.split("/")[-2]
            return self.get_labels(self.split_segment(track), folder_name)

        return [_labelled_segments(file_path) for file_path in audio_path]

class AudioLoader(Dataset):
    """Dataset over ``(array, label)`` pairs that yields ``(tensor, class_index)``."""

    def __init__(self, list_data, label_encoder, sr=44100, transform=None):
        """Keep references to the data, the label encoder and the options.

        ``sr`` and ``transform`` are stored but not read by any method here.
        """
        self.data = list_data
        self.label_encoder = label_encoder
        self.sr = sr
        self.transform = transform

    def __len__(self):
        """Number of ``(array, label)`` pairs held."""
        return len(self.data)

    def process_data(self, data):
        """Convert one ``(array, label)`` pair into ``(tensor, class_index)``.

        The array is reshaped to a single row before being wrapped in a
        tensor; the label is one-hot encoded by ``self.label_encoder`` and
        reduced to its argmax.
        """
        signal, label_name = data
        signal_tensor = torch.tensor(signal.reshape(1, -1))

        one_hot = self.label_encoder.one_hot_sample(label_name)
        class_index = torch.argmax(one_hot)

        return (signal_tensor, class_index)

    def __getitem__(self, idx):
        """Return the processed sample at ``idx`` (tensor indices are accepted)."""
        position = idx.tolist() if torch.is_tensor(idx) else idx
        tensor, label = self.process_data(self.data[position])
        return tensor, label

    def get_labels(self):
        """Return the raw (un-encoded) label of every stored pair, in order."""
        return [label for _, label in self.data]

class EncodeLabels():
    """
    Encode the names of the entries directly under a directory as numerical labels.
    Thin wrapper around sklearn's LabelEncoder.
    """
    def __init__(self, path_to_folders):
        """Create the encoder and fit it on the entries of ``path_to_folders``."""
        self.path_to_folders = path_to_folders
        self.class_encode = LabelEncoder()
        self._labels_name()

    def _labels_name(self):
        """Fit the encoder on the last ``/``-separated component of every
        path matched by ``<path_to_folders>/*``."""
        matched_paths = glob.glob(self.path_to_folders + "/*")
        entry_names = [entry.split("/")[-1] for entry in matched_paths]
        self.class_encode.fit(entry_names)

    def __getLabels__(self):
        """Return the ``classes_`` attribute of the fitted encoder."""
        return self.class_encode.classes_

    def to_one_hot(self, codec, values):
        """Return one identity-matrix row per value, selected by ``codec.transform``."""
        row_indices = codec.transform(values)
        identity = torch.eye(len(codec.classes_))
        return identity[row_indices]

    def one_hot_sample(self, label):
        """One-hot encode a single label; the result keeps a leading axis of size 1."""
        return self.to_one_hot(self.class_encode, [label])

def initModel(model_path):
    """Load a pickled model onto the CPU and dynamically quantize it to int8.

    NOTE(review): this redefines the ``initModel(mpath)`` declared in an
    earlier cell of the notebook; whichever cell ran last wins.

    Args:
        model_path: Path to a model object saved with ``torch.save``.

    Returns:
        The result of ``quantize_dynamic`` applied to the loaded model in
        eval mode.
    """
    # SECURITY: torch.load unpickles the file and can execute arbitrary code;
    # only point this at model files you trust.
    # map_location="cpu" makes a model saved from a GPU loadable on a CPU-only
    # host; the prediction code converts outputs with .numpy(), which needs
    # CPU tensors.
    m = torch.load(model_path, map_location="cpu").eval()
    # NOTE(review): confirm that Conv2d is actually affected by dynamic
    # quantization in the installed torch version; the set is kept as-is.
    m_q = quantize_dynamic(m, qconfig_spec={torch.nn.Linear, torch.nn.Conv2d}, dtype=torch.qint8)
    return m_q
    
def getPredLoader(list_arrays, l):
    """Wrap ``list_arrays`` in an ``AudioLoader`` and return a ``DataLoader``
    over it (batch size 1, 4 workers, no pinned memory).

    NOTE(review): the ``l`` argument is never read; the encoder comes from the
    module-level ``label_encoder`` instead -- confirm which was intended.
    """
    dataset = AudioLoader(list_arrays, label_encoder)
    return DataLoader(dataset, batch_size=1, num_workers=4, pin_memory=False)

def predict(testLoader, model):
    """Run ``model`` over every batch of ``testLoader`` and collect scores.

    The model output is exponentiated and column 1 of every row is kept as
    the score (presumably the model emits log-probabilities and class 1 is
    the positive class -- confirm against the model definition).

    Args:
        testLoader: Iterable yielding ``(array, label)`` batches.
        model: Callable mapping a batch tensor to a CPU tensor with one row
            per sample and at least two columns.

    Returns:
        tuple: ``(scores, labels)`` as numpy arrays with one entry per sample.
    """

    proba_list = []
    label_list = []

    # Inference only: skip building an autograd graph for every batch.
    with torch.no_grad():
        for array, label in testLoader:
            # as_tensor hands an existing tensor through unchanged; wrapping it
            # in torch.tensor() again raised a copy-construct UserWarning.
            tensor = torch.as_tensor(array)
            output = np.exp(model(tensor).detach().numpy())

            # Keep every sample of the batch. Indexing [0] only was correct for
            # batch_size=1 but silently dropped the rest of larger batches.
            proba_list.extend(output[:, 1])

            if torch.is_tensor(label):
                batch_labels = label.detach().cpu().numpy()
            else:
                batch_labels = np.asarray(label)
            label_list.extend(batch_labels.reshape(-1).tolist())

    return (np.array(proba_list), np.array(label_list))

In [47]:
# Load the prediction settings; later cells read the INPUT_PATH, SIG_LENGTH,
# SAMPLE_RATE and MODEL keys from this mapping.
# NOTE(review): FullLoader accepts more YAML tags than the safe loader; if the
# config only holds plain scalars/lists/maps, yaml.safe_load would be the
# stricter choice -- confirm before switching.
with open("/app/prediction_scripts/config.yaml") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

In [48]:

#############################
# Collect the audio dataset #
#############################

# Every regular file below INPUT_PATH whose name ends in a supported audio
# extension. (`os` is not part of this script's import cell; it is expected to
# be in scope from another cell of the notebook.)
allFiles = [
    f
    for f in glob.glob(config["INPUT_PATH"] + "/**/*", recursive=True)
    if os.path.isfile(f) and f.endswith( (".WAV", ".wav", ".mp3") )
]

# Cut each file into labelled segments, then flatten the per-file lists into
# one list of (segment, label) pairs.
audio_list = AudioList(length_segments=config["SIG_LENGTH"], sample_rate=config["SAMPLE_RATE"])
list_test = list(itertools.chain.from_iterable(audio_list.get_processed_list(allFiles)))

###########################
# Build the label encoder #
###########################
label_encoder = EncodeLabels(path_to_folders=config["INPUT_PATH"])

# Folder name -> integer label records. Nothing is written to disk here; the
# records are only kept in `folder_labels`.
l = label_encoder.__getLabels__()
t = label_encoder.class_encode.transform(l)

folder_labels = [{"Folder": i, "Label": int(j)} for i, j in zip(l, t)]

In [79]:
# Wrap the flattened (segment, label) pairs in a Dataset and iterate them one
# sample per batch (batch_size=1) using 4 worker processes.
audioloader = AudioLoader(list_test, label_encoder)
predLoader = DataLoader(audioloader, batch_size=1, num_workers=4, pin_memory=False)
# Load the model from the path given under the MODEL config key.
model = initModel(config["MODEL"])
# proba_list: per-sample score returned by predict(); labels: encoded class index.
proba_list, labels = predict(predLoader, model)

  tensor = torch.tensor(array)


In [92]:
# Precision / recall at every candidate decision threshold over the scores.
precision, recall, thresholds = precision_recall_curve(labels, proba_list)
# Pad thresholds with a final 1 so it has the same length as precision and
# recall (per the scikit-learn docs those arrays hold one extra trailing
# element); the DataFrame built from the three arrays needs equal lengths.
thresholds = np.append(thresholds, 1)

array([0.96888715, 0.97043675, 0.97414303, 0.97651154, 0.9881323 ,
       0.98857063, 0.99071723, 0.99147403, 0.9926458 , 0.99370641,
       0.9941631 , 0.99424696, 0.99455768, 0.99456263, 0.99588394,
       0.99641901, 0.99698162, 0.9971925 , 0.9974699 , 0.99755955,
       0.9975943 , 0.99800658, 0.99805939, 0.99812984, 0.9981305 ,
       0.99817747, 0.99863452, 0.99890178, 1.        ])

In [94]:
# One row per threshold with its precision and recall.
d = {'thresholds': thresholds, 'precision': precision, 'recall': recall}
df = pd.DataFrame(data=d)

In [None]:
# NOTE(review): no path is passed, so to_csv() returns the CSV text as a string
# and writes no file. This cell was never executed (In [None]); supply a
# destination path if the table is meant to be saved.
df.to_csv()

: 

: 

In [14]:
import pandas as pd
import glob
import os

# Directory holding the per-recording result CSVs (hard-coded; adjust to your setup).
path = '/Data/audioCLIP/results' # use your path
# Every entry in the directory is read as a CSV below, whatever its extension.
all_files = glob.glob(path + "/*")
print(all_files)
li = []

for filename in all_files:
    # NOTE(review): `df` reuses the name of the precision/recall DataFrame
    # built in an earlier cell and overwrites it.
    df = pd.read_csv(filename, index_col=None, header=0)
    # Rows whose "confidence" exceeds 0.99; only their count is printed.
    df_conf = df[df["confidence"] > 0.99]
    print(len(df_conf))
    # NOTE(review): the unfiltered frame is appended, not df_conf -- confirm
    # whether `frame` is meant to contain all rows or only the confident ones.
    li.append(df)

# Stack all per-file frames into one table with a fresh 0..n-1 index.
frame = pd.concat(li, axis=0, ignore_index=True)

['/Data/audioCLIP/results/YELLMACS_20120131_105013.csv', '/Data/audioCLIP/results/YELLPAYP_20130124_083717.csv', '/Data/audioCLIP/results/YELLMJ2B_20140327_154332.csv', '/Data/audioCLIP/results/YELLSYL3_20130206_223122.csv', '/Data/audioCLIP/results/YELLMM8K_20080214_021244.csv', '/Data/audioCLIP/results/YELLSYL3_20130207_075037.csv', '/Data/audioCLIP/results/YELLFOPP_20141229_110225.csv', '/Data/audioCLIP/results/YELLMJ23_20111229_154155.csv']
275
808
37
0
0
13
568
578


In [17]:
# Split the recording into 30-minute pieces with ffmpeg's segment muxer
# (-f segment, -segment_time 00:30:00). -c copy copies the streams without
# re-encoding; pieces are written as output000.mp3, output001.mp3, ...
! ffmpeg -i /Data/audioCLIP/files_to_split/YELLCRPA_20110112_060550.MP3 -c copy -map 0 -segment_time 00:30:00 -f segment /Data/audioCLIP/output%03d.mp3

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  7.100 /  5.  7.100
  libswresample   3.  7.100 /  3.  7.100
[0;35m[mp3 @ 0x55be63d6ccc0] [0m[0;33mEstimating duration from bitrate, this may be inaccurate
[0mInput #0, mp3, from '/Data/audioCLIP/files_to_split/YELLCRPA_2011011