In [1]:
import ast
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np
import os
os.chdir('../')
import pandas as pd

from pathlib import Path

from src.utils.utils import GetSortedSpeciesCode
from src.utils.threshold_app import countFileLabels, findThreshold

In [4]:
# Species codes evaluated by this notebook; the cells below use them as DataFrame
# column keys, MultiIndex level values and dict keys.
# Presumably returned in sorted order, going by the helper's name — TODO confirm
# against src.utils.utils.
TARGET_SPECIES = GetSortedSpeciesCode()

In [11]:
# Load data/test_overall.csv: two header rows become two-level column keys that are
# addressed below as (species, metric); the first CSV column becomes the row index.
overallCsv = Path.cwd() / 'data' / 'test_overall.csv'
df = pd.read_csv(overallCsv, header=[0, 1], index_col=0)
dfIndex = df.index

# For every target species, keep the index label at which its 'f0.5' column peaks
# (presumably the index holds the candidate thresholds — verify against the CSV).
thres = [
  dfIndex[np.argmax(df.loc[:, (sp, 'f0.5')])]
  for sp in TARGET_SPECIES
]

thres

[0.55, 0.71, 0.68, 0.54, 0.62, 0.66, 0.55, 0.44, 0.59]

In [None]:
## Collect every *.txt label file under data/raw/Label (sorted by path) and count them
labelDir = Path.cwd() / 'data' / 'raw' / 'Label'
sLabelPaths = sorted(labelDir.glob('*.txt'))
countDF = countFileLabels(sLabelPaths)

## Load Predict Probability CSV
predictCsv = Path.cwd() / 'data' / 'TEST_APP.csv'
predictDF = pd.read_csv(predictCsv, header=0)

## Find threshold for each species
maxThresDict, spThresDict = findThreshold(predictDF, countDF)

In [None]:
# One column per key of spThresDict (the per-species result of findThreshold).
df = pd.DataFrame.from_dict(spThresDict)

# Keep elements 2 and 3 of every cell; the F-score cell further down reads them as
# (precision, recall). A column-wise Series.map is used instead of
# DataFrame.applymap, which is deprecated since pandas 2.1, so this runs without
# warnings on both old and new pandas.
precisionRecallDF = df.apply(lambda col: col.map(lambda x: x[2:4]))

# Threshold grid 0.00, 0.05, ..., 0.95. Assumes spThresDict holds exactly one entry
# per grid value for each species (TODO confirm against findThreshold); a length
# mismatch raises here rather than silently misaligning rows.
precisionRecallDF['threshold'] = np.around(np.arange(0, 1, 0.05), decimals=2)

# Put 'threshold' first and keep only the target species columns, then export.
precisionRecallDF = precisionRecallDF[['threshold'] + TARGET_SPECIES]
precisionRecallDF.to_csv(Path.cwd().joinpath('precisionRecall.csv'), header=True, index=False)

In [None]:
def fBetaScore(precision, recall, beta):
  """Return the F-beta score ``(1 + beta**2) * P * R / (beta**2 * P + R)``.

  Args:
    precision: precision value P.
    recall: recall value R.
    beta: weight of recall relative to precision (0.5, 1.0 and 2.0 are used below).

  Returns:
    The F-beta score, or 0.0 when the denominator is zero or NaN (e.g. precision
    and recall are both 0), instead of raising ZeroDivisionError or yielding NaN.
  """
  denominator = beta**2 * precision + recall
  if denominator == 0 or np.isnan(denominator):
    return 0.0
  return (1 + beta**2) * (precision * recall) / denominator

# Original name of the F-beta helper, kept so existing ``f(x, y, z)`` calls still work.
f = fBetaScore

# Metric name -> beta used for that F-score.
betaByMetric = {'f0.5': 0.5, 'f1': 1.0, 'f2': 2.0}

resDFIndex = pd.MultiIndex.from_product(
  [TARGET_SPECIES, ['precision', 'recall', 'f0.5', 'f1', 'f2']],
)
# Reuse the threshold grid stored alongside the precision/recall pairs so the column
# labels always line up with the rows being iterated.
thresList = precisionRecallDF['threshold'].to_numpy()
# Float dtype up front: every stored value is numeric, and this avoids relying on the
# deprecated object-dtype downcast inside fillna.
resDF = pd.DataFrame(columns=thresList, index=resDFIndex, dtype=float)

for sp in TARGET_SPECIES:
  # Each entry of precisionRecallDF[sp] is indexed as [0] = precision, [1] = recall.
  for thres, row in zip(thresList, precisionRecallDF[sp]):
    precision, recall = row[0], row[1]
    resDF.loc[(sp, 'precision'), thres] = np.round(precision, decimals=4)
    resDF.loc[(sp, 'recall'), thres] = np.round(recall, decimals=4)
    for metric, beta in betaByMetric.items():
      resDF.loc[(sp, metric), thres] = np.round(fBetaScore(precision, recall, beta), decimals=4)

# Any remaining NaN (e.g. NaN precision or recall in the input) is reported as 0.
resDF = resDF.fillna(0)
# Transposed on export: one row per threshold, one (species, metric) column each.
resDF.T.to_csv(Path.cwd().joinpath('one-min-report.csv'), header=True, index=True)

In [None]:
def filterTarget(l, targets=None):
  """Return the items of ``l`` that are target species, preserving order and duplicates.

  Args:
    l: iterable of labels to filter.
    targets: container of accepted labels; defaults to the notebook-level
      TARGET_SPECIES when omitted, which is the original behaviour.

  Returns:
    A new list with only the items of ``l`` found in ``targets``.
  """
  if targets is None:
    targets = TARGET_SPECIES
  return [x for x in l if x in targets]

def countLabel(df:pd.DataFrame, targets=None):
  """Count how many times each target species occurs in the 'label' column of ``df``.

  Args:
    df: frame whose 'label' column holds an iterable of labels per row.
    targets: labels to count; defaults to the notebook-level TARGET_SPECIES when
      omitted, which is the original behaviour.

  Returns:
    dict mapping every target to its occurrence count (0 when it never appears).
  """
  if targets is None:
    targets = TARGET_SPECIES
  spDict = {sp: 0 for sp in targets}
  # A frame without rows has nothing to count (and may not have a 'label' column).
  if len(df) == 0:
    return spDict
  # Iterating the column directly avoids building a Series per row via iterrows().
  for labels in df['label']:
    for sp in labels:
      if sp in targets:
        spDict[sp] += 1
  return spDict

In [None]:
df = pd.read_csv(Path.cwd().joinpath('data', 'LABEL_SEG.csv'), header=0)
# Each 'label' cell is a Python-literal string: parse it, then keep target species only.
df['label'] = df['label'].apply(lambda x: filterTarget(ast.literal_eval(x)))

# Raw strings: '\d' inside a plain literal is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning from Python 3.12). The patterns are unchanged.
isXC = df['file'].str.contains(r'XC\d')
isML = df['file'].str.contains(r'ML\d')

# Split rows by file-name pattern; SELF is every row matching neither pattern.
xcDF = df[isXC]
mlDF = df[isML]
selfDF = df[~(isXC | isML)]

xcLabel = countLabel(xcDF)
mlLabel = countLabel(mlDF)
selfLabel = countLabel(selfDF)

In [None]:
# Rows: species, columns: XC / ML / SELF label counts.
labelDF = pd.DataFrame.from_records(
  [xcLabel, mlLabel, selfLabel],
  index=['XC', 'ML', 'SELF']
).T

plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(16, 9))
# The bars are drawn tallest-first on the same x positions, so each later bar covers
# the lower part of the previous one. The visible segments therefore read, from top
# to bottom, ML / XC / SELF — the legend labels name the visible segment of each bar.
ax.bar(x=TARGET_SPECIES, height=labelDF['XC']+labelDF['ML']+labelDF['SELF'], label='ML')
ax.bar(x=TARGET_SPECIES, height=labelDF['XC']+labelDF['SELF'], label='XC')
ax.bar(x=TARGET_SPECIES, height=labelDF['SELF'], label='SELF')
ax.set_xlabel('Species')
ax.set_ylabel('Label count')
ax.set_title('Label counts per target species')
ax.legend()
print(labelDF)

In [None]:
import librosa
import librosa.display

# Settings shared by both recordings so the two spectrograms stay comparable.
melKwargs = dict(n_fft=1024, hop_length=512, n_mels=128, fmin=1000, fmax=10000)
pcenKwargs = dict(time_constant=0.06, gain=0.8, bias=10, power=0.25)

def pcenMel(y, sampleRate):
  """Return ``(mel, pcenImage)`` for the waveform ``y`` sampled at ``sampleRate``.

  ``mel`` is the mel spectrogram computed with ``melKwargs``; ``pcenImage`` is its
  PCEN-normalised version computed with ``pcenKwargs``.
  """
  melSpec = librosa.feature.melspectrogram(y=y, sr=sampleRate, **melKwargs)
  # Scaling by 2**31 follows the librosa PCEN documentation example.
  pcenImage = librosa.pcen(
    melSpec * (2**31), sr=sampleRate, hop_length=melKwargs['hop_length'], **pcenKwargs
  )
  return melSpec, pcenImage

wavName = 'GW01FOREST_20210401_060600.wav'
# sr=None keeps each file's native sample rate. The two rates are stored separately:
# previously the second load overwrote `sr`, so the first recording was processed
# with the second file's rate.
audio, sr = librosa.load(
  str(Path.cwd().joinpath('data', 'raw', wavName)),
  sr=None
)
nrAudio, nrSr = librosa.load(
  str(Path.cwd().joinpath('data', 'raw', 'NrAudio', wavName)),
  sr=None
)
mel, image = pcenMel(audio, sr)
nrMel, nrImage = pcenMel(nrAudio, nrSr)


# Four stacked panels: waveform + spectrogram for data/raw, then for data/raw/NrAudio.
fig, ax = plt.subplots(4, 1, figsize=(128, 72))
ax = ax.flatten()
panels = [(audio, sr, image), (nrAudio, nrSr, nrImage)]
for pairIndex, (wave, waveSr, pcenImage) in enumerate(panels):
  waveAx, specAx = ax[2 * pairIndex], ax[2 * pairIndex + 1]
  librosa.display.waveshow(wave, sr=waveSr, ax=waveAx, color='b')
  waveAx.set_xlim(0, 60)
  # The data is mel-scaled between fmin and fmax, so the frequency axis must be
  # 'mel'; with 'linear' specshow labels the bins as 0..sr/2 Hz and ignores
  # fmin/fmax.
  librosa.display.specshow(
    pcenImage, y_axis='mel', x_axis='time',
    sr=waveSr, hop_length=melKwargs['hop_length'], ax=specAx,
    fmin=melKwargs['fmin'], fmax=melKwargs['fmax'], cmap='YlOrRd'
  )