In [2]:
import sys # Python system library needed to load custom functions
import math # module with access to mathematical functions
import os # for changing the directory

import numpy as np  # for performing calculations on numerical arrays
import pandas as pd  # home of the DataFrame construct, _the_ most important object for Data Science

from IPython.display import Audio # for listening to our insects
import IPython
from scipy.fft import fft # function to calculate Fast Fourier Transform

import matplotlib.pyplot as plt  # allows creation of insightful plots
import seaborn as sns # another library to make even more beautiful plots

# Add the source directory to the import path so local modules (eda_utils, gdsc_utils, config) can be imported.
# The path is resolved to an absolute one first: this notebook later calls os.chdir(), and a relative
# entry would then point somewhere else. The membership check keeps re-runs of this cell from
# piling up duplicate entries.
SRC_DIR = os.path.abspath('../../src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
# enable rendering plots under the code cell that created it
%matplotlib inline

from eda_utils import show_sampling, signal_generator, plot_random_spec, plot_spec, plot_waveform # functions to create plots for and from audio data
from gdsc_utils import download_directory, PROJECT_DIR # function to download GDSC data from S3 bucket and our root directory
from config import DEFAULT_BUCKET  # S3 bucket with the GDSC data
import warnings
# Blanket filter: every warning category is silenced for the rest of the session,
# so deprecation notices raised by later cells will not be shown either.
warnings.filterwarnings("ignore")

# All relative paths used by the following cells ('notebooks/...', 'data/metadata.csv')
# are resolved against PROJECT_DIR from here on.
os.chdir(PROJECT_DIR) # changing our directory to root

In [3]:
# Read the validation predictions CSV of the run under analysis and preview its first rows
predictions_csv = 'notebooks/Dominik/effnet_baseline/20_crop-15e_noiseAug_CE_weighted_LS_pp_impulse2/val_predictions.csv'
df = pd.read_csv(predictions_csv)
df.head()

In [4]:
# Only the true label and the predicted class id are needed for the evaluation below
eval_columns = ['label', 'predicted_class_id']
df_eval = df[eval_columns]
df_eval

In [5]:
### Confusion Matrix
from sklearn.metrics import confusion_matrix

# Ground truth and predictions; both names are reused by the classification-report cell.
y_true = df_eval['label']
y_pred = df_eval['predicted_class_id']

# Sorted class ids that occur in either column. This is the label set and order that
# confusion_matrix derives by default; computing it explicitly lets the heatmap ticks
# show the real class ids instead of row/column positions (the two differ as soon as
# any class id is missing from the data).
class_ids = np.union1d(y_true, y_pred)

cm = confusion_matrix(y_true, y_pred, labels=class_ids)
np.save("notebooks/Dominik/effnet_baseline/20_crop-15e_noiseAug_CE_weighted_LS_pp_impulse2/cm.npy", cm)

# Plot the matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(16, 14))
sns.heatmap(
    cm,
    annot=True,          # write the count into every unmasked cell
    fmt='g',
    cmap="magma",
    mask=cm == 0,        # leave cells with a zero count blank
    vmax=10,             # colour scale saturates at 10
    xticklabels=class_ids,
    yticklabels=class_ids,
    ax=ax,
);

# Labels, title and ticks
ax.set_xlabel('Predicted', fontsize=20)
ax.xaxis.set_label_position('bottom')
plt.xticks(rotation=90)

ax.set_ylabel('True', fontsize=20)
plt.yticks(rotation=0)

ax.set_title('Confusion Matrix Validation Set', fontsize=20)

# Save before plt.show(), while the figure is still open
fig.savefig('notebooks/Dominik/effnet_baseline/20_crop-15e_noiseAug_CE_weighted_LS_pp_impulse2/conf_matrix_best_model.png')
plt.show()

In [6]:
from sklearn import metrics

# scikit-learn's classification report for the validation predictions, as a nested dict
report = metrics.classification_report(
    y_true,
    y_pred,
    digits=3,
    output_dict=True,
)

# Turn the dict into a table and flip it so each report entry becomes a row
evaluation = pd.DataFrame(report).T
evaluation

In [7]:
# Per-class accuracy: among the samples whose true label is c, the share predicted as c.
is_correct = df_eval['label'] == df_eval['predicted_class_id']
class_accuracy = is_correct.groupby(df_eval['label']).mean()

# The rows of `evaluation` are keyed by the string form of the class id
# (plus the aggregate rows), so convert the group keys to match.
class_accuracy.index = class_accuracy.index.map(str)

# Align on the row label rather than on row position, and assign the whole column at once
# instead of writing cell by cell through chained indexing. Rows without a matching class
# (the aggregate rows, or classes that never occur as a true label) are left as NaN.
# No class count is hard-coded and an empty class cannot trigger a division by zero.
evaluation['accuracy'] = class_accuracy.reindex(evaluation.index)
evaluation

In [8]:
# Write the evaluation table (index included) into the run's directory
evaluation_csv = 'notebooks/Dominik/effnet_baseline/20_crop-15e_noiseAug_CE_weighted_LS_pp_impulse2/val_evaluation.csv'
evaluation.to_csv(evaluation_csv)

In [9]:
"""
df_f1_score = pd.DataFrame(columns=['class','accuracy','f1_score'])
wrong = 0
for i in range(0,66):
    df_to_eval = df_eval[df_eval['label'] == i]
    for j in df_to_eval['predicted_class_id']:
        if j != i:
            wrong += 1
        else:
            continue
    #y_pred = df_to_eval['predicted_class_id']
    #y_true = df_to_eval['label']
    df_f1_score['class'][i] = i
    df_f1_score['f1_score'][i] = wrong/len(df_to_eval)
    wrong=0
    #df_f1_score['f1_score'][i] = f1_score(y_true, y_pred)
df_f1_score.head()"""

In [10]:
#df_tetet = df_eval[df_eval['label'] == 0]
#len(df_tetet)

In [9]:
#evaluation.drop(evaluation.tail(3).index,inplace=True) #

In [10]:
#pd.options.display.float_format = "{:,.2f}".format

In [11]:
#evaluation.sort_values('accuracy').tail(15)

# EVALUATION OF CLASSES

In [25]:
#import csv
#df_old = pd.read_csv('notebooks/Dominik/effnet_baseline/15_crop-15e_noiseAug/val_evaluation.csv')
#df_old.head()

In [26]:
#df_old.sort_values('accuracy')

In [12]:
# Load the saved evaluation table; the row labels come back as the 'Unnamed: 0' column
saved_evaluation_csv = 'notebooks/Dominik/effnet_baseline/20_crop-15e_noiseAug_CE_weighted_LS_pp_impulse2/val_evaluation.csv'
df_new = pd.read_csv(saved_evaluation_csv)
df_new

In [13]:
# Full evaluation table, lowest accuracy first
df_new.sort_values(by='accuracy')

In [14]:
# The 30 rows with the lowest accuracy
df_new.sort_values(by='accuracy').head(n=30)

In [15]:
# The 8 rows with the lowest f1-score
df_new.sort_values(by='f1-score').head(n=8)

In [16]:
# The 8 rows with the lowest accuracy
df_new.sort_values(by='accuracy').head(n=8)

In [17]:
# The 6 rows with the lowest f1-score
df_new.sort_values(by='f1-score').head(n=6)

In [18]:
# Every row of the evaluation table ordered from lowest to highest accuracy (not truncated)
sorted_df = df_new.sort_values(by='accuracy', ascending=True)

In [19]:
# Row labels of the evaluation table in ascending-accuracy order, as a plain list
worst = [row_label for row_label in sorted_df['Unnamed: 0']]

In [20]:
# Read the recording metadata; note that this rebinds `df`, which held the predictions before
metadata_csv = 'data/metadata.csv'
df = pd.read_csv(metadata_csv)
df.head()

In [21]:
# Keep only the rows whose 'length' is below 75
is_below_limit = df['length'] < 75
df = df[is_below_limit]

In [38]:
# Split the (length-filtered) metadata by the value of its 'subset' column
subset_name = df['subset']
df_train = df[subset_name == 'train']
df_val = df[subset_name == 'validation']

In [None]:
for i in worst[:10]:
    print(i)
    paths = list(df[df['label']==int(i)].sample(4)['path'])
    plot_spec(paths)
    for path in paths:
        IPython.display.display(Audio(path))

In [None]:
for i in worst[:10]:
    print(i)
    paths = list(df_train[df_train['label']==int(i)].sample(4)['path'])
    plot_spec(paths)
    for path in paths:
        IPython.display.display(Audio(path))

In [None]:
for i in worst[:10]:
    print(i)
    paths = list(df_val[df_val['label']==int(i)].sample(2)['path'])
    plot_spec(paths)
    for path in paths:
        IPython.display.display(Audio(path))