In [None]:
from pathlib import Path

import das.train
import das.utils
import h5py
import numpy as np
import pandas as pd
import scipy.io
import scipy.io.wavfile
import xarray as xr
import yaml

import config_adapter
import helper
from helper import RenderJSON

In [None]:
import itables

# Switch itables on for the notebook, with all_interactive enabled.
itables.init_notebook_mode(all_interactive=True)

# Table display options, applied one by one after notebook mode is initialised.
for _option_name, _option_value in {
    "maxBytes": "1MB",
    "lengthMenu": [25, 10, 50, 100, 200],
    "buttons": ["copyHtml5", "csvHtml5", "excelHtml5"],
    "layout": {"topEnd": "pageLength", "top1": "searchBuilder"},
}.items():
    setattr(itables.options, _option_name, _option_value)

In [None]:
# Directory used for the trained model files.
model_dir = Path("./model")

# Read the run parameters. The context manager closes params.yaml as soon as
# parsing is done instead of leaving the handle open for the kernel's lifetime.
with Path("params.yaml").open("r") as params_file:
    params = yaml.safe_load(params_file)

# Last expression of the cell: render the parameters for inspection.
RenderJSON(params)

In [None]:
# Location of the training configuration, taken from the "config_path" entry of params.
config_path = Path(params["config_path"])
# Load the configuration through the project's config_adapter module.
config = config_adapter.load(config_path)
# Last expression of the cell: render the loaded configuration for inspection.
RenderJSON(config)

In [None]:
# Column names of the annotation CSV mapped onto the names used in this notebook.
annotation_columns = {"name": "label", "start_seconds": "start", "stop_seconds": "end"}

# Read the annotations, normalise the column names and order them by start.
annotations = (
    pd.read_csv(params["annotation_path"])
    .rename(columns=annotation_columns)
    .sort_values("start")
)
annotations

In [None]:
# Load the recording: wavfile.read returns the sample rate and the sample array.
fs, data = scipy.io.wavfile.read(params["audio_path"])

# The dataset below has a single "t" dimension, so a multi-channel file cannot
# be represented; fail here with a clear message instead of a dimension error
# raised from inside xarray.
if data.ndim != 1:
    raise ValueError(
        f"Expected a mono recording, got an array of shape {data.shape} "
        f"from {params['audio_path']}"
    )

# Number of samples along the time axis (not the total element count).
n_samples = data.shape[0]

song = xr.Dataset()
song["data"] = xr.DataArray(data, dims="t")
# Time coordinate: sample index divided by the sample rate.
song["t"] = np.arange(n_samples) / fs
# Keep the sample rate next to the time axis it belongs to.
song["t"].attrs["fs"] = fs
song

In [None]:

# Per-sample annotation label and goal. Samples outside every annotation keep
# the label "noise"; goals start out unset (None) and must all be filled below.
n_samples = song["data"].size
song["label"] = xr.DataArray(np.full(n_samples, "noise", dtype=object), dims="t")
song["goal"] = xr.DataArray(np.full(n_samples, None, dtype=object), dims="t")

# Neighbouring annotation boundaries; +/- infinity at the two ends of the table
# so the first and last goal windows extend over the rest of the recording.
annotations["next_start"] = annotations["start"].shift(-1, fill_value=np.inf)
annotations["prev_end"] = annotations["end"].shift(1, fill_value=-np.inf)

# The time coordinate does not change inside the loop, so look it up once.
sample_times = song["t"]
for row in annotations.to_dict(orient="records"):
    # Label: samples in the half-open interval [start, end) of the annotation.
    in_annotation = (sample_times >= row["start"]) & (sample_times < row["end"])
    song["label"] = xr.where(in_annotation, row["label"], song["label"])

    # Goal: from the midpoint between the previous annotation's end and this
    # start, up to the midpoint between this end and the next start (inclusive).
    goal_from = (row["start"] + row["prev_end"]) / 2
    goal_to = (row["end"] + row["next_start"]) / 2
    in_goal_window = (sample_times >= goal_from) & (sample_times <= goal_to)
    song["goal"] = xr.where(in_goal_window, row["goal"], song["goal"])

# Every sample must have a goal; report how many do not.
n_missing_goal = int(song["goal"].isnull().sum())
if n_missing_goal:
    raise ValueError(
        f"null goal problem: {n_missing_goal} of {n_samples} samples have no goal "
        "(is the annotation table empty, or is its 'goal' column incomplete?)"
    )
song

In [None]:
# Class list: "noise" always comes first, followed by the remaining annotation
# labels in order of first appearance.
class_labels = ["noise"]
class_labels += [name for name in annotations["label"].drop_duplicates() if name != "noise"]

# Lookup tables between class label and class index.
labels_to_num = {name: idx for idx, name in enumerate(class_labels)}
num_to_labels = {idx: name for name, idx in labels_to_num.items()}

# One-hot encoding of the per-sample label along the new "syb" dimension.
song["syb"] = xr.DataArray(class_labels, dims="syb")
song["label_proba"] = (song["label"] == song["syb"]).astype(int)

# Number of samples per class, broken down by goal.
samples_per_goal = (
    song["label_proba"]
    .groupby(song["goal"])
    .apply(lambda d: d.sum("t"))
    .unstack()
    .to_dataframe()["label_proba"]
    .unstack("goal")
)
display(samples_per_goal)


In [None]:
# One sub-dataset per distinct goal value found in the annotations.
goals = annotations["goal"].drop_duplicates().tolist()
datasets = {k: song.where(song["goal"] == k, drop=True) for k in goals}

# Arrays to save for each goal: the signal as a column vector ("x") and the
# one-hot labels with time as the first axis ("y").
# (Previously stored in a variable named `all`, which shadowed the builtin.)
das_splits = {
    goal: dict(
        x=subset["data"].to_numpy().reshape(-1, 1),
        y=subset["label_proba"].transpose("t", "syb").to_numpy(),
    )
    for goal, subset in datasets.items()
}

# Metadata stored next to the arrays; every class is declared as a "segment".
attrs = dict(
    samplerate_x_Hz=fs,
    samplerate_y_Hz=fs,
    class_names=song["syb"].to_numpy(),
    class_types=["segment"] * song["syb"].size,
)

dataset_dir = Path("das_training_dataset.npy")
# Create the root directory explicitly so attrs.npy can be written even when
# there is no goal sub-folder to create it implicitly.
dataset_dir.mkdir(exist_ok=True, parents=True)

# Layout on disk: <dataset_dir>/<goal>/<array name>.npy
for goal, arrays in das_splits.items():
    goal_dir = dataset_dir / goal
    goal_dir.mkdir(exist_ok=True, parents=True)
    for array_name, array in arrays.items():
        np.save(goal_dir / (array_name + ".npy"), array)

# attrs is a dict, so it is stored as a pickled object array.
np.save(dataset_dir / "attrs.npy", attrs, allow_pickle=True)

In [None]:
# Make sure the output directory exists before training starts.
model_dir.mkdir(parents=True, exist_ok=True)

# Train on the exported dataset. Extra keyword arguments come from the
# "das_train_params" section of the configuration.
model, desc, o = das.train.train(
    data_dir=dataset_dir,
    save_dir=str(model_dir),
    **config["das_train_params"],
    save_name="das",
)


In [None]:
# Load the parameters stored for the "das" model in model_dir
# (das.utils is imported in the notebook's import cell).
results = das.utils.load_params(str(model_dir / "das"))

# Normalise two entries to plain Python types.
results["class_names"] = list(results["class_names"])
# Wrapping in Path accepts both a Path object and a plain string for data_dir.
results["data_dir"] = str(Path(results["data_dir"]).resolve())

# Strings and values that cannot be indexed are collected under a single
# "other" entry; the remaining (container-like) entries stay at the top level.
group_keys = {
    key: value
    for key, value in results.items()
    if isinstance(value, str) or not hasattr(value, "__getitem__")
}
results["other"] = group_keys
results = {key: value for key, value in results.items() if key not in group_keys}
display(RenderJSON(results))

In [None]:
# Best-effort display of the confusion matrix stored in the results: a failure
# here is reported but does not stop the notebook.
try:
    confusion_matrix = pd.DataFrame(results["conf_mat"])
    if len(confusion_matrix.columns) != len(results["class_names"]):
        print("Confusion Matrix error due to mismatched lengths... Unknown problem probably originating from das")
    else:
        # Name rows and columns after the classes: rows are named "labeled",
        # columns "predicted".
        confusion_matrix.columns = results["class_names"]
        confusion_matrix.index = results["class_names"]
        confusion_matrix = confusion_matrix.rename_axis("labeled")
        confusion_matrix = confusion_matrix.rename_axis("predicted", axis=1)
        display(confusion_matrix)
except Exception as err:
    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit still propagate, and say what actually went wrong.
    print(f"Problem with confusion matrix... ({type(err).__name__}: {err})")

In [None]:
# Build the report table from the stored results.
report = pd.DataFrame(results["report"])

# Column order: everything that is not an average first, the averages last.
avg_columns = [col for col in report.columns if "avg" in col]
non_avg_columns = [col for col in report.columns if "avg" not in col]
report = report[non_avg_columns + avg_columns].transpose().reset_index(names="syb")

# The "accuracy" row is reported on its own (read from its f1-score column)
# and then removed from the table.
is_accuracy_row = report["syb"] == "accuracy"
accuracy = report.loc[is_accuracy_row, "f1-score"].iat[0]
display(f"Model accuracy: {accuracy}")
print(f"Model accuracy: {accuracy}")

report = report[~is_accuracy_row]
report.set_index("syb")