In [1]:
import comet_ml
import os
import sys
from pathlib import Path

sys.path.append(str(Path().resolve().parent))
sys.path.append(str(Path().resolve().parent.parent))

from omegaconf import OmegaConf, DictConfig
from src.trainer.trainer import EbirdTask, EbirdDataModule
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from typing import Any, Dict, Tuple, Type, cast
from src.dataset.utils import set_data_paths
import pdb
import numpy as np


import pandas as pd
import torch.nn as nn
from PIL import Image
import torch

In [2]:

# Start from the project defaults; the user file below is layered on top of them.
conf = OmegaConf.load("./configs/defaults.yaml")
config_fn = "./configs/custom_meli-Copy1.yaml"

# Stop right away when the override file is missing.
if not os.path.isfile(config_fn):
    raise FileNotFoundError(f"config_file={config_fn} is not a valid file")

user_conf = OmegaConf.load(config_fn)
conf = OmegaConf.merge(conf, user_conf)

# Let the project helper fill in the data paths, then narrow the static type for mypy.
conf = cast(DictConfig, set_data_paths(conf))


In [3]:
# Find a bird: read every species name (one per line) and the saved index array.
species_list_path = '/network/scratch/t/tengmeli/ecosystem-embedding/species_list.txt'
with open(species_list_path, encoding="utf-8") as species_file:
    lines = species_file.read().splitlines()
# presumably the positions of the songbirds within the species list — TODO confirm
indices = np.load('/network/scratch/t/tengmeli/ecosystem-embedding/songbirds_idx.npy')

In [4]:
# Read the songbird names, one entry per line of the text file.
songbirds_path = '/network/scratch/t/tengmeli/ecosystem-embedding/songbirds.txt'
with open(songbirds_path, encoding="utf-8") as songbird_file:
    song = songbird_file.read().splitlines()

In [5]:
# Sort the names and keep them as a NumPy array so they can be compared element-wise.
song = np.array(sorted(song))

In [6]:
# Species name to search for in the name lists loaded above.
humming = 'Agelaius phoeniceus'

In [40]:
# Position(s) of the selected species within the sorted songbird names.
matches = np.asarray(song) == humming
np.where(matches)

(array([3]),)

In [41]:
# Switch to another species and locate it in the full species list.
humming = 'Centronyx bairdii'
i = np.where(np.asarray(lines) == humming)
print(i)

(array([160]),)


In [42]:
# Same lookup, this time against the sorted songbird names.
i = np.where(np.asarray(song) == humming)

In [43]:
# Column index of the species studied below; 3 is the position at which
# 'Agelaius phoeniceus' was found in `song` (see the recorded lookup output).
idx = 3

In [7]:
# Build the task and the data module from the merged configuration.
task = EbirdTask(conf)
datamodule = EbirdDataModule(conf)

# Turn the trainer section into plain Python containers, later unpacked as **kwargs.
trainer_cfg = OmegaConf.to_object(conf.trainer)
trainer_args = cast(Dict[str, Any], trainer_cfg)

Training with Custom CE Loss


In [8]:
# Test-split table, read from the CSV path configured at conf.data.files.test.
test_df = pd.read_csv(conf.data.files.test)

In [9]:
# Validation-split table, read from conf.data.files.val; its row count is the cell output.
val_df = pd.read_csv(conf.data.files.val)
len(val_df)

451

In [10]:
from src.dataset.dataloader import get_path
from src.dataset.utils import load_file

In [11]:
def get_meta(df, index):
    """Load the file that ``get_path`` resolves for ``(df, index, "meta")``.

    Args:
        df: table forwarded to ``get_path``.
        index: row selector forwarded to ``get_path``.

    Returns:
        Whatever ``load_file`` produces for the resolved path.
    """
    meta_path = get_path(df, index, "meta")
    return load_file(meta_path)

def get_img(df, index, new_width = 256, new_height = 256):
    """Load the "rgb" array for a row and return its centre crop as a PIL image.

    This is the only ``get_img(df, index, ...)`` definition in the cell: a shorter
    variant that returned the raw array was shadowed immediately by this one and
    could never be called, so the dead duplicate is gone.

    Args:
        df: table forwarded to ``get_path`` to locate the file.
        index: row selector forwarded to ``get_path``.
        new_width: width of the crop in pixels.
        new_height: height of the crop in pixels.

    Returns:
        PIL.Image.Image: crop of exactly ``new_width`` x ``new_height`` pixels,
        centred on the source image.
    """
    band_npy = load_file(get_path(df, index, "rgb"))

    # assumes band_npy is channel-first (C, H, W); the transpose puts channels last for PIL
    im = Image.fromarray(np.transpose(band_npy, (1, 2, 0)))
    width, height = im.size

    # Integer box: fractional corners are rounded independently and can change the
    # crop size by one pixel when the size difference is odd.
    left = (width - new_width) // 2
    top = (height - new_height) // 2
    return im.crop((left, top, left + new_width, top + new_height))

In [12]:
PATH = "./ckpt/songbird_smallimg64/epoch=212-step=204905.ckpt"

# Choose the device first so the weights can be placed on it right after loading.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# map_location="cpu" lets a checkpoint that was saved on a GPU load on a CPU-only machine.
# NOTE: torch.load unpickles the file, so only open checkpoints from a trusted source.
state_dict = torch.load(PATH, map_location="cpu")["state_dict"]

# Drop the 'model.' text from each key (presumably the wrapper prefix added when the
# task was saved) so the names match task.model. str.replace removes every occurrence.
for key in list(state_dict.keys()):
    state_dict[key.replace('model.', '')] = state_dict.pop(key)


task.model.load_state_dict(state_dict)
# Keep the weights on the same device the inputs are sent to during inference.
task.model.to(device)
task.model.eval()
m = nn.Sigmoid()

In [13]:
# Run the data module's setup step, then fetch the loader used by every loop below.
# NOTE: despite its name, `test_dataloader` holds the *validation* loader.
datamodule.setup()
test_dataloader = datamodule.val_dataloader()

  cpuset_checked))


In [55]:
# Pull a single batch from the loader for ad-hoc inspection.
a = next(iter(test_dataloader))

In [14]:
def infer(batch, model):
    """Run ``model`` on ``batch['sat']`` and return sigmoid probabilities.

    The input is moved to the device of the model's parameters, so inputs and
    weights always agree, and the forward pass runs without autograd tracking.
    Nothing outside the arguments is read.

    Args:
        batch: mapping with a ``'sat'`` tensor; ``squeeze(1)`` drops axis 1 when it
            has size one (assumes the loader adds such an axis — TODO confirm).
        model: a ``torch.nn.Module`` producing logits.

    Returns:
        torch.Tensor: ``sigmoid(model(x))``, on the model's device, with no grad history.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    x = batch['sat'].squeeze(1).to(target_device)
    with torch.no_grad():
        y_hat = model(x)
    return torch.sigmoid(y_hat)

In [15]:

# Absolute error |probability - target| for every sample and every label in the loader.
abs_errors = []
for batch in test_dataloader:
    pred = infer(batch, task.model).detach().cpu()
    abs_errors.append(torch.abs(pred - batch['target']))

# Concatenate once, in loader order, instead of re-stacking on every iteration.
# An empty loader still gives a (0, 305) tensor.
sum_ = torch.cat(abs_errors) if abs_errors else torch.empty((0,305))

In [16]:
sum_.shape

torch.Size([451, 305])

In [17]:
# Column indices of the 50 labels evaluated separately below
# (presumably the most frequently observed species — TODO confirm how they were picked).
most_freq = [
    276, 3, 130, 41, 62, 260, 96, 249, 205, 90,
    71, 67, 261, 137, 231, 93, 154, 30, 295, 134,
    254, 272, 161, 282, 187, 264, 26, 60, 141, 101,
    292, 243, 257, 193, 165, 179, 240, 63, 211, 188,
    97, 180, 213, 255, 262, 233, 248, 80, 271, 283,
]

In [18]:
# Mean absolute error over every (sample, label) entry.
mae = torch.mean(sum_)
mae

tensor(0.0340)

In [19]:
# Mean squared error: square the absolute errors, then average them.
mse = sum_.pow(2).mean()
mse

tensor(0.0094)

In [20]:
# The same two metrics, restricted to the columns listed in `most_freq`.
freq_errors = sum_[:, most_freq]
mfmae = freq_errors.mean()
mfmse = freq_errors.pow(2).mean()
mfmae, mfmse

(tensor(0.1294), tensor(0.0371))

In [47]:

preds = []    # per-batch summed absolute error of the thresholded predictions
squares = []  # per-batch summed squared error of the thresholded predictions
hs = []       # not filled here; later cells that index `hs` expect per-sample hotspot ids
acc = []      # per-batch tensors: each sample's fraction of labels matching the target
for batch in test_dataloader:
    target = batch["target"]
    # Threshold in one step so a probability of exactly 0.5 cannot stay un-binarized;
    # move to the CPU once instead of on every use.
    pred = (infer(batch, task.model).detach().cpu() > 0.5).float()
    abs_err = torch.abs(pred - target)

    # Divide by the actual number of label columns rather than a hard-coded 305.
    acc += [(pred == target).sum(axis=1) / target.shape[1]]
    preds += [abs_err.sum().item()]
    squares += [(abs_err ** 2).sum().item()]
            
            

In [55]:
# Convert each per-batch accuracy tensor into a NumPy array.
acc = list(map(np.array, acc))

In [61]:
accs = []

In [62]:
# Flatten the per-batch arrays into one list of per-sample accuracies.
# The list is rebuilt from scratch, so re-running this cell cannot append duplicates.
accs = [e for a in acc for e in a]

In [64]:
np.mean(accs)

0.9277598

In [37]:
# MAE of the thresholded predictions: total absolute error / (451 samples * 305 labels).
# The divisor previously used 350, but every other cell works with 305 label columns.
mae = np.sum(preds)/(451*305)

In [38]:
# MSE of the thresholded predictions: total squared error / (451 samples * 305 labels).
# The divisor previously used 350, but every other cell works with 305 label columns.
mse = np.sum(squares)/(451*305)

448

In [46]:
# Per-sample accuracy for the last batch left over from the loop (305 labels per sample).
per_sample_hits = (pred.cpu() == batch["target"]).sum(axis=1)
per_sample_hits / 305

tensor([0.9377, 0.9246, 0.8951])

In [None]:
# For every sample whose original target for label `idx` is positive, record the
# target value and, at the same position in `hst`, the sample's hotspot id.
targs = []
hst = []
for batch in test_dataloader:
    for row, original in enumerate(batch["original_target"]):
        value = original[idx]
        if value > 0:
            targs.append(value.item())
            hst.append(batch["hotspot_id"][row])
            

In [None]:
len(preds), len(targs)

In [None]:
# NumPy copy of `preds` so it can be filtered with boolean masks.
preds_ = np.array(preds)

In [None]:
# Keep the entries above 0.80.
# NOTE(review): the metrics loop fills `preds` with per-batch error sums, not per-sample
# probabilities; this threshold presumably dates from the commented-out variant that
# stored the probability for label `idx` — confirm before relying on it.
predictions = preds_[preds_>0.80]

In [None]:
# NumPy copy of the collected target values so they can be filtered with boolean masks.
targs_ = np.array(targs)

In [None]:
# Keep the target values above 0.80 and show how many remain.
# The array is overwritten, so its positions no longer correspond to those in `targs`.
targs_ = targs_[targs_>0.80]
len(targs_)

In [None]:
# Take the positions from the mask over the full array. Calling np.where on the
# already filtered values only enumerated 0..k-1 and so picked the wrong ids from `hs`.
hotspots = [hs[i] for i in np.where(preds_ > 0.80)[0]]

In [None]:
# `targs_` has already been cut down to the values above 0.80, so its positions no
# longer line up with `hst`. Use the unfiltered `targs` list, which is index-aligned
# with `hst`.
hst_ = [hst[i] for i in np.where(np.array(targs) > 0.80)[0]]

In [None]:
# Order the (score, hotspot) pairs from highest to lowest score, then split them again.
ranked_preds = sorted(zip(predictions, hotspots), reverse=True)
predictions, hotspots = zip(*ranked_preds)

In [None]:
# A bare len() raises TypeError; show the number of retained predictions instead
# (best guess at the intended argument).
len(predictions)

In [None]:
# Order the (target, hotspot) pairs from highest to lowest target, then split them again.
ranked_targets = sorted(zip(targs_, hst_), reverse=True)
targets, hotspots_t = zip(*ranked_targets)

In [None]:
# Path stored in the "rgb" column for one specific hotspot.
test_df.loc[test_df["hotspot"] == "L275497", "rgb"].item()

In [None]:
def get_img(df, hotspot, new_width = 256, new_height = 256):
    """Load the RGB array of ``hotspot`` and return its centre crop as a PIL image.

    Args:
        df: table with a "hotspot" column and an "rgb" column holding file paths.
            The lookup uses this argument; it used to read the global ``test_df``
            no matter what was passed in.
        hotspot: hotspot id; must match exactly one row, because ``.item()`` raises
            ValueError otherwise.
        new_width: width of the crop in pixels.
        new_height: height of the crop in pixels.

    Returns:
        PIL.Image.Image: crop of exactly ``new_width`` x ``new_height`` pixels,
        centred on the source image.
    """
    rgb_path = df.loc[df["hotspot"] == hotspot, "rgb"].item()
    band_npy = load_file(Path(rgb_path))

    # assumes band_npy is channel-first (C, H, W); the transpose puts channels last for PIL
    im = Image.fromarray(np.transpose(band_npy, (1, 2, 0)))
    width, height = im.size

    # Integer box: fractional corners are rounded independently and can change the
    # crop size by one pixel when the size difference is odd.
    left = (width - new_width) // 2
    top = (height - new_height) // 2
    return im.crop((left, top, left + new_width, top + new_height))

In [None]:
str("0.8")

In [None]:
# Save the centre crop for every hotspot in `hotspots_t`. The file name is the hotspot
# id followed by the matching target value with '.' replaced by '-'.
targets_dir = "./predictions/agelaius/targets"
os.makedirs(targets_dir, exist_ok=True)  # saving fails if the folder does not exist yet
for i, h in enumerate(hotspots_t):
    im = get_img(test_df, h)
    im.save(os.path.join(targets_dir, h + "_" + str(targets[i]).replace(".", "-") + ".jpg"))

In [None]:
# Save the centre crop for every hotspot in `hotspots`. The file name is the hotspot id
# followed by the matching prediction value with '.' replaced by '-'.
preds_dir = "./predictions/agelaius/preds"
os.makedirs(preds_dir, exist_ok=True)  # saving fails if the folder does not exist yet
for i, h in enumerate(hotspots):
    im = get_img(test_df, h)
    im.save(os.path.join(preds_dir, h + "_" + str(predictions[i]).replace(".", "-") + ".jpg"))

In [None]:
# Number of predicted hotspots that do not appear among the target hotspots.
sum(1 for h in hotspots if h not in hotspots_t)

In [None]:
# The predicted hotspots that do not appear among the target hotspots.
list(filter(lambda h: h not in hotspots_t, hotspots))

In [None]:
# Number of target hotspots that do not appear among the predicted hotspots.
sum(1 for h in hotspots_t if h not in hotspots)

In [None]:
len(test_df)

In [None]:
len(preds), len(targs)

In [None]:
# Position(s) of one hotspot id within the collected target hotspots.
np.where(np.asarray(hst) == 'L3238822')

In [None]:
# Show the sorted target hotspot ids (a stray "]" made this cell a SyntaxError).
hotspots_t

In [None]:
def training_step(batch):
    """Illustrative training step: forward pass, cross-entropy loss, dict result.

    NOTE(review): `model` and `F` are not defined or imported in the visible part
    of this notebook, so calling this as written would raise NameError.

    Args:
        batch: an ``(x, y)`` pair of inputs and labels.

    Returns:
        dict with the loss under "loss" and a placeholder under "other_stuff".
    """
    x, y = batch
    y_hat = model(x)
    loss = F.cross_entropy(y_hat, y)
    preds = ...  # placeholder (Ellipsis), not real predictions
    return {"loss": loss, "other_stuff": preds}


def training_epoch_end(self, training_step_outputs):
    """Illustrative epoch-end hook that stacks the collected step outputs.

    NOTE(review): it takes `self` although it is defined at module level here, and
    the body ends in a bare Ellipsis, so it returns None.
    """
    all_preds = torch.stack(training_step_outputs)
    ...
# The matching pseudocode is:

# Illustrative loop only: `train_dataloader` and `optimizer` are not created in the
# visible part of this notebook.
outs = []
for batch in train_dataloader:
    # forward
    # NOTE(review): `val_batch` is never defined; the loop variable is `batch`.
    out = training_step(val_batch)
    outs.append(out)

    # clear gradients
    optimizer.zero_grad()

    # backward
    # NOTE(review): `loss` is not assigned in this scope; training_step returns it
    # inside the dict under "loss".
    loss.backward()

    # update parameters
    optimizer.step()

# NOTE(review): training_epoch_end is declared with (self, training_step_outputs),
# so this one-argument call does not match its signature.
training_epoch_end(outs)

In [None]:
# Build the trainer from the keyword arguments extracted from conf.trainer.
trainer = pl.Trainer(**trainer_args)