This notebook applies TiSSNet to all the demo data.

In [None]:
import datetime
import numpy as np
import torch
from torchvision.transforms import Resize
from matplotlib import pyplot as plt
from pathlib import Path
import pickle
from tqdm.notebook import tqdm
from scipy.signal import find_peaks
import pandas as pd
import seaborn as sns

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.signal.make_spectrogram import make_spectrogram
from utils.detection.TiSSNet import TiSSNet, process_batch

In [None]:
# --- Paths -------------------------------------------------------------------
catalog_path = "/media/plerolland/akoustik/MAHY"
tissnet_checkpoint = "../../../../data/models/TiSSNet/torch_save_checked_Pn"
out_dir = "../../../../data/detection/TiSSNet_Pn_raw/MAHY"  # where files will be saved
Path(out_dir).mkdir(parents=True, exist_ok=True)  # create output directory if needed

stations = StationsCatalog(catalog_path).filter_out_undated() # remove stations with no start / end dates

# --- Model -------------------------------------------------------------------
# The checkpoint is always deserialized on CPU (map_location) so that it can be loaded
# on a machine without CUDA; the model is moved to `device` at the end of the cell.
model_det = TiSSNet()
model_det.load_state_dict(torch.load(tissnet_checkpoint, map_location="cpu"))

# --- Segmentation of the recordings ------------------------------------------
DELTA = datetime.timedelta(seconds=3600)  # duration of segments that are given to TiSSNet
OVERLAP = 0.02   # overlap between those segments (no link with STFT)
STEP = (1 - OVERLAP) * DELTA  # time shift between the starts of two consecutive segments
batch_size = 1  # number of segments that are fed together to TiSSNet

# parameters of peak finding (TiSSNet outputs 1 value per spectrogram time bin, we use a peak finding algorithm to save only the peaks)
TISSNET_PROMINENCE = 0.05  # passed as `prominence` to scipy.signal.find_peaks
ALLOWED_ERROR_S = 5        # minimal spacing between two peaks, in seconds (converted to bins before use)
MIN_HEIGHT = 0.05          # peaks whose height is not above this value are discarded

TIME_RES = 0.5342  # duration of each spectrogram pixel in seconds
FREQ_RES = 0.9375  # f of each spectrogram pixel in Hz

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model_det.to(device)

In [None]:
# Sequential detection: for each station, cut the recording into overlapping segments,
# run TiSSNet on their spectrograms and append the detected peaks to a pickle file.
for station in stations:
    station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)
    manager = station.get_manager()
    out_file = f"{out_dir}/{station.dataset}_{station.name}.pkl" # results are saved as stacked pickle files

    print(f"Starting detection on {manager.name}")

    start, end = manager.dataset_start, manager.dataset_end
    steps = int(np.ceil((end - start)/STEP))
    start_idx = 0
    batch_dates, batch_process = [], []

    # if some detection has already been run, we start where it was stopped
    # (only the date of the last record is needed, so records are not accumulated in memory)
    # NOTE(review): pickle.load can execute arbitrary code - only read files written by this notebook.
    last_date = None  # stays None when the file is missing or empty
    if Path(out_file).exists():
        with open(out_file, "rb") as f:
            while True:
                try:
                    last_date = pickle.load(f)[0]
                except EOFError:
                    break
    if last_date is not None:
        start_idx = max(0, int(np.floor((last_date - start) / STEP)))

    # we start detection
    for i in tqdm(range(steps)):
        if i < start_idx:
            continue # this is just to fill tqdm progress bar in case we loaded an old detection file

        # important : prefer index multiplication over incrementation to avoid rounding errors
        # (i.e. seg_start = start + i * STEP is way better than seg_start = seg_start + STEP)
        seg_start = start + i * STEP
        seg_end = min(end, seg_start + DELTA)
        if seg_start >= seg_end:
            break

        # add data to batch (segments with at most one second of data are skipped)
        data = manager.get_segment(seg_start, seg_end)
        if len(data) / manager.sampling_f > 1:
            spectrogram = make_spectrogram(data, manager.sampling_f, t_res=TIME_RES, f_res=FREQ_RES, return_bins=False, normalize=True, vmin=-35, vmax=140).astype(np.float32)
            # force the first axis to exactly 128 rows: keep the last 128 rows or pad zeros in front
            if spectrogram.shape[0] > 128:
                spectrogram = spectrogram[spectrogram.shape[0]-128:]
            elif spectrogram.shape[0] < 128:
                spectrogram = np.concatenate((np.zeros((128-spectrogram.shape[0], spectrogram.shape[1]), np.float32), spectrogram), axis=0)
            spectrogram = spectrogram[np.newaxis, :, :]  # add a dummy dimension, this stands for the channel number (here we are in grayscale, i.e. only one value for each pixel)
            input_data = Resize((128, spectrogram.shape[-1]))(torch.from_numpy(spectrogram)) # resize data
            batch_dates.append(seg_start)
            batch_process.append(input_data)

        # check if the batch is ready to be processed; an empty batch (e.g. last segment skipped) is never sent to the model
        batch_ready = len(batch_process) == batch_size or i == steps - 1
        if not batch_process or not batch_ready:
            continue

        if all(x.shape == batch_process[0].shape for x in batch_process):
            res = process_batch(batch_process, device, model_det)
        else:
            # segments of different lengths (typically the end of the dataset) cannot share a batch:
            # each one is processed alone, as a batch of one element
            res = [process_batch([x], device, model_det)[0] for x in batch_process]

        # now proceed to peak finding for each window to keep only the peaks
        detections = []
        for batch_seg_start, r in zip(batch_dates, res):
            # the time resolution is computed from the bounds of THIS segment (the last one may be shorter than DELTA)
            batch_seg_end = min(end, batch_seg_start + DELTA)
            actual_time_res = (batch_seg_end - batch_seg_start).total_seconds() / r.shape[0]
            peak_idx, peak_props = find_peaks(r, height=0, distance=ALLOWED_ERROR_S / actual_time_res, prominence=TISSNET_PROMINENCE)
            for time_s, height in zip(peak_idx * actual_time_res, peak_props["peak_heights"]):
                if height <= MIN_HEIGHT:
                    continue
                date = batch_seg_start + datetime.timedelta(seconds=time_s)
                if last_date is not None and date <= last_date:
                    continue  # already saved by the run we are resuming from
                detections.append((date, height))

        with open(out_file, "ab") as f:
            for d, p in detections:
                pickle.dump([d, p.astype(np.float16)], f)  # we write detections as a list of (date, peak probability)

        batch_dates, batch_process = [], []

In [None]:
from utils.data_reading.sound_data.sound_file_manager import DatFilesManager
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import torch
import math
import os
from concurrent.futures import ProcessPoolExecutor
import torch

BATCH_SIZE = 8  # number of segments prepared in parallel and stacked into one model batch
HEIGHT = 128    # number of rows every spectrogram is cropped / padded to

def process_and_make_spec(idx, start, STEP, DELTA, end, sampling_f, HEIGHT):
    """Read segment number `idx` and turn it into a spectrogram with exactly HEIGHT rows.

    The segment spans [start + idx * STEP, min(end, start + idx * STEP + DELTA)] and is
    read through the module-level `global_manager`.

    Returns None when the segment holds at most one second of samples (according to
    `sampling_f`), otherwise a tuple (segment start, float32 array with a leading
    singleton axis).
    """
    window_begin = start + idx * STEP
    window_stop = min(end, window_begin + DELTA)
    samples = global_manager.get_segment(window_begin, window_stop)

    # not enough data in this window: nothing to give to the model
    if len(samples) / sampling_f <= 1:
        return None

    spectro = make_spectrogram(samples, global_manager.sampling_f, t_res=TIME_RES, f_res=FREQ_RES,
                               return_bins=False, normalize=True, vmin=-35, vmax=140)
    spectro = spectro.astype(np.float32)

    n_rows = spectro.shape[0]
    if n_rows < HEIGHT:
        # too few rows: pad in front of the first axis (np.pad 'constant' mode)
        spectro = np.pad(spectro, ((HEIGHT - n_rows, 0), (0, 0)), 'constant')
    elif n_rows > HEIGHT:
        # too many rows: keep only the last HEIGHT ones
        spectro = spectro[-HEIGHT:]

    return window_begin, spectro[np.newaxis, :, :]

_manager_cache = {}  # maps a process id to the manager registered for that process

def get_manager_for_worker():
    """Return the manager associated with the current process.

    On the first call in a given process, the module-level `global_manager`
    (assigned by the station loop) is stored under the process id; later calls
    in the same process return that stored object.
    """
    worker_pid = os.getpid()
    if worker_pid in _manager_cache:
        return _manager_cache[worker_pid]

    _manager_cache[worker_pid] = global_manager
    return _manager_cache[worker_pid]

# Parallel detection: spectrograms are computed by a pool of worker processes,
# TiSSNet runs in this process, and the peaks of its output are appended to a pickle file.
for station in stations:
    print(f"Starting detection on {station.name}")
    out_file = f"{out_dir}/{station.dataset}_{station.name}.pkl"

    # station whose name contains "43" is read from a dedicated directory
    if "43" in station.name:
        station.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
    station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)
    manager = station.get_manager()
    global_manager = manager  # for the workers (read by process_and_make_spec)

    start, end = manager.dataset_start, manager.dataset_end
    steps = int(np.ceil((end - start) / STEP))
    start_idx = 0

    # Resume support: restart from the segment that contains the last saved detection.
    # `last_date` is reset for every station so that an empty file never reuses the
    # value of the previous station.
    # NOTE(review): pickle.load can execute arbitrary code - only read files written by this notebook.
    last_date = None
    if Path(out_file).exists():
        with open(out_file, "rb") as f:
            while True:
                try:
                    last_date = pickle.load(f)[0]
                except EOFError:
                    break
    if last_date is not None:
        start_idx = max(0, int(np.floor((last_date - start) / STEP)))

    with open(out_file, "ab") as f_out, \
         ProcessPoolExecutor() as executor:

        for i in tqdm(range(start_idx, steps, BATCH_SIZE)):
            idxs = list(range(i, min(i + BATCH_SIZE, steps)))
            n_idxs = len(idxs)
            results = list(executor.map(
                process_and_make_spec,
                idxs,
                [start] * n_idxs,
                [STEP] * n_idxs,
                [DELTA] * n_idxs,
                [end] * n_idxs,
                [manager.sampling_f] * n_idxs,
                [HEIGHT] * n_idxs
            ))

            # workers return None for segments that do not contain enough data
            results = [r for r in results if r is not None]
            if not results:
                continue

            times_loaded, spectros = zip(*results)

            try:
                batch_tensor = np.stack(spectros)
                preds = process_batch(batch_tensor, device, model_det)
                pairs = zip(times_loaded, preds)
            except ValueError:
                # np.stack raises ValueError when the spectrograms do not all have the same shape
                # (e.g. shorter last segment): fall back to one model call per spectrogram
                pairs = []
                for time_, spec in zip(times_loaded, spectros):
                    try:
                        spec_tensor = np.expand_dims(spec, 0)
                        pred = process_batch(spec_tensor, device, model_det)[0]
                        pairs.append((time_, pred))
                    except Exception as e:
                        # best effort: report the failing spectrogram and keep going
                        print(f"Erreur dans le traitement du spectro isolé : {e}")

            for seg_start, pred in pairs:
                # use the real duration of the segment: the last one is cut at `end` and is shorter than DELTA
                seg_end = min(end, seg_start + DELTA)
                t_res = (seg_end - seg_start).total_seconds() / pred.shape[0]
                peak_idx, peak_props = find_peaks(pred, height=0, distance=math.ceil(ALLOWED_ERROR_S / t_res), prominence=TISSNET_PROMINENCE)
                for t, prob in zip(peak_idx * t_res, peak_props["peak_heights"]):
                    if prob <= MIN_HEIGHT:
                        continue
                    date = seg_start + datetime.timedelta(seconds=t)
                    if last_date is not None and date <= last_date:
                        continue  # already saved by the run we are resuming from
                    pickle.dump([date, prob.astype(np.float16)], f_out)

In [None]:
import datetime
import numpy as np
import torch
import math
from torchvision.transforms import Resize
from matplotlib import pyplot as plt
from pathlib import Path
import pickle
from tqdm import tqdm
from scipy.signal import find_peaks
import pandas as pd
import seaborn as sns

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.signal.make_spectrogram import make_spectrogram
from utils.detection.TiSSNet import TiSSNet
from utils.detection.TiSSNet import process_batch

# --- Paths -------------------------------------------------------------------
catalog_path = "/media/plerolland/akoustik/MAHY/MAHY.csv"
tissnet_checkpoint = "../../../../data/models/i_TiSSNet/torch_save_checked-reboot-3"
out_dir = "../../../../data/detection/i_TiSSNet_raw/MAHY"  # where files will be saved
Path(out_dir).mkdir(parents=True, exist_ok=True)  # create output directory if needed

stations = StationsCatalog(catalog_path).filter_out_undated() # remove stations with no start / end dates
print(stations)

# --- Model -------------------------------------------------------------------
# The checkpoint is always deserialized on CPU (map_location) so that it can be loaded
# on a machine without CUDA; the model is moved to `device` at the end of this section.
model_det = TiSSNet()
model_det.load_state_dict(torch.load(tissnet_checkpoint, map_location="cpu"))

# --- Segmentation of the recordings ------------------------------------------
DELTA = datetime.timedelta(seconds=100)  # duration of segments that are given to TiSSNet
OVERLAP = 0.02   # overlap between those segments (no link with STFT)
STEP = (1 - OVERLAP) * DELTA  # time shift between the starts of two consecutive segments
batch_size = 1  # number of segments that are fed together to TiSSNet

# parameters of peak finding (TiSSNet outputs 1 value per spectrogram time bin, we use a peak finding algorithm to save only the peaks)
TISSNET_PROMINENCE = 0.05  # passed as `prominence` to scipy.signal.find_peaks
ALLOWED_ERROR_S = 2        # minimal spacing between two peaks, in seconds (converted to bins before use)
MIN_HEIGHT = 0.05          # peaks whose height is not above this value are discarded

TIME_RES = 0.5342  # duration of each spectrogram pixel in seconds
FREQ_RES = 0.9375  # f of each spectrogram pixel in Hz
HEIGHT = 128       # number of rows every spectrogram is cropped / padded to

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model_det.to(device)

from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import torch

import os
from concurrent.futures import ProcessPoolExecutor
import torch

BATCH_SIZE = 256  # number of segments prepared in parallel and stacked into one model batch

def process_and_make_spec(idx, start, STEP, DELTA, end, sampling_f, HEIGHT):
    """Read segment number `idx` and turn it into a spectrogram with exactly HEIGHT rows.

    The segment spans [start + idx * STEP, min(end, start + idx * STEP + DELTA)] and is
    read through the manager returned by `get_manager_for_worker()`.

    Returns None when the segment holds at most one second of samples (according to
    `sampling_f`), otherwise a tuple (segment start, float32 array with a leading
    singleton axis).
    """
    window_begin = start + idx * STEP
    window_stop = min(end, window_begin + DELTA)
    reader = get_manager_for_worker()
    samples = reader.get_segment(window_begin, window_stop)

    # not enough data in this window: nothing to give to the model
    if len(samples) / sampling_f <= 1:
        return None

    spectro = make_spectrogram(samples, reader.sampling_f, t_res=TIME_RES, f_res=FREQ_RES,
                               return_bins=False, normalize=True, vmin=-35, vmax=140)
    spectro = spectro.astype(np.float32)

    n_rows = spectro.shape[0]
    if n_rows < HEIGHT:
        # too few rows: pad in front of the first axis (np.pad 'constant' mode)
        spectro = np.pad(spectro, ((HEIGHT - n_rows, 0), (0, 0)), 'constant')
    elif n_rows > HEIGHT:
        # too many rows: keep only the last HEIGHT ones
        spectro = spectro[-HEIGHT:]

    return window_begin, spectro[np.newaxis, :, :]

_manager_cache = {}  # maps a process id to the manager built in that process

def get_manager_for_worker():
    """Return the manager of `global_station`, building it at most once per process.

    `global_station` is a module-level name assigned by the station loop of the
    parent process. The manager is built on the first call made in a process and
    stored under the process id; later calls in that process return the stored one.
    """
    worker_pid = os.getpid()
    if worker_pid in _manager_cache:
        return _manager_cache[worker_pid]

    # same station preparation as in the station loop, applied before building the manager
    if "43" in global_station.name:
        global_station.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
    global_station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)

    worker_manager = global_station.get_manager()
    _manager_cache[worker_pid] = worker_manager
    return worker_manager

# Parallel detection: spectrograms are computed by a pool of worker processes,
# TiSSNet runs in this process, and the peaks of its output are appended to a pickle file.
for station in stations:
    print(f"Starting detection on {station.name}")
    global_station = station  # for the workers (read by get_manager_for_worker)
    # station whose name contains "43" is read from a dedicated directory
    if "43" in station.name:
        station.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
    station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)
    manager = station.get_manager()
    out_file = f"{out_dir}/{station.dataset}_{station.name}.pkl"

    start, end = manager.dataset_start, manager.dataset_end
    steps = int(np.ceil((end - start) / STEP))
    start_idx = 0

    # Resume support: restart from the segment that contains the last saved detection.
    # `last_date` is reset for every station so that an empty file never reuses the
    # value of the previous station.
    # NOTE(review): pickle.load can execute arbitrary code - only read files written by this notebook.
    last_date = None
    if Path(out_file).exists():
        with open(out_file, "rb") as f:
            while True:
                try:
                    last_date = pickle.load(f)[0]
                except EOFError:
                    break
    if last_date is not None:
        start_idx = max(0, int(np.floor((last_date - start) / STEP)))

    with open(out_file, "ab") as f_out, \
         ProcessPoolExecutor() as executor:

        for i in tqdm(range(start_idx, steps, BATCH_SIZE)):
            idxs = list(range(i, min(i + BATCH_SIZE, steps)))
            n_idxs = len(idxs)
            results = list(executor.map(
                process_and_make_spec,
                idxs,
                [start] * n_idxs,
                [STEP] * n_idxs,
                [DELTA] * n_idxs,
                [end] * n_idxs,
                [manager.sampling_f] * n_idxs,
                [HEIGHT] * n_idxs
            ))

            # workers return None for segments that do not contain enough data
            results = [r for r in results if r is not None]
            if not results:
                continue

            times_loaded, spectros = zip(*results)

            try:
                batch_tensor = np.stack(spectros)
                preds = process_batch(batch_tensor, device, model_det)
                pairs = zip(times_loaded, preds)
            except ValueError:
                # np.stack raises ValueError when the spectrograms do not all have the same shape
                # (e.g. shorter last segment): fall back to one model call per spectrogram
                pairs = []
                for time_, spec in zip(times_loaded, spectros):
                    try:
                        spec_tensor = np.expand_dims(spec, 0)
                        pred = process_batch(spec_tensor, device, model_det)[0]
                        pairs.append((time_, pred))
                    except Exception as e:
                        # best effort: report the failing spectrogram and keep going
                        print(f"Erreur dans le traitement du spectro isolé : {e}")

            for seg_start, pred in pairs:
                # use the real duration of the segment: the last one is cut at `end` and is shorter than DELTA
                seg_end = min(end, seg_start + DELTA)
                t_res = (seg_end - seg_start).total_seconds() / pred.shape[0]
                peak_idx, peak_props = find_peaks(pred, height=0, distance=math.ceil(ALLOWED_ERROR_S / t_res), prominence=TISSNET_PROMINENCE)
                for t, prob in zip(peak_idx * t_res, peak_props["peak_heights"]):
                    if prob <= MIN_HEIGHT:
                        continue
                    date = seg_start + datetime.timedelta(seconds=t)
                    if last_date is not None and date <= last_date:
                        continue  # already saved by the run we are resuming from
                    pickle.dump([date, prob.astype(np.float16)], f_out)

In [None]:
import datetime
import numpy as np
import torch
from torchvision.transforms import Resize
from matplotlib import pyplot as plt
from pathlib import Path
import pickle
from tqdm.notebook import tqdm
from scipy.signal import find_peaks
import pandas as pd
import seaborn as sns

from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.signal.make_spectrogram import make_spectrogram
from utils.detection.TiSSNet import TiSSNet, process_batch

# --- Paths -------------------------------------------------------------------
catalog_path = "/media/plerolland/akoustik/MAHY"
tissnet_checkpoint = "../../../../data/models/TiSSNet/torch_save_checked_2"
out_dir = "../../../../data/detection/TiSSNet_raw/MAHY"  # where files will be saved
Path(out_dir).mkdir(parents=True, exist_ok=True)  # create output directory if needed

stations = StationsCatalog(catalog_path).filter_out_undated() # remove stations with no start / end dates

# --- Model -------------------------------------------------------------------
# The checkpoint is always deserialized on CPU (map_location) so that it can be loaded
# on a machine without CUDA; the model is moved to `device` at the end of this section.
model_det = TiSSNet()
model_det.load_state_dict(torch.load(tissnet_checkpoint, map_location="cpu"))

# --- Segmentation of the recordings ------------------------------------------
DELTA = datetime.timedelta(seconds=3600)  # duration of segments that are given to TiSSNet
OVERLAP = 0.02   # overlap between those segments (no link with STFT)
STEP = (1 - OVERLAP) * DELTA  # time shift between the starts of two consecutive segments
batch_size = 1  # number of segments that are fed together to TiSSNet

# parameters of peak finding (TiSSNet outputs 1 value per spectrogram time bin, we use a peak finding algorithm to save only the peaks)
TISSNET_PROMINENCE = 0.05  # passed as `prominence` to scipy.signal.find_peaks
ALLOWED_ERROR_S = 5        # minimal spacing between two peaks, in seconds (converted to bins before use)
MIN_HEIGHT = 0.05          # peaks whose height is not above this value are discarded

TIME_RES = 0.5342  # duration of each spectrogram pixel in seconds
FREQ_RES = 0.9375  # f of each spectrogram pixel in Hz

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model_det.to(device)


from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import torch
import math
import os
from concurrent.futures import ProcessPoolExecutor
import torch

BATCH_SIZE = 8  # number of segments prepared in parallel and stacked into one model batch
HEIGHT = 128    # number of rows every spectrogram is cropped / padded to

def process_and_make_spec(idx, start, STEP, DELTA, end, sampling_f, HEIGHT):
    """Read segment number `idx` and turn it into a spectrogram with exactly HEIGHT rows.

    The segment spans [start + idx * STEP, min(end, start + idx * STEP + DELTA)] and is
    read through the manager returned by `get_manager_for_worker()`.

    Returns None when the segment holds at most one second of samples (according to
    `sampling_f`), otherwise a tuple (segment start, float32 array with a leading
    singleton axis).
    """
    window_begin = start + idx * STEP
    window_stop = min(end, window_begin + DELTA)
    reader = get_manager_for_worker()
    samples = reader.get_segment(window_begin, window_stop)

    # not enough data in this window: nothing to give to the model
    if len(samples) / sampling_f <= 1:
        return None

    spectro = make_spectrogram(samples, reader.sampling_f, t_res=TIME_RES, f_res=FREQ_RES,
                               return_bins=False, normalize=True, vmin=-35, vmax=140)
    spectro = spectro.astype(np.float32)

    n_rows = spectro.shape[0]
    if n_rows < HEIGHT:
        # too few rows: pad in front of the first axis (np.pad 'constant' mode)
        spectro = np.pad(spectro, ((HEIGHT - n_rows, 0), (0, 0)), 'constant')
    elif n_rows > HEIGHT:
        # too many rows: keep only the last HEIGHT ones
        spectro = spectro[-HEIGHT:]

    return window_begin, spectro[np.newaxis, :, :]

_manager_cache = {}  # maps a process id to the manager built in that process

def get_manager_for_worker():
    """Return the manager of `global_station`, building it at most once per process.

    `global_station` is a module-level name assigned by the station loop of the
    parent process. The manager is built on the first call made in a process and
    stored under the process id; later calls in that process return the stored one.
    """
    worker_pid = os.getpid()
    if worker_pid in _manager_cache:
        return _manager_cache[worker_pid]

    # same station preparation as in the station loop, applied before building the manager
    if "43" in global_station.name:
        global_station.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
    global_station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)

    worker_manager = global_station.get_manager()
    _manager_cache[worker_pid] = worker_manager
    return worker_manager

# Parallel detection: spectrograms are computed by a pool of worker processes,
# TiSSNet runs in this process, and the peaks of its output are appended to a pickle file.
for station in stations:
    print(f"Starting detection on {station.name}")
    global_station = station  # for the workers (read by get_manager_for_worker)
    # station whose name contains "43" is read from a dedicated directory
    if "43" in station.name:
        station.path = "/media/plerolland/akoustik/MAHY/MAHY4_fixed/MAHY43"
    station.other_kwargs["raw"] = True  # read raw data (i.e. not drift-corrected)
    manager = station.get_manager()
    out_file = f"{out_dir}/{station.dataset}_{station.name}.pkl"

    start, end = manager.dataset_start, manager.dataset_end
    steps = int(np.ceil((end - start) / STEP))
    start_idx = 0

    # Resume support: restart from the segment that contains the last saved detection.
    # `last_date` is reset for every station so that an empty file never reuses the
    # value of the previous station.
    # NOTE(review): pickle.load can execute arbitrary code - only read files written by this notebook.
    last_date = None
    if Path(out_file).exists():
        with open(out_file, "rb") as f:
            while True:
                try:
                    last_date = pickle.load(f)[0]
                except EOFError:
                    break
    if last_date is not None:
        start_idx = max(0, int(np.floor((last_date - start) / STEP)))

    with open(out_file, "ab") as f_out, \
         ProcessPoolExecutor() as executor:

        for i in tqdm(range(start_idx, steps, BATCH_SIZE)):
            idxs = list(range(i, min(i + BATCH_SIZE, steps)))
            n_idxs = len(idxs)
            results = list(executor.map(
                process_and_make_spec,
                idxs,
                [start] * n_idxs,
                [STEP] * n_idxs,
                [DELTA] * n_idxs,
                [end] * n_idxs,
                [manager.sampling_f] * n_idxs,
                [HEIGHT] * n_idxs
            ))

            # workers return None for segments that do not contain enough data
            results = [r for r in results if r is not None]
            if not results:
                continue

            times_loaded, spectros = zip(*results)

            try:
                batch_tensor = np.stack(spectros)
                preds = process_batch(batch_tensor, device, model_det)
                pairs = zip(times_loaded, preds)
            except ValueError:
                # np.stack raises ValueError when the spectrograms do not all have the same shape
                # (e.g. shorter last segment): fall back to one model call per spectrogram
                pairs = []
                for time_, spec in zip(times_loaded, spectros):
                    try:
                        spec_tensor = np.expand_dims(spec, 0)
                        pred = process_batch(spec_tensor, device, model_det)[0]
                        pairs.append((time_, pred))
                    except Exception as e:
                        # best effort: report the failing spectrogram and keep going
                        print(f"Erreur dans le traitement du spectro isolé : {e}")

            for seg_start, pred in pairs:
                # use the real duration of the segment: the last one is cut at `end` and is shorter than DELTA
                seg_end = min(end, seg_start + DELTA)
                t_res = (seg_end - seg_start).total_seconds() / pred.shape[0]
                peak_idx, peak_props = find_peaks(pred, height=0, distance=math.ceil(ALLOWED_ERROR_S / t_res), prominence=TISSNET_PROMINENCE)
                for t, prob in zip(peak_idx * t_res, peak_props["peak_heights"]):
                    if prob <= MIN_HEIGHT:
                        continue
                    date = seg_start + datetime.timedelta(seconds=t)
                    if last_date is not None and date <= last_date:
                        continue  # already saved by the run we are resuming from
                    pickle.dump([date, prob.astype(np.float16)], f_out)

In [None]:
from utils.data_reading.sound_data.sound_file_manager import DatFilesManager
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
import torch
import math
import os
from concurrent.futures import ProcessPoolExecutor
import torch

BATCH_SIZE = 8  # number of segments prepared in parallel and stacked into one model batch
HEIGHT = 128    # number of rows every spectrogram is cropped / padded to

def process_and_make_spec(idx, start, STEP, DELTA, end, sampling_f, HEIGHT):
    """Read segment number `idx` and turn it into a spectrogram with exactly HEIGHT rows.

    The segment spans [start + idx * STEP, min(end, start + idx * STEP + DELTA)] and is
    read through the manager returned by `get_manager_for_worker()`.

    Returns None when the segment holds at most one second of samples (according to
    `sampling_f`), otherwise a tuple (segment start, float32 array with a leading
    singleton axis).
    """
    seg_start = start + idx * STEP
    seg_end = min(end, seg_start + DELTA)
    manager = get_manager_for_worker()
    # (the debugging print of seg_start / dataset_start that ran for every segment was removed)
    data = manager.get_segment(seg_start, seg_end)
    if len(data) / sampling_f <= 1:
        return None  # not enough data in this window: nothing to give to the model

    spec = make_spectrogram(data, manager.sampling_f, t_res=TIME_RES, f_res=FREQ_RES, return_bins=False, normalize=True, vmin=-35, vmax=140).astype(np.float32)
    if spec.shape[0] > HEIGHT:
        # too many rows: keep only the last HEIGHT ones
        spec = spec[-HEIGHT:]
    elif spec.shape[0] < HEIGHT:
        # too few rows: pad in front of the first axis (np.pad 'constant' mode)
        spec = np.pad(spec, ((HEIGHT - spec.shape[0], 0), (0, 0)), 'constant')
    return (seg_start, spec[np.newaxis, :, :])

_manager_cache = {}  # maps a process id to the manager built in that process

def get_manager_for_worker():
    """Return the manager of `global_station`, building it at most once per process.

    `global_station` is a module-level name assigned by the station loop of the
    parent process. The manager is built on the first call made in a process and
    stored under the process id; later calls in that process return the stored one.
    """
    worker_pid = os.getpid()
    if worker_pid in _manager_cache:
        return _manager_cache[worker_pid]

    worker_manager = global_station.get_manager()
    _manager_cache[worker_pid] = worker_manager
    return worker_manager

# Parallel detection: spectrograms are computed by a pool of worker processes,
# TiSSNet runs in this process, and the peaks of its output are appended to a pickle file.
for station in stations:
    print(f"Starting detection on {station.name}")
    global_station = station  # for the workers (read by get_manager_for_worker)
    manager = station.get_manager()
    out_file = f"{out_dir}/{station.dataset}_{station.name}.pkl"


    start, end = manager.dataset_start, manager.dataset_end
    steps = int(np.ceil((end - start) / STEP))
    start_idx = 0

    # Resume support: restart from the segment that contains the last saved detection.
    # `last_date` is reset for every station so that an empty file never reuses the
    # value of the previous station.
    # NOTE(review): pickle.load can execute arbitrary code - only read files written by this notebook.
    last_date = None
    if Path(out_file).exists():
        with open(out_file, "rb") as f:
            while True:
                try:
                    last_date = pickle.load(f)[0]
                except EOFError:
                    break
    if last_date is not None:
        start_idx = max(0, int(np.floor((last_date - start) / STEP)))

    with open(out_file, "ab") as f_out, \
         ProcessPoolExecutor() as executor:

        for i in tqdm(range(start_idx, steps, BATCH_SIZE)):
            idxs = list(range(i, min(i + BATCH_SIZE, steps)))
            n_idxs = len(idxs)
            results = list(executor.map(
                process_and_make_spec,
                idxs,
                [start] * n_idxs,
                [STEP] * n_idxs,
                [DELTA] * n_idxs,
                [end] * n_idxs,
                [manager.sampling_f] * n_idxs,
                [HEIGHT] * n_idxs
            ))

            # workers return None for segments that do not contain enough data
            results = [r for r in results if r is not None]
            if not results:
                continue

            times_loaded, spectros = zip(*results)

            try:
                batch_tensor = np.stack(spectros)
                preds = process_batch(batch_tensor, device, model_det)
                pairs = zip(times_loaded, preds)
            except ValueError:
                # np.stack raises ValueError when the spectrograms do not all have the same shape
                # (e.g. shorter last segment): fall back to one model call per spectrogram
                pairs = []
                for time_, spec in zip(times_loaded, spectros):
                    try:
                        spec_tensor = np.expand_dims(spec, 0)
                        pred = process_batch(spec_tensor, device, model_det)[0]
                        pairs.append((time_, pred))
                    except Exception as e:
                        # best effort: report the failing spectrogram and keep going
                        print(f"Erreur dans le traitement du spectro isolé : {e}")

            for seg_start, pred in pairs:
                # use the real duration of the segment: the last one is cut at `end` and is shorter than DELTA
                seg_end = min(end, seg_start + DELTA)
                t_res = (seg_end - seg_start).total_seconds() / pred.shape[0]
                peak_idx, peak_props = find_peaks(pred, height=0, distance=math.ceil(ALLOWED_ERROR_S / t_res), prominence=TISSNET_PROMINENCE)
                for t, prob in zip(peak_idx * t_res, peak_props["peak_heights"]):
                    if prob <= MIN_HEIGHT:
                        continue
                    date = seg_start + datetime.timedelta(seconds=t)
                    if last_date is not None and date <= last_date:
                        continue  # already saved by the run we are resuming from
                    pickle.dump([date, prob.astype(np.float16)], f_out)

Take a look at the results

In [None]:
# Load every [date, probability] record of one station and plot the number of detections per hour.
detection_file = f"{out_dir}/MAHY0_MAHY01.pkl"

# The file is a sequence of independently pickled records: read until EOF.
# NOTE(review): pickle.load can execute arbitrary code - only open files written by this notebook.
d = []
with open(detection_file, "rb") as f:
    while True:
        try:
            d.append(pickle.load(f))
        except EOFError:
            break
print(f"{len(d)} detections found")  # number of records, not the length of the file path

if d:
    dates_plot = [detection[0] for detection in d]  # first item of each record is the detection date
    df = pd.DataFrame({'date': dates_plot})
    df['date'] = pd.to_datetime(df['date'])

    # one bin per hour; hours without any detection are kept with a count of 0
    counts = df.resample('1H', on='date').size().asfreq('1H', fill_value=0)

    sns.barplot(x=counts.index.strftime("%Hh"), y=counts.values)
    plt.title(f"Hourly detections from {dates_plot[0].day:02d}/{dates_plot[0].month:02d}/{dates_plot[0].year}")
    plt.xlabel("Hour")
    plt.ylabel("Number of events")
else:
    # nothing to plot: the file exists but holds no record
    print("No detection to plot")