In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
from hydra import compose, initialize

# Compose the Hydra "inference_config" with two command-line style overrides
# and echo the resolved config so the run parameters are visible in the output.
# NOTE(review): "../config" is presumably resolved relative to this notebook's
# location — confirm if the notebook is moved.
with initialize(config_path="../config", version_base=None):
    cfg = compose(
        config_name="inference_config",
        overrides=[
            "data.size=768",
            "num_frames=9",
        ],
    )
    print(cfg)

In [None]:
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from falldet.data.video_dataset_factory import get_video_datasets
from falldet.visualization import visualize_video

# --- Logging -----------------------------------------------------------------
# force=True replaces any handlers already installed on the root logger, so
# re-running this cell does not stack duplicate handlers.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
    force=True,
)
logger = logging.getLogger(__name__)

# The plotting libraries are chatty at DEBUG level; only surface warnings.
for _noisy_logger in ("matplotlib", "seaborn"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# --- Plot theme --------------------------------------------------------------
# text.usetex=True makes matplotlib typeset all text through LaTeX, which
# requires a working LaTeX installation on the machine running the notebook.
sns.set_theme(
    context="paper",
    style="white",
    rc={
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": ["Computer Modern Roman"],
    },
)

# --- Dataset -----------------------------------------------------------------
# get_video_datasets returns a container exposing a `.datasets` sequence;
# only its first entry is analysed in this notebook.
video_datasets = get_video_datasets(
    cfg,
    mode=cfg.data.mode,
    split=cfg.data.split,
    seed=cfg.data.seed,
    size=cfg.data.size,
)
dataset = video_datasets.datasets[0]

In [None]:
# One row per entry of dataset.video_segments; the columns read below are
# "video_path", "label_str" and "duration".
df = pd.DataFrame(dataset.video_segments)

# Headline counts, followed by descriptive statistics of the segment durations.
print(
    f"Total segments: {len(df)}",
    f"Unique videos:  {df['video_path'].nunique()}",
    f"Unique labels:  {df['label_str'].nunique()}",
    "\nDuration stats (seconds):",
    sep="\n",
)
print(df["duration"].describe().round(2))

In [None]:
# Horizontal bar chart of the number of segments per label, most frequent first.
label_order = df["label_str"].value_counts().index

fig, ax = plt.subplots(figsize=(9, 6))
# seaborn >= 0.13 deprecates `palette` without `hue`; the documented
# replacement is to map the categorical variable to `hue` and hide the legend.
# hue_order mirrors `order` so the colour gradient still follows the bar order.
sns.countplot(
    data=df,
    y="label_str",
    order=label_order,
    hue="label_str",
    hue_order=label_order,
    palette="Blues_r",
    legend=False,
    ax=ax,
)

# Annotate every bar with its count.
for container in ax.containers:
    ax.bar_label(container, padding=3, fontsize=9)

ax.set_xlabel("Count")
ax.set_ylabel("")
fig.tight_layout()
sns.despine(fig=fig)

# savefig does not create missing directories, so make sure the target exists.
out_path = Path("../outputs/plots/segment_label_distribution.pdf")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, bbox_inches="tight")
plt.show()

In [None]:
# Histogram of segment durations on a log-scaled x axis, with the mean and
# median marked as vertical reference lines.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(
    df,
    x="duration",
    bins=50,
    log_scale=True,
    kde=False,
    color="steelblue",
    ax=ax,
)

mean_dur = df["duration"].mean()
median_dur = df["duration"].median()
ax.axvline(x=mean_dur, label=f"Mean: {mean_dur:.1f}s", color="tomato", linestyle="--")
ax.axvline(x=median_dur, label=f"Median: {median_dur:.1f}s", color="orange", linestyle="-.")

ax.set(
    xlabel="Duration (seconds)",
    ylabel="Count",
    title="Segment Duration Distribution",
)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Stacked duration histogram restricted to the 'fall', 'fallen' and 'jump' labels.
# Drawn on an explicit Axes with labelled axes, like the other plots in this
# notebook, and ended with plt.show() so no Axes repr is echoed as cell output.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(
    data=df.query("label_str in ['fall', 'fallen', 'jump']"),
    x="duration",
    hue="label_str",
    multiple="stack",
    ax=ax,
)
ax.set_xlabel("Duration (seconds)")
ax.set_ylabel("Count")
fig.tight_layout()
plt.show()

In [None]:
# Empirical cumulative distribution of segment durations.
fig, ax = plt.subplots(figsize=(8, 4))
sns.ecdfplot(df, x="duration", ax=ax)
ax.set(xlabel="Duration (seconds)")

# Upper-tail duration quantiles, shown as the cell's output value.
df["duration"].quantile([0.9, 0.95, 0.975, 0.99])

In [None]:
# Box plot of segment duration per label, with labels ordered by `label_order`
# (computed in the label-count cell above).
# Only segments shorter than 12 s are drawn.
# NOTE(review): the 12 s cutoff is presumably there to keep the long tail from
# compressing the boxes — confirm.
g = sns.catplot(
    data=df.query("duration < 12"),
    x="duration",
    y="label_str",
    kind="box",
    order=label_order,
    # seaborn >= 0.13 deprecates `palette` without `hue`; map the label to hue
    # and suppress the redundant legend to keep the same per-label colours.
    hue="label_str",
    hue_order=label_order,
    palette="Blues_r",
    legend=False,
    height=5,
    aspect=1.4,
)
g.despine(left=False, bottom=False)
g.ax.set_xlabel("Duration (seconds)")
g.ax.set_ylabel("")

# Save through the grid itself rather than pyplot's implicit current figure,
# and create the output directory first because savefig will not.
out_path = Path("../outputs/plots/segment_duration_boxplot.pdf")
out_path.parent.mkdir(parents=True, exist_ok=True)
g.savefig(out_path, bbox_inches="tight")

In [None]:
# Show every field of the first segment record (row 0 of df).
df.iloc[0]

In [None]:
# Preview the first three segment records as a table.
df.head(3)

In [None]:
# Render the dataset item at position `idx` with the project's visualize_video
# helper and save the resulting figure as a PDF named after that index.
idx = 0
fig, ax = visualize_video(idx=idx, dataset=dataset)

# savefig does not create missing directories, so make sure the target exists.
out_path = Path(f"../outputs/plots/example_segment_{idx}.pdf")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, bbox_inches="tight")

In [None]:
# Fetch a single dataset item by position for ad-hoc inspection.
# NOTE(review): 1640 is a hard-coded index; this presumably fails if the
# selected split has fewer items — confirm against len(dataset).
# NOTE(review): the item may hold more than raw frames (e.g. labels/metadata) — verify.
frames = dataset[1640]