# Exploration — database/videos.csv

Ce notebook explore la table **videos** (métadonnées OpenCV/MediaInfo).

Fichier: `database/videos.csv`

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Location of the videos table, relative to the notebook's working directory.
DATA_PATH = Path('../database/videos.csv')

# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, and `is_file()` also rejects a directory at that path.
if not DATA_PATH.is_file():
    raise FileNotFoundError(f'File not found: {DATA_PATH.resolve()}')

df = pd.read_csv(DATA_PATH)
# Last expression of the cell: shows (n_rows, n_cols) and the column names.
df.shape, df.columns.tolist()

In [None]:
# Show the raw table before any column is converted or derived.
display(df)

# Coerce the numeric metadata columns when present; values that cannot be
# parsed become NaN (errors='coerce') instead of raising.
numeric_cols = ['fps', 'width', 'height', 'frame_count', 'duration_sec']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

if {'width', 'height'}.issubset(df.columns):
    width, height = df['width'], df['height']

    # Mask zero heights to NaN so the ratio is missing rather than +/-inf,
    # which would otherwise distort the summary statistics below.
    df['aspect_ratio'] = width / height.where(height != 0)

    # Comparisons against NaN are False, so a plain np.where would label every
    # row with a missing dimension as 'portrait'. Leave those rows missing.
    dims_known = width.notna() & height.notna()
    orientation = pd.Series(
        np.where(width >= height, 'landscape', 'portrait'),
        index=df.index,
        dtype=object,
    )
    df['orientation'] = orientation.where(dims_known)

# Summary of every column (numeric and non-numeric), one row per column.
display(df.describe(include='all').T)

In [None]:
# Histograms of FPS, duration and frame count, one panel each.
fig, axes = plt.subplots(1, 3, figsize=(16, 4))

# (column, panel title) in left-to-right panel order; a column that is absent
# from df leaves its panel empty.
panels = [
    ('fps', 'FPS distribution'),
    ('duration_sec', 'Duration (sec) distribution'),
    ('frame_count', 'Frame count distribution'),
]
for ax, (column, title) in zip(axes, panels):
    if column not in df.columns:
        continue
    # Missing values are dropped before binning.
    ax.hist(df[column].dropna(), bins=20)
    ax.set_title(title)

plt.tight_layout()
plt.show()

In [None]:
# Resolution overview: number of videos per "WIDTHxHEIGHT", then per orientation.
if {'width', 'height'}.issubset(df.columns):
    # Drop rows with a missing dimension first; otherwise they are stringified
    # to "<NA>x<NA>" and counted as if that were a real resolution.
    dims = df[['width', 'height']].dropna()
    res_labels = (
        dims['width'].astype('Int64').astype(str)
        + 'x'
        + dims['height'].astype('Int64').astype(str)
    )
    res_counts = (
        dims.assign(res=res_labels)
        .groupby('res')
        .size()
        .sort_values(ascending=False)
    )
    display(res_counts)

    # Plotting an empty Series raises, so only draw when there is data.
    if not res_counts.empty:
        # Grow the figure with the number of bars so the labels stay readable.
        plt.figure(figsize=(10, max(4, 0.3 * len(res_counts))))
        # Reverse so the most frequent resolution is drawn at the top.
        res_counts.iloc[::-1].plot(kind='barh')
        plt.title('Resolution counts')
        plt.tight_layout()
        plt.show()

if 'orientation' in df.columns:
    display(df['orientation'].value_counts())