## Installs

In [3]:
# %%
!pip uninstall -y mediapipe || true
!pip install -U "numpy>=2.0,<2.3" pandas scikit-learn pillow tqdm opencv-python matplotlib


Found existing installation: mediapipe 0.10.21
Uninstalling mediapipe-0.10.21:
  Successfully uninstalled mediapipe-0.10.21
Collecting numpy<2.3,>=2.0
  Using cached numpy-2.2.6-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting opencv-python
  Using cached opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Using cached numpy-2.2.6-cp311-cp311-win_amd64.whl (12.9 MB)
Using cached opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl (39.0 MB)
Installing collected packages: numpy, opencv-python

  Attempting uninstall: numpy

    Found existing installation: numpy 1.26.4

    Uninstalling numpy-1.26.4:

   ---------------------------------------- 0/2 [numpy]
   ---------------------------------------- 0/2 [numpy]
      Successfully uninstalled numpy-1.26.4
   ---------------------------------------- 0/2 [numpy]
   ---------------------------------------- 0/2 [numpy]
   ---------------------------------------- 0/2 [numpy]
   ---------------------------------------- 0/2 [numpy]

## Data Prep

In [None]:
# %%
from pathlib import Path
import zipfile, sys, shutil
from tqdm import tqdm

BASE = Path.cwd()
RAW  = BASE / "data" / "raw"
RAW.mkdir(parents=True, exist_ok=True)

# --- Find ZIPs heuristically ---
zips = list((BASE / "data").glob("*.zip"))
if not zips:
    raise SystemExit("No .zip files found in the current folder. Put the two zips here and re-run.")

def pick_zip(zips, must_contain):
    must = must_contain.lower()
    for z in zips:
        name = z.name.lower()
        if all(x in name for x in must.split()):
            return z
    return None

aff_zip = pick_zip(zips, "affec 256") or pick_zip(zips, "Affectnet_256") or pick_zip(zips, "affect 256")
raf_zip = pick_zip(zips, "raf") or pick_zip(zips, "RAF-DB") or pick_zip(zips, "rafdb")

if not aff_zip or not raf_zip:
    print("I found these ZIPs:\n - " + "\n - ".join(z.name for z in zips))
    raise SystemExit(
        "\nCould not auto-detect both archives.\n"
        "Rename your files so one contains 'affec' and '256', and the other contains 'raf', then re-run."
    )

print(f"AffectNet ZIP → {aff_zip.name}")
print(f"RAF-DB   ZIP → {raf_zip.name}")

# --- Extract helpers ---
def extract_all(zip_path: Path, dest: Path):
    if dest.exists() and any(dest.iterdir()):
        print(f"Already extracted: {dest}")
        return dest
    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zf:
        members = zf.infolist()
        for m in tqdm(members, desc=f"Extracting {zip_path.name}", unit="file"):
            zf.extract(m, dest)
    return dest

AFF_DIR = RAW / "affectnet256"
RAF_DIR = RAW / "rafdb"

extract_all(aff_zip, AFF_DIR)
extract_all(raf_zip, RAF_DIR)

# Quick probe: count images we can see inside each extracted tree
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp"}
def count_images(root: Path):
    return sum(1 for p in root.rglob("*") if p.suffix.lower() in IMG_EXTS)

print("\nFound counts after extraction:")
print(" - AffectNet-256 images:", count_images(AFF_DIR))
print(" - RAF-DB images:",      count_images(RAF_DIR))

# Show shallow directory listings to help us confirm structure
def peek(root: Path, depth=2, max_items=10):
    print(f"\nPeek: {root}")
    shown = 0
    for p in root.rglob("*"):
        rel = p.relative_to(root)
        if len(rel.parts) <= depth:
            print("  ", rel)
            shown += 1
            if shown >= max_items:
                print("  ...")
                break

peek(AFF_DIR)
peek(RAF_DIR)


SystemExit: No .zip files found in the current folder. Put the two zips here and re-run.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
