In [1]:
# inspect_one.py
from pathlib import Path
import numpy as np

def inspect(path):
    """Print a quick health summary of an array stored in a ``.npy`` file.

    The summary covers shape, dtype, NaN-aware min/max/mean, NaN/inf presence,
    the largest absolute value, and the first ten flattened elements.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the file to load with ``np.load``.

    Returns
    -------
    None
        Everything is reported on stdout. Load failures are printed as
        ``load error: ...`` instead of being raised.
    """
    p = Path(path)
    print("path:", p)
    try:
        a = np.load(p)
    except Exception as e:
        print("load error:", e)
        return
    # np.load hands back an archive object (not an ndarray) for .npz files;
    # it has no shape/dtype, so report it the same way as any other load failure.
    if not isinstance(a, np.ndarray):
        print("load error:", f"expected a single array, got {type(a).__name__}")
        close = getattr(a, "close", None)
        if callable(close):
            close()
        return
    print("shape:", a.shape, "dtype:", a.dtype)
    # The reductions below raise ValueError on zero-size input, so stop here
    # rather than crash on an empty file.
    if a.size == 0:
        print("empty array: no statistics to report")
        return
    print("min,max,mean:", float(np.nanmin(a)), float(np.nanmax(a)), float(np.nanmean(a)))
    print("any nan:", np.isnan(a).any(), "any inf:", np.isinf(a).any())
    print("abs max:", float(np.max(np.abs(a))))
    # ravel() avoids copying the whole array just to show ten values.
    print("sample slice:", a.ravel()[:10])

# Example: change to your actual path(s)
# Embeddings live in an "embeddings" folder next to the current working
# directory's parent; each listed file is summarised in turn.
ROOT = Path.cwd().parent / "embeddings"
for rel_path in ("train/837.npy", "train/005_010.npy"):
    inspect(ROOT / rel_path)

path: c:\Users\lkmah\OneDrive\Desktop\Lokesh\VS Code\DeepFake_Detection_SIC\embeddings\train\837.npy
shape: (8, 1536) dtype: float32
min,max,mean: -0.2308262586593628 1244.763916015625 7.593655109405518
any nan: False any inf: False
abs max: 1244.763916015625
sample slice: [ 0.4245326  -0.05127138 -0.06334302 -0.09210514 -0.07871459 -0.13140175
  0.5028961  -0.122256    0.3583344  -0.16328725]
path: c:\Users\lkmah\OneDrive\Desktop\Lokesh\VS Code\DeepFake_Detection_SIC\embeddings\train\005_010.npy
shape: (8, 1536) dtype: float32
min,max,mean: -0.2421875 2.1171875 0.0782107338309288
any nan: False any inf: False
abs max: 2.1171875
sample slice: [ 0.21594238  0.0362854  -0.13696289 -0.02792358 -0.20605469  0.00984955
 -0.1184082  -0.15991211  0.23986816 -0.10656738]


In [1]:
# check_overlap.py
from pathlib import Path
ROOT = Path.cwd().parent
EMB_ROOT = ROOT / "embeddings"

# Map each split name to the set of embedding file stems found on disk.
# A split whose directory is missing is reported and left out of the map.
splits = {}
for s in ["train", "val", "test"]:
    p = EMB_ROOT / s
    if not p.exists():
        print("Missing", p)
        continue
    stems = {f.stem for f in p.glob("*.npy")}
    splits[s] = stems
    print(s, "count:", len(stems))

# Compare every pair of splits that was actually found. Iterating over the
# keys of `splits` (rather than the hard-coded names) avoids a KeyError when
# a split directory is missing. The `a >= b` string comparison keeps exactly
# one ordering of each pair and skips comparing a split with itself.
for a in splits:
    for b in splits:
        if a >= b:
            continue
        inter = splits[a] & splits[b]
        if inter:
            print("OVERLAP between", a, b, "count:", len(inter))
            # Sets have no stable order; sort so the examples are reproducible.
            print("Examples:", sorted(inter)[:10])
        else:
            print("No overlap between", a, b)

train count: 4066
val count: 761
test count: 255
No overlap between train val
No overlap between test train
No overlap between test val


In [6]:
# label_counts.py
import json
from collections import Counter
from pathlib import Path
import sys
import importlib.util

ROOT = Path.cwd().parent
# Put the project root on sys.path, but only once: re-running this cell must
# not keep stacking duplicate entries.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# load train_temporal.py directly to avoid requiring process_data to be a package
tt_path = ROOT / "process_data" / "train_temporal.py"
if not tt_path.is_file():
    # Fail early with a message that says how the path was derived, instead of
    # an error surfacing from inside the import machinery.
    raise FileNotFoundError(
        f"train_temporal.py not found at {tt_path} "
        f"(ROOT is the parent of the current working directory: {ROOT}); "
        "check the working directory or adjust tt_path"
    )
spec = importlib.util.spec_from_file_location("process_data.train_temporal", str(tt_path))
if spec is None or spec.loader is None:
    raise ImportError(f"could not build an import spec for {tt_path}")
train_temporal = importlib.util.module_from_spec(spec)
# Register the module before executing it, as the importlib "import a source
# file directly" recipe does, so code that looks it up by name can find it.
sys.modules[spec.name] = train_temporal
try:
    spec.loader.exec_module(train_temporal)
except BaseException:
    # Do not leave a half-initialised module behind if execution fails.
    sys.modules.pop(spec.name, None)
    raise
VideoEmbeddingDataset = train_temporal.VideoEmbeddingDataset

# Load the stem -> label mapping. JSON is UTF-8 by specification, so read it
# as such rather than with the platform default encoding.
LABELS_JSON = ROOT / "data" / "labels.json"
with open(LABELS_JSON, encoding="utf-8") as f:
    labels_map = json.load(f)
from glob import glob
# Count labels per split by looking up each embedding file's stem in
# labels_map; stems with no entry are tallied under "MISSING".
for split in ["train","val","test"]:
    # Resolve the split directory from ROOT, not the current working
    # directory, so this reads the same <ROOT>/embeddings tree as the other
    # checks in this notebook.
    files = sorted((ROOT / "embeddings" / split).glob("*.npy"))
    cnt = Counter()
    for p in files:
        stem = p.stem
        cnt[labels_map.get(stem, "MISSING")] += 1
    print(split, cnt)

from collections import Counter

def check_split(split):
    """Tally the labels produced by ``VideoEmbeddingDataset(split)``.

    Every item yielded by the dataset is unpacked as a 3-tuple; the middle
    element's ``.item()`` value is converted to ``int`` and counted.

    Parameters
    ----------
    split : passed straight through to ``VideoEmbeddingDataset``.

    Returns
    -------
    collections.Counter
        Mapping of integer label -> number of occurrences.
    """
    dataset = VideoEmbeddingDataset(split)
    return Counter(int(label.item()) for _, label, _ in dataset)

# Report the label distribution of each split, one line per split.
for heading, split_name in (("Train:", "train"), ("Val:", "val"), ("Test:", "test")):
    print(heading, check_split(split_name))

FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\lkmah\\OneDrive\\Desktop\\Lokesh\\VS Code\\DeepFake_Detection_SIC\\process_data\\train_temporal.py'