In [2]:
# JupyterLab cell (run this in your notebook)

import shutil
from itertools import islice
from pathlib import Path

# ---- settings ----
folder = Path(".")          # same folder as your notebook; change if needed
pattern = "*.data"          # change to whatever matches your files, e.g. "*.atom" or "sio2_*.data"
cut_from_line = 4626        # 1-indexed line number: keep lines 1..4625, delete 4626+

dry_run = False              # True: only report what would happen; False: actually overwrite files
backup = False             # True: keep a .bak copy before overwriting; False: overwrite with no backup

# ---- helper ----
def truncate_file_at_line(path: Path, cut_from: int, make_backup: bool = True):
    """Truncate ``path`` in place so that only lines ``1..cut_from-1`` remain.

    The file is read in a single pass. Original line endings and any bytes
    that are not valid UTF-8 are preserved exactly, so the kept portion is a
    byte-for-byte prefix of the original file.

    Args:
        path: File to truncate.
        cut_from: 1-indexed number of the first line to delete. Values below
            1 behave like 1, i.e. every line of a non-empty file is removed.
        make_backup: When true, copy the untouched file to the same name with
            ``.bak`` appended before overwriting. No backup is written when
            the file is left unchanged.

    Returns:
        ``("truncated", n)`` where ``n`` is the number of lines kept, or
        ``("shorter_than_cut", 0)`` when the file has fewer than ``cut_from``
        lines and was therefore left untouched.
    """
    # Clamp so that islice() never receives a negative stop value.
    keep_n = max(cut_from - 1, 0)

    # newline="" still splits on "\n", "\r" and "\r\n" but hands the endings
    # back untranslated; surrogateescape round-trips undecodable bytes instead
    # of replacing them, so writing the kept lines reproduces the input bytes.
    with path.open("r", encoding="utf-8", errors="surrogateescape", newline="") as f:
        kept_lines = list(islice(f, keep_n))
        # One more line must exist, otherwise there is nothing to cut off.
        has_more = next(f, None) is not None

    if not has_more:
        return "shorter_than_cut", 0

    if make_backup:
        # Byte-exact copy of the original, taken before anything is modified.
        shutil.copy2(path, path.with_suffix(path.suffix + ".bak"))

    with path.open("w", encoding="utf-8", errors="surrogateescape", newline="") as f:
        f.writelines(kept_lines)
    return "truncated", len(kept_lines)

# ---- run ----
# Collect the matching files in sorted order, then preview or truncate each one.
files = sorted(folder.glob(pattern))
if files:
    print(f"Matched {len(files)} files in {folder.resolve()}")
    for p in files:
        if not dry_run:
            # Real run: rewrite the file in place and report the outcome.
            status, kept = truncate_file_at_line(p, cut_from_line, make_backup=backup)
            print(f"{status.upper()}: {p.name}  (kept_lines={kept})")
            continue

        # Dry run: count the lines once and report what a real run would do.
        with p.open("r", encoding="utf-8", errors="replace") as f:
            nlines = sum(1 for _ in f)
        if nlines < cut_from_line:
            action = "SKIP (shorter)"
        else:
            action = "WOULD TRUNCATE"
        print(f"{action}: {p.name}  (lines={nlines}, keep={cut_from_line-1})")

    if dry_run:
        print("\nDry run only. Set dry_run=False to overwrite files.")
else:
    print(f"No files matched {pattern!r} in {folder.resolve()}")

Matched 30 files in /mnt/c/Users/sutto/desktop/SiO2_MQ_5000atom_1012cr_tersoff/sim1/sio2_tersoff_endstructures
TRUNCATED: sio2_endstructure (1).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (10).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (11).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (12).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (13).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (14).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (15).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (16).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (17).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (18).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (19).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (2).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (20).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (21).data  (kept_lines=4625)
TRUNCATED: sio2_endstructure (22).data  (kept_lines=4625)
TRUNCATED: sio2_endst