# FACTR 01 Setup (Test Version)

**Date:** 2025-09-09  
**Purpose:** Prepare Colab runtime with correct environment and GitHub repo access.

Steps:
1. Mount Google Drive and clone/update the GitHub repo
2. Install pinned dependencies
3. Smoke test
4. Save environment snapshot
5. Optional: performance tweaks + secrets export
6. Commit and push changes back to GitHub

In [2]:
# === FACTR Colab Startup (private repo on Drive, PAT from Secrets) ===
from google.colab import drive, userdata
import os, urllib.parse, shutil

# Mount Drive
drive.mount('/content/drive')

# Repo info
DRIVE_ROOT = "/content/drive/MyDrive"
REPO_DIR   = f"{DRIVE_ROOT}/FATCR"
OWNER_REPO = "LukmaanViscomi/FATCR"
USERNAME   = "LukmaanViscomi"
CLEAN_URL  = f"https://github.com/{OWNER_REPO}.git"

# Git identity (per VM)
!git config --global user.name  "Colab User"
!git config --global user.email "colab@example.com"

# Ensure Drive root
os.makedirs(DRIVE_ROOT, exist_ok=True)
%cd $DRIVE_ROOT

# Clone (private) with PAT from Colab Secrets
pat = userdata.get("GITHUB_PAT")
assert pat, "‚ö†Ô∏è Add your token in Colab Secrets as key 'GITHUB_PAT'."
enc_pat  = urllib.parse.quote(pat, safe="")
enc_user = urllib.parse.quote(USERNAME, safe="")
AUTH_URL = f"https://{enc_user}:{enc_pat}@github.com/{OWNER_REPO}.git"

# Clean partial/broken clone
if os.path.isdir(REPO_DIR) and not os.path.isdir(os.path.join(REPO_DIR, ".git")):
    shutil.rmtree(REPO_DIR)

# Clone if missing
if not os.path.isdir(REPO_DIR):
    !git clone $AUTH_URL FATCR

# Use authenticated remote so pull/push works smoothly
%cd $REPO_DIR
!git remote set-url origin $AUTH_URL
!git pull --ff-only || true

# ‚úÖ Ensure notebooks directory exists
os.makedirs("notebooks", exist_ok=True)

!git status -sb
print("‚úÖ Repo ready:", REPO_DIR)
print("üìÇ notebooks directory ensured at:", os.path.join(REPO_DIR, "notebooks"))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive
/content/drive/MyDrive/FATCR
Already up to date.
## [32mmain[m...[31morigin/main[m
[31m??[m notebooks/
‚úÖ Repo ready: /content/drive/MyDrive/FATCR
üìÇ notebooks directory ensured at: /content/drive/MyDrive/FATCR/notebooks


In [2]:
%%bash
set -euo pipefail
# === Install pinned dependencies ===
# Each group below is its own pip invocation, executed in this order; with
# `set -e` the first failing install aborts the cell.

# Thin wrapper so every install runs quietly with the same flags.
qinstall() { pip install -q "$@"; }

# Packaging tools (pip capped below 25.3)
qinstall --upgrade 'pip<25.3' wheel

# Baseline scientific stack
qinstall 'numpy==2.0.2' 'pandas==2.2.3' 'pyarrow>=15,<17' 'jedi>=0.16'

# PyTorch trio
qinstall 'torch==2.5.1' 'torchvision==0.20.1' 'torchaudio==2.5.1'

# ASR stack
qinstall 'faster-whisper==1.1.1' 'ctranslate2==4.4.0' 'onnxruntime==1.18.1'

# WhisperX code only (--no-deps keeps it from pulling in its own dependency set)
qinstall --no-deps 'git+https://github.com/m-bain/whisperx.git'

# Utilities
qinstall yt-dlp ffmpeg-python scikit-learn matplotlib

Process is terminated.


In [None]:
# === Smoke test ===
# Prints the interpreter and core library versions, checks the pinned
# major.minor versions, and verifies that the ASR/utility packages import.
import torch, numpy as np, pandas as pd, importlib, sys

print("Python:", sys.version.split()[0])
print("torch:", torch.__version__, "| cuda:", torch.cuda.is_available())
print("numpy:", np.__version__, "| pandas:", pd.__version__)

# (label, module, required version prefix) — checked in this order.
expected_prefixes = (
    ("torch", torch, "2.5."),
    ("numpy", np, "2.0."),
    ("pandas", pd, "2.2."),
)
for label, module, prefix in expected_prefixes:
    found = module.__version__
    assert found.startswith(prefix), f"Unexpected {label}: {found}"

# Import check; packages without a __version__ attribute are reported as "git".
for mod_name in ("faster_whisper", "ctranslate2", "onnxruntime", "whisperx", "yt_dlp"):
    loaded = importlib.import_module(mod_name)
    print(f"{mod_name}:", getattr(loaded, "__version__", "git"))
print("‚úÖ Setup smoke test passed.")

In [None]:
# === Environment snapshot ===
# Writes a JSON record of the runtime (timestamp, Python, platform, CUDA
# availability, key library versions, first 150 lines of `pip freeze`) to
# snapshots/ENV_SETUP_SNAPSHOT_<unix-time>.json under the current directory.
import platform, sys, subprocess, json, time, os, torch, pandas as pd, numpy as np

# Run pip through the kernel's own interpreter so the listing describes the
# environment this notebook actually imports from, not whichever `pip`
# happens to be first on PATH.
freeze_lines = subprocess.check_output(
    [sys.executable, "-m", "pip", "freeze"], text=True
).splitlines()

snap = {
  "when": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
  "python": sys.version,
  "platform": platform.platform(),
  "cuda_available": torch.cuda.is_available(),
  "pins": {"torch": torch.__version__, "numpy": np.__version__, "pandas": pd.__version__},
  # Only the first 150 lines are kept, hence "head".
  "pip_freeze_head": freeze_lines[:150],
}
os.makedirs("snapshots", exist_ok=True)
p = f"snapshots/ENV_SETUP_SNAPSHOT_{int(time.time())}.json"
with open(p, "w") as f:
    json.dump(snap, f, indent=2)
print("üì∏ Saved:", p)

In [None]:
# === Perf knobs + secrets export ===
# Pins the common math-library thread-count variables to "1", enables TF32
# on CUDA when a GPU is present, and copies OPENAI_API_KEY from Colab Secrets
# into the process environment when it is defined.
import os, torch

# NOTE(review): torch is imported before these variables are set, so they may
# only influence libraries loaded later or child processes — confirm.
for thread_var in (
    "OMP_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "MKL_NUM_THREADS",
    "NUMEXPR_NUM_THREADS",
):
    os.environ[thread_var] = "1"

if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
print("‚úÖ Perf knobs set.")

from google.colab import userdata
api_key = userdata.get("OPENAI_API_KEY")
if not api_key:
    print("‚ÑπÔ∏è No OPENAI_API_KEY in Colab Secrets (that‚Äôs fine until notebook 04).")
else:
    os.environ["OPENAI_API_KEY"] = api_key
    print("‚úÖ OPENAI_API_KEY set in env for this runtime.")

In [None]:
# === FACTR push (PAT from Colab Secrets, with checks) ===
from google.colab import userdata
import urllib.parse, os, subprocess, shlex

REPO_DIR = "/content/drive/MyDrive/FATCR"
os.chdir(REPO_DIR)

# Show status first
print("üìÇ Repo status:")
!git status -sb

# Pull latest (rebase) to avoid non-fast-forward errors
print("\nüîÑ Pulling latest (rebase)‚Ä¶")
pat = userdata.get("GITHUB_PAT")
assert pat, "Missing GITHUB_PAT in Colab Secrets."
enc_pat = urllib.parse.quote(pat, safe="")
PULL_URL = f"https://LukmaanViscomi:{enc_pat}@github.com/LukmaanViscomi/FATCR.git"
!git pull --rebase {PULL_URL} main  || true

# Stage only the files we actually track in this repo
!git add notebooks README.md jobs.csv .gitignore .github 2>/dev/null || true

# Commit only if there are changes
changes = subprocess.run(["git","diff","--cached","--quiet"]).returncode != 0
if changes:
    # You can change this default message if you like:
    msg = "Update notebooks/configs from Colab"
    print("\n‚úèÔ∏è Commit message:", msg)
    !git commit -m "{msg}"
else:
    print("\n‚ÑπÔ∏è Nothing staged to commit.")

# Push (inject PAT only for the network call)
print("\n‚¨ÜÔ∏è Pushing to main‚Ä¶")
PUSH_URL = PULL_URL
!git push {PUSH_URL} HEAD:main

print("\n‚úÖ Push complete.")

