In [None]:
# ONE-CLICK SETUP & DRY-RUN (run this cell)
# This cell will:
#  - install core packages (quietly) when they are not already installed,
#  - optionally mount Google Drive,
#  - allow you to upload kaggle.json,
#  - prompt for HF_TOKEN and GITHUB_TOKEN,
#  - set RUN_ON_COLAB=1 and DRY_RUN=1 and run a safe dry-run validation of the main script,
#  - optionally start the full run, mirroring its output to results/colab_full_run.log.

import os, sys, getpass, subprocess
import importlib.util
print('One-Click Setup starting...')

SCRIPT = 'FarmFederate_Kaggle_Complete.py'
FULL_RUN_LOG = 'results/colab_full_run.log'

# 1) Install core packages (silent)
pkgs = ['qdrant-client','sentence-transformers','transformers','datasets','kaggle','huggingface_hub','torch','torchvision','scikit-learn','imgaug']
# pip distribution names are not always importable module names: hyphens become
# underscores, and a few packages use an unrelated module name.
import_names = {'scikit-learn': 'sklearn'}
missing = []
for p in pkgs:
    module_name = import_names.get(p, p.replace('-', '_'))
    try:
        # find_spec only locates the top-level module; it does not run the
        # package's import-time code, so the check stays cheap and side-effect free.
        found = importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        found = False
    if not found:
        missing.append(p)
if missing:
    print('Installing:', missing)
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q'] + missing)
else:
    print('All required packages already installed')

# 2) Optional: mount Google Drive
try:
    from google.colab import drive
    mount = input('Mount Google Drive to persist results? (Y/n): ').strip().lower() or 'y'
    if mount in ('y', 'yes'):
        drive.mount('/content/drive')
        os.makedirs('/content/drive/MyDrive/FarmFederate-results', exist_ok=True)
        print('Drive mounted: /content/drive/MyDrive/FarmFederate-results')
except ImportError:
    print('Not running in Colab or Drive API not available; skipping Drive mount')
except Exception as exc:
    # Best effort: a failed mount must not abort the rest of the setup,
    # but the real reason is reported instead of a generic "not in Colab".
    print('Drive mount failed; continuing without Drive:', exc)

# 3) Upload kaggle.json (optional)
try:
    from google.colab import files
    print('\nUpload kaggle.json now (or press Enter to skip)')
    uploaded = files.upload()
    if uploaded:
        kaggle_dir = os.path.join(os.path.expanduser('~'), '.kaggle')
        os.makedirs(kaggle_dir, exist_ok=True)
        kaggle_path = os.path.join(kaggle_dir, 'kaggle.json')
        # Prefer the file that is actually named kaggle.json; otherwise fall back
        # to the last uploaded file (the one the previous loop would have kept).
        payload = uploaded.get('kaggle.json')
        if payload is None:
            payload = list(uploaded.values())[-1]
        with open(kaggle_path, 'wb') as fh:
            fh.write(payload)
        try:
            # Credentials file: restrict access to the owner.
            os.chmod(kaggle_path, 0o600)
        except OSError as exc:
            print('Warning: could not restrict permissions on kaggle.json:', exc)
        print('Saved kaggle.json to ~/.kaggle/kaggle.json')
    else:
        print('No kaggle.json uploaded')
except ImportError:
    print('Upload not available (not in Colab). Use env vars KAGGLE_USERNAME/KAGGLE_KEY instead.')
except Exception as exc:
    print('kaggle.json upload failed; use env vars KAGGLE_USERNAME/KAGGLE_KEY instead:', exc)

# 4) Prompt for tokens (getpass keeps the typed value out of the cell output)
hf = getpass.getpass('Enter HF_TOKEN (leave blank to skip): ')
if hf:
    os.environ['HF_TOKEN'] = hf
    print('HF_TOKEN set')
gh = getpass.getpass('Enter GITHUB_TOKEN (leave blank to skip): ')
if gh:
    os.environ['GITHUB_TOKEN'] = gh
    print('GITHUB_TOKEN set')

# 5) Run dry-run validation
os.environ['RUN_ON_COLAB'] = '1'
os.environ['DRY_RUN'] = '1'
if not os.path.exists(SCRIPT):
    # The script is resolved relative to the working directory, so it is only
    # present after the repository has been cloned and entered.
    print(f"\n{SCRIPT} not found in {os.getcwd()}. Run the 'Clone repository & checkout branch' cell first, then re-run this cell.")
else:
    print('\nRunning dry-run validation (DRY_RUN=1) — this will not download large datasets')
    dry_run = subprocess.run([sys.executable, '-u', SCRIPT, '--dry-run'], check=False)
    if dry_run.returncode != 0:
        print(f'\nWarning: dry-run exited with code {dry_run.returncode}')
    print('\nDry-run completed. Inspect results/dataset_discovery_manifest.json and results/run_status.json for any issues.')

    # 6) Offer to proceed to full run
    cont = input('\nProceed to full run now (this will download datasets and may take a long time)? (y/N): ').strip().lower() or 'n'
    if cont in ('y', 'yes'):
        os.environ['DRY_RUN'] = '0'
        print('Starting full run (logs will stream to results/colab_full_run.log)')
        os.makedirs(os.path.dirname(FULL_RUN_LOG), exist_ok=True)
        # Echo every output line to the notebook and to the log file so the
        # message above is true and the log can be inspected later.
        with open(FULL_RUN_LOG, 'w', encoding='utf-8') as log_fh:
            proc = subprocess.Popen(
                [sys.executable, '-u', SCRIPT],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                encoding='utf-8',
                errors='replace',
            )
            for line in proc.stdout:
                print(line, end='')
                log_fh.write(line)
                log_fh.flush()
            full_run_code = proc.wait()
        print(f'\nFull run finished with exit code {full_run_code}')
    else:
        print('Full run skipped. You can run the full run later by setting DRY_RUN=0 and executing the script')


# FarmFederate Colab Launcher & README

**Quick start (recommended):**
1. Open this notebook in Colab: 
   https://colab.research.google.com/github/Solventerritory/FarmFederate-Advisor/blob/feature/multimodal-work/colab_run_farmfederate.ipynb
2. Runtime → Change runtime type → select GPU (T4 or P100 recommended).
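3. Run the **One-Click Setup & Dry-Run** cell (the first code cell in this notebook). It installs missing packages, optionally mounts Drive, lets you upload `kaggle.json`, prompts for tokens, and runs a safe dry run (`DRY_RUN=1`) of the pipeline. The cell runs `FarmFederate_Kaggle_Complete.py` from the current working directory, so clone the repository and change into it first (see the "Clone repository & checkout branch" cell).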

Important notes:
- Provide `kaggle.json` (upload) or set `KAGGLE_USERNAME` and `KAGGLE_KEY` env vars for Kaggle downloads.
- Provide `HF_TOKEN` (Hugging Face) and `GITHUB_TOKEN` if you need access to private HF/GitHub datasets.
- The full run may be long and download many datasets; use `--max-files` / `--max-text` limits or run a subset first.

If you want me to create a public Colab share link (hosted notebook) or add a short README file to the repo, say so and I'll add it.

# FarmFederate — Colab Run Notebook

This notebook prepares a Colab environment to run `FarmFederate_Kaggle_Complete.py` end-to-end (dry-run validation, full run, ingestion, RAG test, and result sync).

Important:
- Use a GPU runtime (Runtime → Change runtime type → GPU).
- Upload `kaggle.json` or set `KAGGLE_USERNAME`/`KAGGLE_KEY` before running ingestion steps.
- Provide `HF_TOKEN` and `GITHUB_TOKEN` when prompted for private HuggingFace/GitHub access.

Run the cells in order and inspect the logs written to `results/`.


In [None]:
# 1) Runtime & GPU check
import sys, platform, subprocess, os
import torch
print('Python', sys.version)
print('Platform', platform.platform())
print('Torch', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    try:
        !nvidia-smi
    except Exception as e:
        print('nvidia-smi failed:', e)
# Memory / disk
try:
    import psutil
    vm = psutil.virtual_memory()
    print(f'RAM: {vm.total/1e9:.1f} GB, Available: {vm.available/1e9:.1f} GB')
except Exception:
    pass
print('Disk usage:')
!df -h | sed -n '1,10p'


In [None]:
# 2) Install Python dependencies (run once)
# Installs are quiet to keep output compact. Adjust packages as needed.
!pip install -q qdrant-client sentence-transformers transformers datasets kaggle huggingface_hub torch torchvision scikit-learn imgaug
print('Installed core packages (or they were already present).')


In [None]:
# 3) Mount Google Drive (optional)
# The whole step sits in one try block: outside Colab the import fails, and any
# mount or directory error is reported the same way, without stopping the notebook.
try:
    from google.colab import drive

    drive.mount('/content/drive')

    import os
    drive_root = '/content/drive/MyDrive/FarmFederate-results'
    # Create the results folder on Drive if this is the first run.
    os.makedirs(drive_root, exist_ok=True)
    print('Drive mounted, results will be copied to', drive_root)
except Exception as mount_error:
    print('Google Drive not available or not running in Colab:', mount_error)


In [None]:
# 4) Clone repository & checkout branch
import os
if not os.path.exists('FarmFederate-Advisor'):
    !git clone https://github.com/Solventerritory/FarmFederate-Advisor.git
%cd FarmFederate-Advisor
!git fetch --all --quiet
!git checkout feature/multimodal-work || true
!git pull --quiet
# Verify the main script exists
print('\nListing root files:')
!ls -la | sed -n '1,200p'
print('\nChecking for FarmFederate_Kaggle_Complete.py:')
!ls -la FarmFederate_Kaggle_Complete.py || true


In [None]:
# 5) Upload kaggle.json or set Kaggle env vars
# Use the file upload UI to securely set kaggle.json when running interactively in Colab
import os
try:
    from google.colab import files
    print('Upload kaggle.json now (or cancel to use env vars)')
    uploaded = files.upload()
    if uploaded:
        kaggle_dir = os.path.join(os.path.expanduser('~'), '.kaggle')
        os.makedirs(kaggle_dir, exist_ok=True)
        kaggle_path = os.path.join(kaggle_dir, 'kaggle.json')
        # Prefer the file that is actually named kaggle.json; otherwise fall back
        # to the last uploaded file (the one the previous loop would have kept).
        payload = uploaded.get('kaggle.json')
        if payload is None:
            payload = list(uploaded.values())[-1]
        # Context manager guarantees the credentials are flushed and the handle closed.
        with open(kaggle_path, 'wb') as fh:
            fh.write(payload)
        try:
            # Credentials file: restrict access to the owner.
            os.chmod(kaggle_path, 0o600)
        except OSError as e:
            print('Warning: could not restrict permissions on kaggle.json:', e)
        print('Saved kaggle.json to ~/.kaggle/kaggle.json')
    else:
        print('No kaggle.json uploaded. You can set KAGGLE_USERNAME and KAGGLE_KEY as env vars.')
except ImportError:
    print('files.upload not available (not in Colab). Set KAGGLE_USERNAME/KAGGLE_KEY env vars manually.')
except Exception as e:
    # Report real upload/write failures instead of blaming the environment.
    print('kaggle.json upload failed. Set KAGGLE_USERNAME/KAGGLE_KEY env vars manually:', e)

# Example: set env vars manually (uncomment to use)
# import os
# os.environ['KAGGLE_USERNAME'] = 'YOUR_USERNAME'
# os.environ['KAGGLE_KEY'] = 'YOUR_KEY'


In [None]:
# 6) Securely set HF_TOKEN and GITHUB_TOKEN (interactive)
import os, getpass

# Each entry: environment variable to set, and the hidden prompt used to ask for it.
token_prompts = (
    ('HF_TOKEN', 'Enter HuggingFace HF_TOKEN (leave blank to skip): '),
    ('GITHUB_TOKEN', 'Enter GITHUB_TOKEN (leave blank to skip): '),
)
for env_name, prompt_text in token_prompts:
    secret = getpass.getpass(prompt_text)
    # A blank answer leaves the environment variable untouched.
    if secret:
        os.environ[env_name] = secret
        print(f'{env_name} set')
print('\nNote: these values are stored in the kernel environment only; do not print tokens.')


In [None]:
# 7) Confirm FarmFederate_Kaggle_Complete.py is ready
import os
script = 'FarmFederate_Kaggle_Complete.py'
if os.path.exists(script):
    print(f"Found {script} (size: {os.path.getsize(script)} bytes). Ready to run dry-run validation.")
else:
    print(f"{script} not found in repo root. Check that you've cloned the repo and are in the correct directory.")
# Optionally display first 200 lines for quick inspection
try:
    with open(script, 'r', encoding='utf-8') as source_file:
        print('--- top of script ---')
        for line_number, text in enumerate(source_file):
            # Stop once 200 lines (indices 0-199) have been shown.
            if line_number == 200:
                break
            print(text.rstrip())
except Exception as read_error:
    print('Could not open script file:', read_error)


In [None]:
# 8) Dry-run validation (--dry-run)
import os
os.environ['DRY_RUN'] = '1'
print('Starting dry-run validation: this will not download large datasets (DRY_RUN=1)')
!python FarmFederate_Kaggle_Complete.py --dry-run 2>&1 | tee results/dry_run.log
print('\nLast 60 lines of dry-run log:')
!tail -n 60 results/dry_run.log || true


In [None]:
# 9) Inspect dataset discovery & run status reports
import json, os
for fname in ['results/run_status.json','results/dataset_discovery_manifest.json','datasets_report.json','results/colab_full_run.log']:
    if os.path.exists(fname):
        print('\n---', fname, '---')
        try:
            if fname.endswith('.json'):
                print(json.dumps(json.load(open(fname, 'r')), indent=2)[:2000])
            else:
                print('\n'.join(open(fname,'r').read().splitlines()[-50:]))
        except Exception as e:
            print('Could not pretty-print', fname, e)
    else:
        print(fname, 'not found yet')


In [None]:
# 10) Enable full run & start script (WARNING: long-running)
import os
os.environ['RUN_ON_COLAB'] = '1'
os.environ['DRY_RUN'] = '0'
os.environ['CLONE_GITHUB_REPOS'] = '1'
print('Starting full run. Output is streamed to results/colab_full_run.log')
!python -u FarmFederate_Kaggle_Complete.py 2>&1 | tee results/colab_full_run.log


In [None]:
# 11) Tail logs: print the last 200 lines of results/colab_full_run.log
# NOTE(review): the full-run cell runs the script in the foreground, so this cell
# presumably only executes once that run has finished or been interrupted —
# confirm before relying on it for live monitoring.
print('Tail the last 200 lines of the live log')
# `|| true` keeps the shell line's exit status at zero when the log does not exist yet.
!tail -n 200 results/colab_full_run.log || true


In [None]:
# 12) Sync results & checkpoints to Drive (if mounted)
# Any failure (for example a missing results/ directory) is reported, not raised.
try:
    import shutil, os
    drive_root = '/content/drive/MyDrive/FarmFederate-results'
    if not os.path.exists(drive_root):
        print('Drive path not present. Make sure you mounted Drive and created the folder at', drive_root)
    else:
        print('Copying results/ ->', drive_root)
        results_target = os.path.join(drive_root, 'results')
        # dirs_exist_ok lets repeated syncs overwrite into the same Drive folder.
        shutil.copytree('results', results_target, dirs_exist_ok=True)
        # Checkpoints are optional: copy them only when the directory exists.
        if os.path.exists('checkpoints'):
            checkpoints_target = os.path.join(drive_root, 'checkpoints')
            shutil.copytree('checkpoints', checkpoints_target, dirs_exist_ok=True)
        print('Sync complete')
except Exception as sync_error:
    print('Sync failed:', sync_error)


In [None]:
# 13) Quick RAG / Qdrant test snippet (optional)
# Smoke test only: any import or runtime error is reported and the cell carries on.
try:
    from qdrant_client import QdrantClient
    from backend.qdrant_rag import init_qdrant_collections, agentic_diagnose, Embedders
    from PIL import Image

    def mock_llm(prompt):
        # Stand-in for a real LLM call: returns a fixed reply for any prompt.
        return 'Mock LLM response: advice'

    print('Initializing in-memory Qdrant...')
    client = QdrantClient(':memory:')
    init_qdrant_collections(client, recreate=True)
    emb = Embedders()
    # Simple empty search demo, using a plain green 224x224 placeholder image
    placeholder_image = Image.new('RGB', (224,224), color='green')
    diagnosis = agentic_diagnose(
        client,
        image=placeholder_image,
        user_description='Yellow spots on maize leaf',
        emb=emb,
        llm_func=mock_llm,
    )
    retrieved_items = diagnosis.get('retrieved', [])
    print('Retrieved:', len(retrieved_items))
    print('Prompt preview:')
    print(diagnosis['prompt'][:800])
except Exception as rag_error:
    print('RAG test skipped or failed (missing optional deps):', rag_error)


In [None]:
# 14) Visualize saved plots & read reports
import json
import os
from IPython.display import display, Image as IPyImage

PLOTS_DIR = 'plots'
IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg')

# Sort once so that both the listing and the chosen sample are deterministic
# (os.listdir returns entries in arbitrary order).
plot_names = sorted(os.listdir(PLOTS_DIR)) if os.path.isdir(PLOTS_DIR) else []
print('Plots directory listing:')
for f in plot_names[:50]:
    print(' -', f)
# Display a sample plot if present
# The list is deliberately not called `files`, so it does not overwrite the
# `google.colab.files` name that other cells import into the kernel.
image_paths = [os.path.join(PLOTS_DIR, name) for name in plot_names if name.lower().endswith(IMAGE_SUFFIXES)]
sample = image_paths[0] if image_paths else None
if sample:
    print('Displaying sample plot:', sample)
    display(IPyImage(filename=sample))
else:
    print('No plot images found yet (run full experiment to generate plots)')

# Print brief JSON summaries
for fname in ['results/complete_results.json','results/epoch_sweep_results.json']:
    if os.path.exists(fname):
        print('\n---', fname, '---')
        try:
            # The context manager closes the file even when parsing fails.
            with open(fname, 'r') as fh:
                d = json.load(fh)
            if isinstance(d, dict):
                print('Keys:', list(d.keys())[:20])
            else:
                # Valid JSON whose top level is not an object has no keys to list.
                print('Top-level JSON type:', type(d).__name__)
        except Exception as e:
            print('Error reading', fname, e)
    else:
        print(fname, 'not found yet')


In [None]:
# 15) Compress final artifacts and offer for download (optional)
!zip -r -q farmfederate_results.zip results plots checkpoints || true
print('Created farmfederate_results.zip (if any artifacts present).')
try:
    from google.colab import files
    files.download('farmfederate_results.zip')
except Exception:
    print('files.download not available; copy the zip from the notebook workspace or sync to Drive')
