# Kisaan Colab Training (GPU)
Use this notebook on Google Colab to fine-tune the Topic and Sub-topic heads with PEFT/LoRA, then run inference on the combined CSV. Run cells top-to-bottom.

In [None]:
# 1. Configure project paths
from pathlib import Path
import os
import json
# Root of the repository checkout; every other location is derived from it.
PROJECT_DIR = Path("/content/Kisaan")

# Input CSV and the directories for model artefacts and processed data.
DATASET_PATH = PROJECT_DIR.joinpath("Datasets", "KCC_MarMay2025_combined.csv")
MODELS_DIR = PROJECT_DIR.joinpath("models")
PROCESSED_DIR = PROJECT_DIR.joinpath("Datasets", "processed")

# Echo the resolved locations so a misconfigured path is visible immediately.
print("Project directory:", PROJECT_DIR)
print("Dataset path:", DATASET_PATH)

In [None]:
# 2. (Optional) Mount Google Drive for long-term storage
# Treat the runtime as Colab exactly when `import google.colab` succeeds;
# any failure of that import is taken to mean "not Colab".
try:
    import google.colab  # type: ignore
except Exception:
    IN_COLAB = False
else:
    IN_COLAB = True
print({"IN_COLAB": IN_COLAB})

if not IN_COLAB:
    print("Not running inside Colab; skipping Drive mount.")
else:
    from google.colab import drive  # type: ignore

    # Mount Drive without forcing a remount, then record the Drive-side
    # project directory (only defined when running inside Colab).
    drive.mount("/content/drive", force_remount=False)
    DRIVE_PROJECT_DIR = Path("/content/drive/MyDrive/Kisaan")
    print("Drive project dir:", DRIVE_PROJECT_DIR)

In [None]:
# 3. Clone the GitHub repository if not present
import subprocess
import sys
if not PROJECT_DIR.exists():
    subprocess.run(["git", "clone", "https://github.com/7009soham/Kisaan.git", str(PROJECT_DIR)], check=True)
else:
    print("Repository already present at", PROJECT_DIR)
%cd {PROJECT_DIR}
!git status -sb

In [None]:
# 4. Install dependencies (ensures recent Transformers/PEFT)
!pip install --quiet --upgrade \
    transformers==4.46.1 \
    datasets==3.0.1 \
    accelerate==1.0.1 \
    peft==0.13.2 \
    evaluate==0.4.2 \
    sentencepiece==0.1.99 \
    scikit-learn==1.5.2 \
    pandas==2.2.3 \
    numpy==1.26.4 \
    tqdm==4.66.5 \
    pyarrow==16.1.0 \
    matplotlib==3.9.2 \
    seaborn==0.13.2

In [None]:
# 5. Validate dataset paths
# Fail fast with a readable message if the combined CSV is missing.
assert DATASET_PATH.exists(), f"Dataset not found: {DATASET_PATH}"

# Count physical lines inside a context manager so the file handle is closed
# deterministically (the bare open() call previously leaked it).
# "utf-8-sig" transparently skips a leading BOM if one is present.
with open(DATASET_PATH, encoding="utf-8-sig") as dataset_file:
    line_count = sum(1 for _ in dataset_file)

# Subtract one for the presumed header row; clamp at zero so an empty file
# reports 0 rather than -1. NOTE: this is a line count, so quoted fields that
# contain embedded newlines would inflate the figure.
print("Dataset rows:", max(line_count - 1, 0))
print("Ready to preprocess labels.")