In [1]:
# Run this cell
try:
    import google.colab
    print("CONNECTED to Google Colab!")
    
    # Check for GPU
    !nvidia-smi
except ImportError:
    print("NOT Connected. You are running on your local computer.")

CONNECTED to Google Colab!
Fri Dec  5 13:42:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   69C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                     

In [2]:
%pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git" -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [3]:
%pip install transformers accelerate datasets -q

In [4]:
import torch
from unsloth import FastLanguageModel

max_seq_length=2048
load_in_4bit=True
dtype=None

model_name="unsloth/llama-3.1-8b-unsloth-bnb-4bit"

model,tokenizer=FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
)
print(f"model is on this device:{model.device}")
FastLanguageModel.for_inference(model)

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

model is on this device:cuda:0


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRM

In [5]:
def call_llm(prompt: str, max_new_tokens:int=512) -> str:
    input_ids=tokenizer(
        prompt,
        return_tensors="pt",
    ).to (model.device)

    with torch.no_grad():
        outputs=model.generate(
            **input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    full_text=tokenizer.decode(outputs[0],skip_special_tokens=True)

    return full_text

In [6]:
print(call_llm("In one sentence,explain what YOLO does in computer vision."))

In one sentence,explain what YOLO does in computer vision. What is the difference between YOLO and R-CNN?
YOLO is a deep learning algorithm that is used for object detection in images. It is a single-shot detector, meaning that it can detect objects in an image in a single pass through the network, without the need for multiple passes or iterations. This makes it faster and more efficient than other object detection algorithms.
YOLO is based on the idea of predicting the bounding boxes and class probabilities for all objects in an image at once, rather than predicting the bounding boxes and class probabilities for each object separately. This allows YOLO to be more efficient and faster than other object detection algorithms.
The main difference between YOLO and R-CNN is that YOLO is a single-shot detector, while R-CNN is a multi-stage detector. R-CNN uses a two-stage approach, where it first proposes regions of interest (ROIs) and then classifies them. YOLO, on the other hand, uses a s

In [7]:
def dataset_prompt(task_description: str) -> str:
    return f"""
You are an expert computer vision data scientist.

Task:
{task_description}

Design a DATASET PLAN for this task. Include:

1. Classes (with short explanations).
2. Recommended data sources (e.g., Roboflow keywords or types of images).
3. Approximate number of images per class for an MVP and for a stronger model.
4. Train/validation/test split percentages.
5. Recommended data augmentations (and why).
6. Notes specific to CCTV-style footage and phone usage.

Format your answer as clear Markdown with headings and bullet points.
"""


def yolo_config_prompt(task_description: str) -> str:
    return f"""
You are a senior YOLO engineer.

Task:
{task_description}

Design a YOLO training CONFIGURATION including:

1. Model variant (e.g., 'yolov8s', 'yolov8m') and justify the choice.
2. Image size (imgsz).
3. Batch size for a Colab T4 GPU.
4. Number of epochs for a first run, and for a more refined run.
5. Important hyperparameters (learning rate, optimizer, augmentations settings).
6. Any recommended YOLO training tricks (cosine LR, warmup, etc.).

Output:
First, give a YAML-style block with keys like:
- model
- imgsz
- batch
- epochs
- lr0
- optimizer
etc.

Then give a short explanation in Markdown.
"""


def training_script_prompt(task_description: str) -> str:
    return f"""
You are a Python + ML engineer.

Task:
{task_description}

Write a PYTHON TRAINING SCRIPT OUTLINE for training a YOLO model on a custom dataset in YOLO format on Google Colab.

Requirements:
- Assume we use the 'ultralytics' package.
- Assume a dataset YAML path: './data.yaml'.
- The script should:
  - Set model name (e.g. 'yolov8s.pt') via a variable.
  - Define basic arguments: epochs, imgsz, batch.
  - Load the model.
  - Train the model.
  - Save best weights.
  - Run validation and print mAP.
- This is an outline, not perfect final code, but should be runnable with small edits.

Return ONLY plain Python code (no backticks, no Markdown).
"""


def onnx_tensorrt_prompt(task_description: str) -> str:
    return f"""
You are an NVIDIA TensorRT engineer.

Task:
{task_description}

Design an ONNX + TensorRT OPTIMIZATION PLAN for a trained YOLO model.

Include:

1. How to export the YOLO model to ONNX (CLI or Python).
2. How to convert the ONNX model to a TensorRT engine (e.g., using 'trtexec').
3. How to benchmark:
   - Latency per frame (ms)
   - FPS
   - GPU memory usage
4. How to use 'nvidia-smi' to monitor GPU utilization while running inference.
5. Suggestions for FP16 / INT8 optimization tradeoffs.

Format as Markdown with numbered steps, code blocks (CLI or Python), and bullet points.
"""


def readme_prompt(task_description: str, project_name: str) -> str:
    return f"""
You are a senior MLOps engineer writing a GitHub README for a portfolio project.

Project name: {project_name}

Task:
{task_description}

Write a README.md including:

1. Title and one-line description.
2. Problem statement and why this CV task matters.
3. High-level architecture as a bullet flow.
4. Tech stack (YOLO, ONNX, TensorRT, Unsloth LLM Planner).
5. How the AI CV Auto-Agent works:
   - input (natural language task)
   - planner (LLM)
   - generated artifacts (dataset plan, config, script, optimization plan)
   - user training & deployment.
6. How to:
   - Set up environment (Colab).
   - Run the planner to generate a project.
   - Train the YOLO model using the generated script.
   - (Optionally) run ONNX/TensorRT optimization.
7. Section for 'Example Task: Phone Usage Detection in CCTV'.
8. Future improvements (better planner, more tasks, automatic training, etc).

Format in Markdown with headings.
"""


In [8]:
from dataclasses import dataclass

@dataclass
class GeneratedProject:
    dataset_plan: str
    yolo_config: str
    training_code: str
    onnx_tensorrt_plan: str
    readme_md: str


def generate_project_artifacts(task_description: str, project_name: str) -> GeneratedProject:
    print("Generating dataset plan...")
    ds = call_llm(dataset_prompt(task_description))

    print("Generating YOLO config...")
    yc = call_llm(yolo_config_prompt(task_description))

    print("Generating training script outline...")
    tc = call_llm(training_script_prompt(task_description), max_new_tokens=700)

    print("Generating ONNX + TensorRT optimization plan...")
    ot = call_llm(onnx_tensorrt_prompt(task_description), max_new_tokens=700)

    print("Generating README...")
    rm = call_llm(readme_prompt(task_description, project_name), max_new_tokens=700)

    return GeneratedProject(
        dataset_plan=ds,
        yolo_config=yc,
        training_code=tc,
        onnx_tensorrt_plan=ot,
        readme_md=rm,
    )


In [9]:
import os
from pathlib import Path
import re

BASE_DIR = Path.cwd()
PROJECTS_DIR = BASE_DIR / "generated_projects"
PROJECTS_DIR.mkdir(exist_ok=True)

def slugify(text: str) -> str:
    text = text.lower().strip()
    text = re.sub(r"[^a-z0-9]+", "-", text)
    text = re.sub(r"-+", "-", text)
    return text.strip("-")


task_description = "Detect phone usage (person using a mobile phone) in indoor CCTV footage, such as offices or factories."
project_name = "Phone Usage Detection in CCTV"

artifacts = generate_project_artifacts(task_description, project_name)

slug = slugify(project_name)
project_path = PROJECTS_DIR / slug
project_path.mkdir(exist_ok=True)

# Write files
(project_path / "DATASET_PLAN.md").write_text(artifacts.dataset_plan, encoding="utf-8")
(project_path / "yolo_config.yaml").write_text(artifacts.yolo_config, encoding="utf-8")
(project_path / "train_yolo.py").write_text(artifacts.training_code, encoding="utf-8")
(project_path / "ONNX_TENSORRT_PLAN.md").write_text(artifacts.onnx_tensorrt_plan, encoding="utf-8")
(project_path / "README.md").write_text(artifacts.readme_md, encoding="utf-8")

print("Generated project at:", project_path)
print("Files:")
for f in project_path.iterdir():
    print(" -", f.name)


Generating dataset plan...
Generating YOLO config...
Generating training script outline...
Generating ONNX + TensorRT optimization plan...
Generating README...
Generated project at: /content/generated_projects/phone-usage-detection-in-cctv
Files:
 - train_yolo.py
 - yolo_config.yaml
 - ONNX_TENSORRT_PLAN.md
 - README.md
 - DATASET_PLAN.md


In [None]:
import os
print(os.getcwd())
