In [None]:
# Carrot Detection Pipeline – CSF Internship Assessment

#**Author:** Stephanny Gabriela Sanchez Bautista
#**Date:** 8/06/2025
#**Objective:** Simulate a video processing pipeline using a folder of images to detect carrots.
#Additionally, demonstrate understanding of classification through a simple ML demo.


In [None]:
#As an AI Development Intern at the CSF, you will be tasked with developing pipelines.
#Please answer the following questions in a jupyter notebook. 
#You should only spend about two hours on these questions. If you want to spend more you can. 
#If you've run out of time, please describe thoroughly what remains to be done and how you would accomplish it

In [None]:
#Task 1: Processing Video Pipeline
#Assume you have a working ML model that can process individual images and identify carrots, 
#how would you adapt that model such that you could feed it live video inside a grocery store
#and have it create a record of any carrots it sees.

In [None]:
#Answer 1:
#If I want to adapt a carrot detection model to process real-time video from a supermarket:
#We trained a YOLOv5 model on the Roboflow platform with 170 carrot images, and we call it through the Roboflow API using an API key.
#The first step is to extract frames from the video as images; I would sample one frame per second.
#The second step is to pass each frame through the detection model to determine whether a carrot appears in that specific image.
#Third, whenever the model detects a carrot, it will extract the position within the frame, the exact timestamp, and the confidence score indicating how likely it is to be a carrot.
#This information should be saved in a .csv file. The entire process will run in continuous cycles, allowing real-time visualization of detected carrots as customers move around the store.

In [None]:
#Demo: 
#Write a toy implementation of whatever machine learning concept you would like in order to demonstrate your skills.
#The problems we work on are wholly related to classification, so your toy implementation should show knowledge of the fundamentals of classification problems.

In [None]:
# Install dependencies and download Yolov5
#Execute this only one time
#We are using Yolov5 trained in the Roboflow platform
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt


In [None]:
#We instaled the Roboflow library and 
# Install Roboflow library
!pip install roboflow


In [11]:
# Extract about one frame per second from the video and save each as a JPEG.
# The video file must be reachable at `video_path`, relative to the notebook's
# current working directory.
# For every saved frame a record (frame index, timestamp, file name, path) is
# appended to `frame_info`, which the classification cell consumes.

import cv2
import os
from datetime import timedelta

video_path = "stephy_carrot_2.mp4"  # Path to your video
output_folder = "video_frames_1fps"
os.makedirs(output_folder, exist_ok=True)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently reporting "0 frames saved".
    raise OSError(f"Could not open video: {video_path}")

frame_id = 0
saved = 0
frame_info = []

try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    # `not fps > 0` also rejects NaN; without a valid frame rate neither the
    # sampling step nor the timestamps can be computed.
    if not fps > 0:
        raise ValueError(f"Video reports an invalid frame rate: {fps!r}")

    # Number of frames between two saved frames (~1 second of video).
    # round() avoids the drift int() causes for rates such as 29.97, and
    # max(1, ...) prevents a modulo-by-zero for rates below 1 fps.
    frame_step = max(1, round(fps))

    while True:
        ret, frame = cap.read()
        if not ret:
            break  # End of the video (or a read error).

        if frame_id % frame_step == 0:
            timestamp = str(timedelta(seconds=frame_id / fps))
            filename = f"frame_{frame_id:05d}.jpg"
            full_path = os.path.join(output_folder, filename)

            # Only log frames that were actually written to disk.
            if cv2.imwrite(full_path, frame):
                frame_info.append({
                    "frame_id": frame_id,
                    "timestamp": timestamp,
                    "filename": filename,
                    "path": full_path
                })
                saved += 1
            else:
                print(f"⚠️ Could not write frame: {full_path}")

        frame_id += 1
finally:
    # Release the capture handle even if the loop above raised.
    cap.release()

print(f"✅ {saved} frames saved to '{output_folder}'")


✅ 12 frames saved to 'video_frames_1fps'


In [12]:
# Classify every extracted frame with the hosted Roboflow detection model.
# The API key is a secret and must not be stored in the notebook: export it as
# the ROBOFLOW_API_KEY environment variable before running this cell.
# Requires `cv2` and `frame_info` from the frame-extraction cell.
import os

import requests
import pandas as pd

API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable to your Roboflow API key."
    )
MODEL_ID = "carrot-dt8zs-5efjd"
VERSION = 1

# Value sent as the `confidence` query parameter of the detection request.
MIN_CONFIDENCE = 10
# A frame is labelled "carrot" (1) when the summed bounding-box area covers at
# least this fraction of the frame area.
CARROT_AREA_THRESHOLD = 0.2
# Seconds to wait for the API before giving up on a frame.
REQUEST_TIMEOUT_S = 30

classification_log = []

for entry in frame_info:
    # Get image size to calculate area
    img = cv2.imread(entry["path"])
    if img is None:
        print(f"⚠️ Skipped unreadable frame: {entry['filename']}")
        continue
    h, w = img.shape[:2]
    frame_area = w * h

    try:
        with open(entry["path"], "rb") as f:
            response = requests.post(
                f"https://detect.roboflow.com/{MODEL_ID}/{VERSION}",
                params={"api_key": API_KEY, "confidence": MIN_CONFIDENCE},
                files={"file": f},
                timeout=REQUEST_TIMEOUT_S,
            )
    except requests.RequestException:
        # The exception text can contain the request URL (including the API
        # key), so it is deliberately not printed.
        print(f"❌ Failed: {entry['filename']}")
        continue

    if response.status_code != 200:
        print(f"❌ Failed: {entry['filename']}")
        continue

    result = response.json()
    detections = result.get("predictions", [])

    # Total area covered by all bounding boxes. Overlapping boxes are counted
    # more than once, so the ratio can overestimate the real coverage.
    total_carrot_area = sum(d["width"] * d["height"] for d in detections)
    carrot_ratio = total_carrot_area / frame_area if frame_area > 0 else 0

    # Decide prediction based on the area threshold
    prediction = 1 if carrot_ratio >= CARROT_AREA_THRESHOLD else 0

    classification_log.append({
        "frame_id": entry["frame_id"],
        "timestamp": entry["timestamp"],
        "filename": entry["filename"],
        "prediction": prediction,
        "carrot_ratio": round(carrot_ratio, 4),
        # Highest confidence among the detections; None when nothing was detected.
        "confidence": max((d["confidence"] for d in detections), default=None),
        "bbox_count": len(detections)
    })

print("✅ Classification complete.")  # Signals that the loop over all frames finished


✅ Classification complete.


In [14]:
# Persist the per-frame predictions to a .csv file, then show them in the notebook.
# carrot_ratio is the summed bounding-box area divided by the frame area; the
# prediction column says whether that ratio reached the threshold applied in
# the classification cell (carrot_ratio >= 0.2).

# Write the numeric predictions (1 / 0) to CSV.
df = pd.DataFrame(classification_log)
df.to_csv("carrot_area_based_classification.csv", index=False)

# Replace the numeric prediction with a readable label for display.
label_names = {1: "Carrot", 0: "Not-Carrot"}
df = df.assign(prediction=df["prediction"].map(label_names))

# Show every row and the full content of every column.
for option_name in ("display.max_rows", "display.max_colwidth"):
    pd.set_option(option_name, None)

df

Unnamed: 0,frame_id,timestamp,filename,prediction,carrot_ratio,confidence,bbox_count
0,0,0:00:00,frame_00000.jpg,Not-Carrot,0.1064,0.927071,1
1,15,0:00:00.999418,frame_00015.jpg,Not-Carrot,0.0952,0.949548,1
2,30,0:00:01.998836,frame_00030.jpg,Not-Carrot,0.0612,0.632271,1
3,45,0:00:02.998254,frame_00045.jpg,Not-Carrot,0.1842,0.738772,2
4,60,0:00:03.997673,frame_00060.jpg,Carrot,0.3501,0.848164,3
5,75,0:00:04.997091,frame_00075.jpg,Carrot,0.3805,0.846088,3
6,90,0:00:05.996509,frame_00090.jpg,Not-Carrot,0.1139,0.82861,2
7,105,0:00:06.995927,frame_00105.jpg,Not-Carrot,0.1198,0.473713,1
8,120,0:00:07.995345,frame_00120.jpg,Not-Carrot,0.0896,0.88866,2
9,135,0:00:08.994763,frame_00135.jpg,Not-Carrot,0.0799,0.748456,2
