# Kudzu Object Detection
### Detect Kudzu Plant from Object Detection

## 0. Check for GPU  
This command verifies if an NVIDIA GPU is available for faster training.

In [None]:
!nvidia-smi

## 1. Install libraries

In [None]:
!pip install ultralytics roboflow

In [None]:
from ultralytics import YOLO
import os
from IPython.display import display, Image
# NOTE: the next import rebinds `display` to the IPython.display module,
# replacing the `display` function imported on the previous line.
from IPython import display
import os 
import shutil
from math import ceil
import subprocess
import json
# Clear this cell's output; relies on `display` being the module bound above.
display.clear_output()
# Run the `yolo checks` CLI command.
!yolo checks


## 2. Load Dataset

Download dataset from Roboflow using API key.

In [None]:
from roboflow import Roboflow
# Authenticate with Roboflow; replace the placeholder with your own API key.
rf = Roboflow(api_key="YOUR_API_KEY_GOES_HERE")
# Select the workspace, the project and the dataset version to download.
project = rf.workspace("test-mhm3s").project("kudzu_full_images")
version = project.version(3)
# Download in "yolov8" format; `dataset.location` is used by the training and
# validation commands below to locate data.yaml.
dataset = version.download("yolov8")         

## 3. Train Model
Train YOLOv8 with our custom kudzu dataset from Roboflow.

In [None]:
!yolo task=detect mode=train model=yolov8s.pt data={dataset.location}/data.yaml epochs=50 imgsz=512 lr0 = 0.001


Add the path after "Results saved to" here:

In [None]:
results_folder = '/home/student/Desktop/runs/detect/train'

#### Confusion Matrix

In [None]:
filename = 'confusion_matrix.png'
confusion_matrix = os.path.join(results_folder, filename)

In [None]:
Image(filename=confusion_matrix, width=600)

#### Train Results

In [None]:
filename = 'results.png'
results = os.path.join(results_folder, filename)

In [None]:
Image(filename=results, width=600)

## 4. Validation

### 4.1 Validate the Base YOLOv8 Model (Pretrained)
This runs validation using the original YOLOv8 Small (yolov8s.pt) without fine-tuning on kudzu images.

In [None]:
!yolo task=detect mode=val model=yolov8s.pt data={dataset.location}/data.yaml

### 4.2 Validate the Fine-Tuned Kudzu Model
This runs validation using the model you trained specifically for kudzu detection.  
Get the `best.pt` file from the `weights` subfolder inside the folder where the results were saved.

In [None]:
!yolo task=detect mode=val model=/home/student/Desktop/runs/detect/train16/weights/best.pt data={dataset.location}/data.yaml

## 5. Predict Images

##### Folder Containing GSV Images to Detect Kudzu

In [None]:
# This is the folder where you cloned the GitHub repository.
your_directory = "YOUR_BASE_DIRECTORY_GOES_HERE"

# Folder that holds the GSV images to run detection on.
source_folder = os.path.join(your_directory, 'output')
print("Joined path:", source_folder)

# Alert if the source folder is missing or is not a directory. A plain
# existence check would also accept a regular file, but os.listdir() is
# called on this path in a later cell and needs a real directory.
if not os.path.isdir(source_folder):
    print(f'{source_folder} does not exist or is not a directory')

In [None]:
# Make sure this is the folder where you saved the images downloaded in GSV.
print(source_folder)

##### Get all the GSV images 

In [None]:
# Collect the names of the regular files found directly inside source_folder;
# entries that are not files (e.g. sub-directories) are left out.
image_files = []
for entry_name in os.listdir(source_folder):
    if os.path.isfile(os.path.join(source_folder, entry_name)):
        image_files.append(entry_name)

##### Define all the paths, variables and log files.

In [None]:
# Path to the trained weights file (best.pt) inside the `weights` subfolder of
# your training results folder.
model_path = "/home/student/Desktop/runs/detect/train/weights/best.pt"

# Confidence threshold passed to YOLO as `conf=`; modify this value as needed.
conf_threshold = 0.5 

# Log files (paths are relative, so they are resolved against the current
# working directory)
processed_log = "processed_images_v1.txt"  # Log file to track processed images
detections_log = "detections_above_threshold_v1.txt"  # Log file to track images with detections above threshold

##### Log Functions 

In [None]:
# Function to load already processed images from log file
def load_processed_images(log_file):
    """Return the set of image paths recorded in ``log_file``.

    Args:
        log_file: Path of a text log holding one image path per line.

    Returns:
        A set with the non-empty, whitespace-stripped lines of the file, or
        an empty set when the file does not exist.
    """
    if not os.path.exists(log_file):
        return set()
    with open(log_file, 'r') as f:
        # Skip blank lines so a stray newline is not counted as a processed image.
        return {entry for entry in (line.strip() for line in f) if entry}

def count_processed_images(processed_set):
    """Return how many entries ``processed_set`` contains."""
    total = len(processed_set)
    return total
    
# Function to append new processed image to log file
def log_processed_image(log_file, image_path):
    """Append ``image_path`` followed by a newline to the end of ``log_file``.

    The file is opened in append mode, so it is created when missing and
    earlier entries are kept.
    """
    with open(log_file, 'a') as log_handle:
        entry = "{}\n".format(image_path)
        log_handle.write(entry)


#### Image Predictions

In [None]:
# Run YOLO prediction on every image that is not yet in the processed log and
# record the images whose output reports a Kudzu detection.

# Load the list of already processed images
processed_images = load_processed_images(processed_log)
processed_count = count_processed_images(processed_images)

# Report how many images will be skipped before processing starts
print(f"Skipping {processed_count} images already processed.")

# Images whose YOLO run failed. They are NOT written to the processed log, so
# re-running this cell retries them instead of silently skipping them.
failed_images = []

# Process each image in the source folder
for image_file in image_files:
    image_path = os.path.join(source_folder, image_file)

    # Check if this image has already been processed (skip if true)
    if image_path in processed_images:
        continue

    # Construct the YOLO command with confidence threshold
    yolo_command = [
        "yolo",
        "task=detect",
        "mode=predict",
        f"model={model_path}",
        f"source={image_path}",
        f"conf={conf_threshold}",
    ]

    # Run the YOLO command using subprocess and capture output
    result = subprocess.run(yolo_command, capture_output=True, text=True)

    # A non-zero exit code means the command itself failed (bad model path,
    # unreadable image, ...), so there is no prediction to inspect.
    if result.returncode != 0:
        print(f"YOLO failed for {image_path} (exit code {result.returncode}):")
        print(result.stderr.strip())
        failed_images.append(image_path)
        continue

    # Get YOLO's raw output for analysis. The image path is removed first
    # (presumably YOLO echoes it, possibly as an absolute path) so that a
    # folder or file name containing 'kudzu' is not mistaken for a detection.
    output = result.stdout
    for echoed_path in (os.path.abspath(image_path), image_path):
        output = output.replace(echoed_path, "")

    # Check if there is a Kudzu detection by looking for 'kudzu' in the output
    if "kudzu" in output:  # Adjust this based on actual YOLO output structure
        print(f"Kudzu detected for {image_path} with confidence above {conf_threshold * 100}%")

        # Log this image as having Kudzu detections above the threshold
        with open(detections_log, 'a') as f:
            f.write(f"{image_path}\n")

    # Mark this image as processed by logging it
    log_processed_image(processed_log, image_path)

if failed_images:
    print(f"⚠️ YOLO failed for {len(failed_images)} images; re-run this cell to retry them.")
else:
    print("✅ YOLO detection completed for all images.")

## 6. Visualize Results
6a. You can visualize your results in the predict folder that will be created in the runs folder.  
6b. You can also use the code below to obtain all the detections above threshold, save them in a new folder, and display 10 random detections.

In [None]:
import os
import shutil
from pathlib import Path
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
# Update this for your runs/detect folder path
root_directory = '/home/student/Desktop/runs/detect/'  

In [None]:
# Detections file Path
text_file_path = 'detections_above_threshold_v1.txt'  

In [None]:
# Step 0: Create new directory for all detections
output_directory = os.path.join(your_directory, 'matching_images_v1')
print("Joined path:", output_directory)

# exist_ok=True already makes this a no-op when the directory exists, so no
# separate existence check is needed. If the path exists but is a regular
# file, this raises FileExistsError instead of continuing silently.
os.makedirs(output_directory, exist_ok=True)

In [None]:
# Step 1: Read image paths from the text file and extract filenames
try:
    with open(text_file_path, 'r') as file:
        # Keep only the filename of each listed path. Blank lines are skipped
        # so they are not counted as an (empty) filename.
        listed_image_filenames = {
            os.path.basename(entry)
            for entry in (line.strip() for line in file)
            if entry
        }
except FileNotFoundError:
    print('Text file not found! Check the file path.')
    listed_image_filenames = set()
else:
    print(f"Loaded {len(listed_image_filenames)} image filenames from text file.")

In [None]:
# Step 2: Check if root directory exists and process images
# Only the directory listing is guarded: a FileNotFoundError raised later by
# the copy itself must not be reported as a missing root directory.
try:
    predict_folders = [d for d in Path(root_directory).iterdir() if d.is_dir() and d.name.startswith('predict')]
except FileNotFoundError:
    print('Root directory not found! Check the directory path.')
    predict_folders = []
else:
    print(f"Found {len(predict_folders)} 'predict' folders.")

# Copy every file whose name appears in the detections list into the output directory
for folder in predict_folders:
    print(f"Processing folder: {folder}")
    for file in folder.glob('*.*'):  # Match all files with any extension
        if file.name in listed_image_filenames:  # Compare only filenames
            print(f'Copying: {file}')
            shutil.copy(str(file), output_directory)

In [None]:
# Step 3: Fetch all image file paths from the output directory
if os.path.exists(output_directory):
    # Use a dedicated name here: `image_files` holds the GSV source image names
    # and must stay intact in case the prediction cell is run again.
    detection_images = [
        os.path.join(output_directory, f)
        for f in os.listdir(output_directory)
        # Include the dot so only real extensions match (not e.g. "notes_png").
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    # Step 4: Randomly select 10 images from the list (or fewer if less than 10)
    if len(detection_images) > 10:
        random_images = random.sample(detection_images, 10)
    else:
        random_images = detection_images  # Select all if less than 10

    if random_images:
        # Step 5: Display the randomly selected images using matplotlib
        plt.figure(figsize=(30, 20))

        for i, img_path in enumerate(random_images):
            # Load and display each image
            print(i+1, img_path)
            img = mpimg.imread(img_path)
            plt.subplot(2, 5, i + 1)  # Create a grid of 2 rows and 5 columns
            plt.imshow(img)
            plt.axis('off')  # Hide axis for better visualization
            plt.title(os.path.basename(img_path))  # Display filename as title

        plt.tight_layout()
        plt.show()
    else:
        # Nothing to plot; say so instead of showing an empty figure.
        print(f"No detection images found in: {output_directory}")
else:
    print(f"Directory does not exist: {output_directory}")