# Prediction with label
### Importing Libraries

In [1]:
!pip3 install --no-cache-dir scikit-learn==1.2.2
import os
import cv2
import torch
import pandas as pd
import supervision as sv
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
import shutil
import numpy as np
import joblib

os.environ['PYTORCH_CUDA_ALLOC_CONF']='max_split_size_mb:128' 
torch.cuda.set_per_process_memory_fraction(0.8, 0)

  from .autonotebook import tqdm as notebook_tqdm


### Setup path and model

In [2]:
CHECKPOINT_PATH = "../models/sam_vit_h_4b8939.pth"
print(CHECKPOINT_PATH, "; exist:", os.path.isfile(CHECKPOINT_PATH))

# Define directories
IMAGE_DIR = "mask_extraction_noisy"
OUTPUT_DIR = "data_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Initialize model
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
MODEL_TYPE = "vit_h"
sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH).to(device=DEVICE, non_blocking=True)
mask_generator = SamAutomaticMaskGenerator(sam)
model_path = '../models/rf_model.joblib'
rf = joblib.load(model_path)

../models/sam_vit_h_4b8939.pth ; exist: True


### Prepare essential functions

In [3]:
# Function to resize image
def resize_image(image, max_size=(512, 512)):
    h, w = image.shape[:2]
    if h > max_size[0] or w > max_size[1]:
        scale = min(max_size[0]/h, max_size[1]/w)
        new_size = (int(w*scale), int(h*scale))
        return cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
    return image

# Function to filter out the largest mask, assuming it is the background
def filter_out_background(masks, threshold=0.5):
    areas = [mask["area"] for mask in masks]
    max_area = max(areas)
    filtered_masks = [mask for mask in masks if mask["area"] < threshold * max_area]
    return filtered_masks

# Generate a list of colors for segmentation
def generate_colors(n):
    colors = np.random.randint(0, 255, size=(n, 3)).tolist()
    return colors

# Check if the mask is predominantly white (background)
def is_white_background(segmentation, image_rgb, threshold=240):
    mask_area = segmentation > 0
    mean_color = np.mean(image_rgb[mask_area], axis=0)
    return np.all(mean_color > threshold)

### Process Images

In [4]:
data = []
for image_name in os.listdir(IMAGE_DIR):
    if image_name.endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(IMAGE_DIR, image_name)
        output_path = os.path.join(OUTPUT_DIR, f"annotated_{image_name}")

        image_bgr = cv2.imread(image_path)
        image_bgr = resize_image(image_bgr)  # Resize image to reduce memory usage
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        # Free up GPU memory before processing each image
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

        sam_result = mask_generator.generate(image_rgb)

        # Filter out the background
        filtered_masks = filter_out_background(sam_result)

        annotated_image = image_bgr.copy()
        colors = generate_colors(len(filtered_masks))

        for i, mask in enumerate(filtered_masks):
            area = mask['area']
            segmentation = mask['segmentation'].astype('uint8')

            # Skip white background areas
            if is_white_background(segmentation, image_rgb):
                continue

            # Color the segmented area
            color = colors[i]
            r, g, b = color
            annotated_image[segmentation > 0] = cv2.addWeighted(annotated_image, 0.5, np.full_like(annotated_image, color), 0.5, 0)[segmentation > 0]

            # Append data
            data.append([image_name, area, r, g, b])

        cv2.imwrite(output_path, annotated_image)
        print(f"Processed and saved: {output_path}")

        # Free up memory after processing each image
        del sam_result
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

Processed and saved: data_output/annotated_noisy-week_1.jpg
Processed and saved: data_output/annotated_noisy-week_2.jpg
Processed and saved: data_output/annotated_noisy-week_3.jpg
Processed and saved: data_output/annotated_noisy-week_4.jpg
Processed and saved: data_output/annotated_noisy-week_5.jpg


### Saving result to CSV

In [5]:
# Convert to DataFrame
columns = ['filename', 'area', 'r', 'g', 'b']
df = pd.DataFrame(data, columns=columns)

# Split the 'filename' column and create a new 'Week' column, removing 'noisy-'
df['Week'] = df['filename'].str.extract(r'(Week\d+)')

# Order the DataFrame by 'filename' column
df.sort_values(by='filename', inplace=True)

# Create 'count' column with total count for each 'filename'
df['count'] = df.groupby('filename')['filename'].transform('count')

# Save DataFrame to CSV
OUTPUT_CSV = "segmented_areas.csv"
df.to_csv(OUTPUT_CSV, index=False)

# Variables de entrada
X = df[['count']]

# Make predictions
predictions = rf.predict(X)

# Add predictions to the DataFrame
df['Predicted_Week'] = predictions

# Save the updated DataFrame to a new CSV file
output_path = 'segmented_areas_with_predictions.csv'
df.to_csv(output_path, index=False)

### Annotate images with predictions

In [6]:
for image_name, predicted_week in zip(df['filename'], df['Predicted_Week']):
    image_path = os.path.join(OUTPUT_DIR, f"annotated_{image_name}")
    image_bgr = cv2.imread(image_path)

    # Add text to the bottom center of the image in red color
    text = f'Predicted Week: {predicted_week}'
    text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)[0]
    text_x = (image_bgr.shape[1] - text_size[0]) // 2
    text_y = image_bgr.shape[0] - 10  # 10 pixels from the bottom
    cv2.putText(image_bgr, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Save the annotated image correctly
    annotated_output_path = os.path.join(OUTPUT_DIR, f"annotated_{image_name}")
    cv2.imwrite(annotated_output_path, image_bgr)
    print(f"Annotated {annotated_output_path} with Predicted Week: {predicted_week}")

print("All images have been processed, annotated with predictions, and characteristics exported.")

Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_1.jpg with Predicted Week: Week1
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated data_output/annotated_noisy-week_2.jpg with Predicted Week: Week2
Annotated da