#start


In [None]:
import pandas as pd
import os
import cv2
from sklearn.model_selection import train_test_split

# Dataset sources as (annotation CSV, image folder) pairs. Each pair is
# expanded below into a dict with the keys 'csv_path' and 'image_dir'.
_SOURCES = (
    (
        '/content/drive/MyDrive/Tomato_dataset/phone1(ripe)/phone1(ripe).csv',
        '/content/drive/MyDrive/Tomato_dataset/phone1(ripe)',
    ),
    (
        '/content/drive/MyDrive/Tomato_dataset/Phone1(green)/phone1(green).csv',
        '/content/drive/MyDrive/Tomato_dataset/Phone1(green)',
    ),
    (
        '/content/drive/MyDrive/Tomato_dataset/phone2(batch1)/annotations/phone2(batch1).csv',
        '/content/drive/MyDrive/Tomato_dataset/phone2(batch1)',
    ),
    (
        '/content/drive/MyDrive/Tomato_dataset/phone2(batch2)/annotations/phone2(batch2).csv',
        '/content/drive/MyDrive/Tomato_dataset/phone2(batch2)',
    ),
)
csv_image_mapping = [
    {'csv_path': csv_file, 'image_dir': folder}
    for csv_file, folder in _SOURCES
]

# Root folder that receives the cropped images.
output_dir = '/content/drive/MyDrive/Tomato_dataset/cnn_crops'

In [None]:


# Class labels that are kept; rows carrying any other label are dropped below.
ripe_labels = ['R.healthy', 'R.lateblight', 'R.spots', 'R.pests', 'R.ber']
green_labels = ['G.healthy', 'G.lateblight', 'G.spots', 'G.pests', 'G.ber']

# Read every annotation CSV and tag its rows with the folder holding its images.
dfs = []
for m in csv_image_mapping:
    frame = pd.read_csv(m['csv_path'])
    dfs.append(frame.assign(image_dir=m['image_dir']))

# Stack all sources under a fresh 0..n-1 index and keep only the needed columns.
wanted_columns = [
    'image_name',
    'label_name',
    'bbox_x',
    'bbox_y',
    'bbox_width',
    'bbox_height',
    'image_dir',
]
data = pd.concat(dfs, ignore_index=True)
data = data[wanted_columns]

# Discard annotations whose label is not one of the known classes.
has_valid_label = data['label_name'].isin(ripe_labels + green_labels)
data = data[has_valid_label]

# Pre-create <output_dir>/<ripeness>/<split>/<label> for every combination;
# dots in label names become underscores in the folder name.
labels_by_ripeness = {'ripe': ripe_labels, 'green': green_labels}
for ripeness, labels in labels_by_ripeness.items():
    for split in ('train', 'val'):
        for label in labels:
            class_folder = label.replace(".", "_")
            os.makedirs(f'{output_dir}/{ripeness}/{split}/{class_folder}', exist_ok=True)

# Split at the image level (80% train / 20% val) so that all boxes from one
# image name land in the same split; the fixed seed keeps the split repeatable.
unique_images = data['image_name'].unique()
train_images, val_images = train_test_split(
    unique_images,
    test_size=0.2,
    random_state=42,
)

# Crop and save
# Every annotated bounding box is cut out of its source image, resized to
# 224x224 and written to <output_dir>/<ripeness>/<split>/<label>/.
# Rows that cannot be processed are counted and reported instead of being
# dropped silently.
train_image_set = set(train_images)  # O(1) lookups instead of scanning the array per row
bbox_columns = ('bbox_x', 'bbox_y', 'bbox_width', 'bbox_height')
saved = 0
skipped = 0

# Group by source image so each file is read from disk once, not once per box.
# sort=False keeps the groups in order of first appearance.
for (image_dir, image_name), boxes in data.groupby(['image_dir', 'image_name'], sort=False):
    img_path = os.path.join(image_dir, image_name)
    # cv2.imread yields None for unreadable files; treat a missing file the same way.
    img = cv2.imread(img_path) if os.path.exists(img_path) else None
    if img is None:
        skipped += len(boxes)
        continue

    img_h, img_w = img.shape[:2]
    split = 'train' if image_name in train_image_set else 'val'
    # Split off the real extension so non-".jpg" images also get a unique,
    # per-annotation file name (a plain ".jpg" replace would leave them
    # unchanged and make crops overwrite each other).
    stem, ext = os.path.splitext(image_name)

    for idx, row in boxes.iterrows():
        try:
            x, y, w, h = (int(row[col]) for col in bbox_columns)
        except (TypeError, ValueError):
            # NaN or non-numeric box coordinates: nothing sensible to crop.
            skipped += 1
            continue

        # Clamp the box to the image. A negative start would otherwise be
        # interpreted by numpy as an offset from the end of the axis and
        # select the wrong region.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            skipped += 1
            continue

        crop = cv2.resize(img[y0:y1, x0:x1], (224, 224))  # Resize for CNN
        ripeness = 'ripe' if row['label_name'].startswith('R') else 'green'
        label_dir = row['label_name'].replace('.', '_')
        save_path = f'{output_dir}/{ripeness}/{split}/{label_dir}/{stem}_{idx}{ext}'
        # cv2.imwrite reports failure through its return value.
        if cv2.imwrite(save_path, crop):
            saved += 1
        else:
            skipped += 1

print("Cropped datasets saved to", output_dir)
print(f"{saved} crops written, {skipped} annotations skipped")