In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import pandas as pd

# Root folders of the two image sources; every image found under `packed_dir` is
# labelled 'Packed' and every image under `unpacked_dir` is labelled 'Unpacked'.
packed_dir = '/kaggle/input/freibergs-groceries/images'
unpacked_dir = '/kaggle/input/woking-dataset/Detection'

# Empty frame with the target schema. It is only a placeholder: `image_data` is
# reassigned from the scanned directories further down in this cell.
image_data = pd.DataFrame(columns=['image_position', 'label'])

# Function to process images in a directory
def process_images(directory, label):
    """Collect every image file under ``directory`` and tag it with ``label``.

    Files are matched case-insensitively on the extensions .jpg, .jpeg and .png.

    Args:
        directory: Root folder that is searched recursively.
        label: Value stored in the ``label`` column for every image found.

    Returns:
        pandas.DataFrame with the columns ``image_position`` (full file path) and
        ``label``. Both columns exist even when no image is found, so callers can
        always select them.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    data = []
    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
        # (in place for dirs, so the walk itself follows it) keeps the row order
        # stable, which seeded sampling of the resulting frame relies on.
        dirs.sort()
        for filename in sorted(files):
            if filename.lower().endswith(image_suffixes):
                data.append({'image_position': os.path.join(root, filename), 'label': label})
    return pd.DataFrame(data, columns=['image_position', 'label'])

# Scan both source folders; the packed frame is built first, then the unpacked one.
packed_data, unpacked_data = (
    process_images(source_dir, source_label)
    for source_dir, source_label in ((packed_dir, 'Packed'), (unpacked_dir, 'Unpacked'))
)

# Stack the two frames into one table with a fresh 0..n-1 index.
image_data = pd.concat((packed_data, unpacked_data), ignore_index=True)

# Quick look at the result: first rows, overall size, rows per label.
print(image_data.head())
total_images = len(image_data)
print(f"\nTotal number of images: {total_images}")
label_counts = image_data['label'].value_counts()
print(label_counts)

# Persist the table (without the index column) for later cells.
image_data.to_csv('image_data.csv', index=False)
print("\nDataFrame saved to 'image_data.csv'")

In [None]:
# Rich notebook preview of the first rows of the combined image table.
image_data.head()

In [None]:
import os
import pandas as pd

# Root folders of the image sources. `packed_dir` and `new_packed_dir` supply
# 'Packed' images, `unpacked_dir` supplies 'Unpacked' images.
packed_dir = '/kaggle/input/freibergs-groceries/images'
unpacked_dir = '/kaggle/input/woking-dataset/Detection'
new_packed_dir = '/kaggle/input/grocery-store-dataset/GroceryStoreDataset/dataset'

# Empty frame with the target schema. It is only a placeholder: `image_data` is
# reassigned from the scanned directories further down in this cell.
image_data = pd.DataFrame(columns=['image_position', 'label'])

# Function to process images in a directory
def process_images(directory, label):
    """Collect every image file under ``directory`` and tag it with ``label``.

    Files are matched case-insensitively on the extensions .jpg, .jpeg and .png.

    Args:
        directory: Root folder that is searched recursively.
        label: Value stored in the ``label`` column for every image found.

    Returns:
        pandas.DataFrame with the columns ``image_position`` (full file path) and
        ``label``. Both columns exist even when no image is found, so callers can
        always select them.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    data = []
    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting
        # (in place for dirs, so the walk itself follows it) keeps the row order
        # stable, which seeded sampling of the resulting frame relies on.
        dirs.sort()
        for filename in sorted(files):
            if filename.lower().endswith(image_suffixes):
                data.append({'image_position': os.path.join(root, filename), 'label': label})
    return pd.DataFrame(data, columns=['image_position', 'label'])

# Process original packed images
packed_data = process_images(packed_dir, 'Packed')

# Process unpacked images
unpacked_data = process_images(unpacked_dir, 'Unpacked')

# Process new packed images from GroceryStoreDataset: one frame per split that
# actually contains a 'Packages' folder. The frames are collected in a list and
# concatenated once instead of re-copying a growing DataFrame on every iteration.
new_packed_frames = []
for category in ['train', 'val', 'test']:
    packages_dir = os.path.join(new_packed_dir, category, 'Packages')
    if os.path.exists(packages_dir):
        new_packed_frames.append(process_images(packages_dir, 'Packed'))

if new_packed_frames:
    new_packed_data = pd.concat(new_packed_frames, ignore_index=True)
else:
    # pd.concat rejects an empty list; fall back to an empty frame with the schema.
    new_packed_data = pd.DataFrame(columns=['image_position', 'label'])

# Combine all data
image_data = pd.concat([packed_data, unpacked_data, new_packed_data], ignore_index=True)

# Display the first few rows of the DataFrame
print(image_data.head())

# Display the total number of images
print(f"\nTotal number of images: {len(image_data)}")

# Display the number of packed and unpacked images
print("\nCount of packed and unpacked images:")
print(image_data['label'].value_counts())

# Save the DataFrame to a CSV file
image_data.to_csv('image_data.csv', index=False)
print("\nDataFrame saved to 'image_data.csv'")

In [None]:
# Rich notebook preview of the first rows of the combined image table.
image_data.head()

In [None]:
# Number of rows per label in `image_data` (the same summary the cell that built
# the table already prints).
print("\nCount of packed and unpacked images:")
print(image_data['label'].value_counts())

In [None]:
import pandas as pd
from sklearn.utils import resample

# This cell works on the `image_data` frame built by the cells above. To run it
# on its own, reload the saved table first:
# image_data = pd.read_csv('image_data.csv')

# Split the table by label once and count the rows of each class.
packed_rows = image_data.loc[image_data['label'] == 'Packed']
unpacked_rows = image_data.loc[image_data['label'] == 'Unpacked']
packed_count, unpacked_count = len(packed_rows), len(unpacked_rows)

print("Original distribution:")
print(f"Packed: {packed_count}")
print(f"Unpacked: {unpacked_count}")

# Both classes are cut down to the size of the smaller one.
min_count = min(packed_count, unpacked_count)

# Seeded random draw of `min_count` rows per class.
packed_sampled = packed_rows.sample(n=min_count, random_state=42)
unpacked_sampled = unpacked_rows.sample(n=min_count, random_state=42)

# Stack the two samples, shuffle all rows with a fixed seed and renumber the index.
balanced_image_data = (
    pd.concat([packed_sampled, unpacked_sampled], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Show the per-label row counts after balancing.
print("\nBalanced distribution:")
print(balanced_image_data['label'].value_counts())

# Write the balanced table (without the index column) for the training cell.
balanced_image_data.to_csv('balanced_image_data.csv', index=False)
print("\nBalanced DataFrame saved to 'balanced_image_data.csv'")

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from tqdm import tqdm

# Ensure GPU usage if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the balanced dataset written by the balancing cell above. That file carries
# the 'image_position' / 'label' columns this cell reads; 'balanced_dataset.csv' is
# only produced further down the notebook and uses 'Image Directory' / 'Label'.
df = pd.read_csv('balanced_image_data.csv')

# Map categories to numerical labels
category_to_idx = {'Packed': 0, 'Unpacked': 1}
df['Label'] = df['label'].map(category_to_idx)

# Split the dataset into train, validation, and test sets: 20% is held out for
# testing, then 20% of the remainder for validation. Both splits are stratified
# on the label and seeded so that a re-run reproduces the same partition.
train_val_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['Label'], random_state=42
)
train_df, val_df = train_test_split(
    train_val_df, test_size=0.2, stratify=train_val_df['Label'], random_state=42
)

# Custom Dataset class for loading images and labels
class PackedUnpackedDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide an ``image_position`` column (path of the image file)
    and a ``Label`` column (class value returned as-is).
    """

    def __init__(self, dataframe, transform=None):
        """Keep a re-indexed copy of ``dataframe`` and the optional ``transform``."""
        self.transform = transform
        # Drop the incoming index; rows are addressed by position from here on.
        self.dataframe = dataframe.reset_index(drop=True)

    def __len__(self):
        """Number of rows, i.e. number of samples."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` as RGB and return it with its label."""
        row = self.dataframe.iloc[idx]
        image = Image.open(row['image_position']).convert('RGB')
        label = row['Label']

        # Without a transform the PIL image is returned unchanged.
        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing. Both pipelines resize to 224x224, convert to a tensor and apply
# the same per-channel mean/std normalisation; only the training pipeline adds a
# random horizontal flip.
resize_step = transforms.Resize((224, 224))
tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

transform_train = transforms.Compose([
    resize_step,
    transforms.RandomHorizontalFlip(),
    tensor_step,
    normalize_step,
])
transform_val_test = transforms.Compose([resize_step, tensor_step, normalize_step])

# One dataset per split; validation and test share the flip-free pipeline.
train_dataset, val_dataset, test_dataset = (
    PackedUnpackedDataset(split_df, transform=split_transform)
    for split_df, split_transform in (
        (train_df, transform_train),
        (val_df, transform_val_test),
        (test_df, transform_val_test),
    )
)

# Batches of 16; only the training loader shuffles.
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Pre-trained EfficientNet B0 whose last classifier layer is swapped for a
# 2-output linear layer (Packed / Unpacked), moved to the selected device.
model = models.efficientnet_b0(pretrained=True)
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, 2)
model = model.to(device)

# Cross-entropy loss, Adam over all model parameters with learning rate 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Function to train and evaluate the model
def train_and_evaluate():
    """Fine-tune ``model``, restore its best validation epoch, test it and save it.

    Operates on notebook globals defined in this cell: ``model``, ``criterion``,
    ``optimizer``, ``device`` and the ``train_loader`` / ``val_loader`` /
    ``test_loader`` data loaders.

    Every epoch trains on ``train_loader`` and measures accuracy on ``val_loader``.
    After the last epoch the weights of the most accurate epoch are loaded back,
    accuracy on ``test_loader`` is printed, and the state dict is written to
    'efficientnet_b0_packed_unpacked.pth' in the current directory.

    Returns:
        None. Loss and accuracy figures are printed.
    """
    import copy  # local import: only needed for the weight snapshots below

    num_epochs = 3
    best_accuracy = 0
    # Snapshot the starting weights so there is always something to restore, even
    # if no epoch exceeds the initial best_accuracy of 0.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0

        for images, labels in tqdm(train_loader, desc=f'Training Epoch {epoch+1}/{num_epochs}'):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a per-sample mean.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation phase
        model.eval()
        corrects = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc='Validating'):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                corrects += torch.sum(preds == labels).item()

        epoch_accuracy = corrects / len(val_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

        # Save the best model based on validation accuracy
        if epoch_accuracy > best_accuracy:
            best_accuracy = epoch_accuracy
            # state_dict() hands back references to the live parameter tensors, so
            # later epochs would overwrite them; deep-copy to freeze this epoch.
            best_model_wts = copy.deepcopy(model.state_dict())

    # Load best model weights and evaluate on test set
    model.load_state_dict(best_model_wts)

    # Test phase
    model.eval()
    test_corrects = 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc='Testing'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            test_corrects += torch.sum(preds == labels).item()

    test_accuracy = test_corrects / len(test_loader.dataset)
    print(f'Test Accuracy: {test_accuracy:.4f}')

    # Save the trained model
    torch.save(model.state_dict(), 'efficientnet_b0_packed_unpacked.pth')
    print("Model saved as 'efficientnet_b0_packed_unpacked.pth'")

# Run the full train / validate / test cycle for the EfficientNet B0 model; the
# function also writes the weights to 'efficientnet_b0_packed_unpacked.pth'.
train_and_evaluate()

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Class names in index order, with lookups in both directions.
categories = ['Packed', 'Unpacked']
idx_to_category = dict(enumerate(categories))
category_to_idx = {name: position for position, name in idx_to_category.items()}

# Build an EfficientNet B0 without pre-trained weights, give it a
# len(categories)-output head, then fill it from the saved checkpoint.
model = models.efficientnet_b0(pretrained=False)  # Load without pre-trained weights
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(categories))
checkpoint_state = torch.load('/kaggle/working/efficientnet_b0_packed_unpacked.pth', map_location=device)
model.load_state_dict(checkpoint_state)
model = model.to(device)
model.eval()

# Preprocessing for a single input image: resize, tensor conversion, normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def predict_image_class(image_path):
    """Classify one image file with the notebook's loaded ``model``.

    Uses the globals ``transform``, ``device``, ``model`` and ``idx_to_category``.

    Args:
        image_path: Path of the image to classify.

    Returns:
        Tuple ``(predicted_label, predicted_category)``: the integer index of the
        highest model output and its name from ``idx_to_category``.
    """
    # Open as RGB, preprocess, and add a leading batch dimension of size 1.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking; keep the index of the largest output.
    with torch.no_grad():
        scores = model(batch)
        best_index = torch.max(scores, 1)[1]

    predicted_label = best_index.item()
    return predicted_label, idx_to_category[predicted_label]

# Example usage: classify one image.
image_path = '/kaggle/input/woking-dataset/Detection/Fruits/Beetroot/Beetroot 1/100_100.jpg'  # Replace with the path to your uploaded image
predicted_label, predicted_category = predict_image_class(image_path)

print("Predicted label:", predicted_label)
print("Predicted category:", predicted_category)

# List every class index next to its name.
print("\nLabelled classes and their actual names:")
for class_index, class_name in idx_to_category.items():
    print(f"Label {class_index}: {class_name}")

In [None]:
import os
import csv
import pandas as pd
from sklearn.model_selection import train_test_split

def categorize_image(directory):
    """Classify a folder name as 'Rotten' or 'Fresh'.

    Args:
        directory: Folder name (any string) to inspect.

    Returns:
        'Rotten' if the name contains one of the case-sensitive markers below,
        otherwise 'Fresh'.
    """
    rotten_markers = ('S_', 'Bad', 'Rotten', 'Old', 'Dried', 'Damaged', 'Formalin-mixed')
    return 'Rotten' if any(marker in directory for marker in rotten_markers) else 'Fresh'

def create_csv(root_dir, output_file):
    """Index the images below ``<root_dir>/Detection/{Fruits,Vegetables}`` as CSV.

    The expected layout is ``<main dir>/<category>/<sub category>/<image>``; only
    image files sitting directly in a sub-category folder are listed. Each row
    holds the image path, the three folder levels and the result of
    ``categorize_image`` for the sub-category folder name.

    Args:
        root_dir: Folder that contains the 'Detection' directory.
        output_file: Path of the CSV file to (over)write.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    columns = ['Image Directory', 'Main Directory', 'Category', 'Sub Category', 'Sub Sub Category']

    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()

        for main_dir in ['Fruits', 'Vegetables']:
            main_path = os.path.join(root_dir, 'Detection', main_dir)

            for category in os.listdir(main_path):
                category_path = os.path.join(main_path, category)
                if not os.path.isdir(category_path):
                    continue

                for sub_category in os.listdir(category_path):
                    sub_category_path = os.path.join(category_path, sub_category)
                    if not os.path.isdir(sub_category_path):
                        continue

                    # The freshness tag depends on the folder name only, so it is
                    # computed once per sub-category.
                    sub_sub_category = categorize_image(sub_category)

                    for image in os.listdir(sub_category_path):
                        if not image.lower().endswith(image_suffixes):
                            continue
                        writer.writerow({
                            'Image Directory': os.path.join(sub_category_path, image),
                            'Main Directory': main_dir,
                            'Category': category,
                            'Sub Category': sub_category,
                            'Sub Sub Category': sub_sub_category
                        })

def create_balanced_df(unpacked_csv, packed_csv1, packed_csv2, output_csv):
    """Merge two packed CSVs with a proportionally re-sampled unpacked CSV.

    All packed rows are kept and labelled 'Packed'. The unpacked rows are grouped
    by Main Directory / Category / Sub Category / Sub Sub Category; each group is
    sampled with replacement to ``round(n_packed * group_share)`` rows and
    labelled 'Unpacked'. The combined rows are shuffled (unseeded) and written to
    ``output_csv``.

    Args:
        unpacked_csv: CSV of unpacked images; must contain the four grouping columns.
        packed_csv1: First CSV of packed images.
        packed_csv2: Second CSV of packed images.
        output_csv: Destination path of the combined CSV (written without index).

    Returns:
        The combined, shuffled DataFrame with a fresh 0..n-1 index.
    """
    strata = ['Main Directory', 'Category', 'Sub Category', 'Sub Sub Category']

    # Load the three inputs: unpacked first, then the two packed files.
    unpacked_df = pd.read_csv(unpacked_csv)
    packed_parts = [pd.read_csv(packed_path) for packed_path in (packed_csv1, packed_csv2)]

    # All packed rows in one frame, tagged with their class.
    packed_df = pd.concat(packed_parts, ignore_index=True)
    packed_df['label'] = 'Packed'

    # Tag the unpacked rows as well.
    unpacked_df['label'] = 'Unpacked'

    # Target size for the unpacked side.
    n_packed = len(packed_df)

    def _proportional_sample(group):
        # Rows to draw from this group: its share of the unpacked data scaled to n_packed.
        quota = int(np.rint(n_packed * len(group) / len(unpacked_df)))
        return group.sample(n=quota, replace=True)

    unpacked_sampled = (
        unpacked_df.groupby(strata)
        .apply(_proportional_sample)
        .reset_index(drop=True)
    )

    # Stack both classes, shuffle the rows and renumber the index.
    stacked = pd.concat([packed_df, unpacked_sampled], ignore_index=True)
    combined_df = stacked.sample(frac=1).reset_index(drop=True)

    # Persist the result.
    combined_df.to_csv(output_csv, index=False)

    return combined_df

# Usage: first index the Woking images into a CSV in the working directory ...
root_directory = '/kaggle/input/woking-dataset'
unpacked_csv = 'fruit_vegetable_dataset.csv'
create_csv(root_directory, unpacked_csv)

# ... then merge it with two packed-image CSVs.
# NOTE: the two paths below are placeholders; create_balanced_df reads them with
# pd.read_csv, so they must point to real files before this cell is run.
packed_csv1 = '/path/to/packed_dataset1.csv'  # Replace with actual path
packed_csv2 = '/path/to/packed_dataset2.csv'  # Replace with actual path
output_csv = 'balanced_dataset.csv'

balanced_df = create_balanced_df(unpacked_csv, packed_csv1, packed_csv2, output_csv)

# Report the overall size and the rows per label of the combined table.
print(f"Balanced dataset created with {len(balanced_df)} images")
print(balanced_df['label'].value_counts())

In [None]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

def collect_freiberg_images(root_dir):
    """List every image below the sub-folders of ``root_dir`` as a 'Packed' sample.

    Each directory directly inside ``root_dir`` is searched recursively; files
    lying directly in ``root_dir`` are ignored. Extensions .png, .jpg, .jpeg, .gif
    and .bmp are matched case-insensitively.

    Args:
        root_dir: Folder whose sub-directories hold the images.

    Returns:
        pandas.DataFrame with the columns ``Image Directory`` (full file path) and
        ``Label`` (always 'Packed'). The columns exist even when nothing is found.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    images = []
    # Directory listings come back in arbitrary order; sorting keeps the row order
    # stable so that seeded sampling of the returned frame is reproducible.
    for category in tqdm(sorted(os.listdir(root_dir)), desc="Processing Freiberg dataset"):
        category_path = os.path.join(root_dir, category)
        if not os.path.isdir(category_path):
            continue
        for root, dirs, files in os.walk(category_path):
            dirs.sort()  # in-place, so os.walk descends in sorted order too
            for file in sorted(files):
                if file.lower().endswith(image_suffixes):
                    images.append({
                        'Image Directory': os.path.join(root, file),
                        'Label': 'Packed'
                    })
    return pd.DataFrame(images, columns=['Image Directory', 'Label'])

def collect_woking_images(root_dir):
    """List the images below ``root_dir/{Fruits,Vegetables}`` as 'Unpacked' samples.

    The expected layout is ``<main dir>/<category>/<sub category>/...``; every
    sub-category folder is searched recursively. Extensions .png, .jpg, .jpeg,
    .gif and .bmp are matched case-insensitively.

    Args:
        root_dir: Folder containing the 'Fruits' and 'Vegetables' directories.

    Returns:
        pandas.DataFrame with the columns ``Image Directory``, ``Main Directory``,
        ``Category``, ``Sub Category`` and ``Label`` (always 'Unpacked'). The
        columns exist even when nothing is found.

    Raises:
        OSError: from ``os.listdir`` if 'Fruits' or 'Vegetables' is missing.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    columns = ['Image Directory', 'Main Directory', 'Category', 'Sub Category', 'Label']
    images = []
    for main_dir in tqdm(['Fruits', 'Vegetables'], desc="Processing Woking dataset"):
        main_path = os.path.join(root_dir, main_dir)
        # Directory listings come back in arbitrary order; sorting keeps the row
        # order stable so that seeded sampling of this frame is reproducible.
        for category in sorted(os.listdir(main_path)):
            category_path = os.path.join(main_path, category)
            if not os.path.isdir(category_path):
                continue
            for sub_category in sorted(os.listdir(category_path)):
                sub_category_path = os.path.join(category_path, sub_category)
                if not os.path.isdir(sub_category_path):
                    continue
                for root, dirs, files in os.walk(sub_category_path):
                    dirs.sort()  # in-place, so os.walk descends in sorted order too
                    for file in sorted(files):
                        if file.lower().endswith(image_suffixes):
                            images.append({
                                'Image Directory': os.path.join(root, file),
                                'Main Directory': main_dir,
                                'Category': category,
                                'Sub Category': sub_category,
                                'Label': 'Unpacked'
                            })
    return pd.DataFrame(images, columns=columns)

def collect_grocery_images(root_dir):
    """List the images in the 'Packages' folders of ``root_dir`` as 'Packed' samples.

    For each of the sub-directories 'iconic-images-and-descriptions', 'test',
    'train' and 'val', the folder ``<sub dir>/Packages`` is searched recursively
    if it exists. Extensions .png, .jpg, .jpeg, .gif and .bmp are matched
    case-insensitively.

    Args:
        root_dir: Folder containing the sub-directories listed above.

    Returns:
        pandas.DataFrame with the columns ``Image Directory`` (full file path) and
        ``Label`` (always 'Packed'). The columns exist even when nothing is found.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    images = []
    for sub_dir in tqdm(['iconic-images-and-descriptions', 'test', 'train', 'val'], desc="Processing GroceryStore dataset"):
        packages_path = os.path.join(root_dir, sub_dir, 'Packages')
        # A missing sub-directory implies a missing Packages folder, so one check suffices.
        if not os.path.exists(packages_path):
            continue
        for root, dirs, files in os.walk(packages_path):
            # Sorted traversal keeps the row order stable so that seeded sampling
            # of the returned frame is reproducible.
            dirs.sort()
            for file in sorted(files):
                if file.lower().endswith(image_suffixes):
                    images.append({
                        'Image Directory': os.path.join(root, file),
                        'Label': 'Packed'
                    })
    return pd.DataFrame(images, columns=['Image Directory', 'Label'])

# Paths to datasets
freiberg_dir = '/kaggle/input/freibergs-groceries/images'
woking_dir = '/kaggle/input/woking-dataset/Detection'
grocery_dir = '/kaggle/input/grocery-store-dataset/GroceryStoreDataset/dataset'

# Collect images from all datasets
freiberg_df = collect_freiberg_images(freiberg_dir)
woking_df = collect_woking_images(woking_dir)
grocery_df = collect_grocery_images(grocery_dir)

# Combine packed datasets
packed_df = pd.concat([freiberg_df, grocery_df], ignore_index=True)

# Get the number of packed images
n_packed = len(packed_df)

# Stratified sampling of unpacked images: every (Main Directory, Category,
# Sub Category) group contributes rows in proportion to its share of the Woking
# data, drawn with replacement. The draw is seeded like the other sampling steps
# in this cell so that the saved CSV is the same on every run.
unpacked_stratified = woking_df.groupby(['Main Directory', 'Category', 'Sub Category'])
unpacked_sampled = unpacked_stratified.apply(
    lambda x: x.sample(
        n=int(np.rint(n_packed * len(x) / len(woking_df))),
        replace=True,
        random_state=42,
    )
)
unpacked_sampled = unpacked_sampled.reset_index(drop=True)

# Ensure exact same number of packed and unpacked images (per-group rounding can
# leave the unpacked total slightly above or below n_packed).
if len(unpacked_sampled) > n_packed:
    unpacked_sampled = unpacked_sampled.sample(n=n_packed, random_state=42)
elif len(unpacked_sampled) < n_packed:
    packed_df = packed_df.sample(n=len(unpacked_sampled), random_state=42)

# Combine packed and unpacked datasets
combined_df = pd.concat([packed_df, unpacked_sampled], ignore_index=True)

# Keep only 'Image Directory' and 'Label' columns
combined_df = combined_df[['Image Directory', 'Label']]

# Shuffle the combined dataset
combined_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Display counts
packed_count = combined_df[combined_df['Label'] == 'Packed'].shape[0]
unpacked_count = combined_df[combined_df['Label'] == 'Unpacked'].shape[0]

print(f"Number of packed images: {packed_count}")
print(f"Number of unpacked images: {unpacked_count}")
print(f"Total number of images: {len(combined_df)}")

# Save the combined DataFrame to a CSV file
combined_df.to_csv('balanced_dataset.csv', index=False)
print("Balanced dataset saved to 'balanced_dataset.csv'")

In [None]:
# Rich notebook preview of the first rows of the balanced, shuffled table.
combined_df.head()

# EFFICIENTNET MODEL FOR PACKED / UNPACKED CLASSIFICATION

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from tqdm import tqdm

# Ensure GPU usage if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the balanced dataset
df = pd.read_csv('/kaggle/working/balanced_dataset.csv')

# Map categories to numerical labels (the text labels in 'Label' are replaced by 0/1)
category_to_idx = {'Packed': 0, 'Unpacked': 1}
df['Label'] = df['Label'].map(category_to_idx)

# Split the dataset into train, validation, and test sets: 20% is held out for
# testing, then 20% of the remainder for validation. Both splits are stratified
# on the label and seeded so that a re-run reproduces the same partition.
train_val_df, test_df = train_test_split(
    df, test_size=0.2, stratify=df['Label'], random_state=42
)
train_df, val_df = train_test_split(
    train_val_df, test_size=0.2, stratify=train_val_df['Label'], random_state=42
)

# Custom Dataset class for loading images and labels
class PackedUnpackedDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide an ``Image Directory`` column (path of the image file)
    and a ``Label`` column (class value returned as-is).
    """

    def __init__(self, dataframe, transform=None):
        """Keep a re-indexed copy of ``dataframe`` and the optional ``transform``."""
        self.transform = transform
        # Drop the incoming index; rows are addressed by position from here on.
        self.dataframe = dataframe.reset_index(drop=True)

    def __len__(self):
        """Number of rows, i.e. number of samples."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` as RGB and return it with its label."""
        row = self.dataframe.iloc[idx]
        image = Image.open(row['Image Directory']).convert('RGB')
        label = row['Label']

        # Without a transform the PIL image is returned unchanged.
        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing. Both pipelines resize to 224x224, convert to a tensor and apply
# the same per-channel mean/std normalisation; only the training pipeline adds a
# random horizontal flip.
resize_step = transforms.Resize((224, 224))
tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

transform_train = transforms.Compose([
    resize_step,
    transforms.RandomHorizontalFlip(),
    tensor_step,
    normalize_step,
])
transform_val_test = transforms.Compose([resize_step, tensor_step, normalize_step])

# One dataset per split; validation and test share the flip-free pipeline.
train_dataset, val_dataset, test_dataset = (
    PackedUnpackedDataset(split_df, transform=split_transform)
    for split_df, split_transform in (
        (train_df, transform_train),
        (val_df, transform_val_test),
        (test_df, transform_val_test),
    )
)

# Batches of 16; only the training loader shuffles.
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Pre-trained EfficientNet B0 whose last classifier layer is swapped for a
# 2-output linear layer (Packed / Unpacked), moved to the selected device.
model = models.efficientnet_b0(pretrained=True)
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, 2)
model = model.to(device)

# Cross-entropy loss, Adam over all model parameters with learning rate 1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Function to train and evaluate the model
def train_and_evaluate():
    """Fine-tune ``model``, restore its best validation epoch, test it and save it.

    Operates on notebook globals defined in this cell: ``model``, ``criterion``,
    ``optimizer``, ``device`` and the ``train_loader`` / ``val_loader`` /
    ``test_loader`` data loaders.

    Every epoch trains on ``train_loader`` and measures accuracy on ``val_loader``.
    After the last epoch the weights of the most accurate epoch are loaded back,
    accuracy on ``test_loader`` is printed, and the state dict is written to
    'efficientnet_b0_packed_unpacked1.pth' in the current directory.

    Returns:
        None. Loss and accuracy figures are printed.
    """
    import copy  # local import: only needed for the weight snapshots below

    num_epochs = 1
    best_accuracy = 0
    # Snapshot the starting weights so there is always something to restore, even
    # if no epoch exceeds the initial best_accuracy of 0.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0

        for images, labels in tqdm(train_loader, desc=f'Training Epoch {epoch+1}/{num_epochs}'):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a per-sample mean.
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation phase
        model.eval()
        corrects = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc='Validating'):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                corrects += torch.sum(preds == labels).item()

        epoch_accuracy = corrects / len(val_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

        # Save the best model based on validation accuracy
        if epoch_accuracy > best_accuracy:
            best_accuracy = epoch_accuracy
            # state_dict() hands back references to the live parameter tensors, so
            # later epochs would overwrite them; deep-copy to freeze this epoch.
            best_model_wts = copy.deepcopy(model.state_dict())

    # Load best model weights and evaluate on test set
    model.load_state_dict(best_model_wts)

    # Test phase
    model.eval()
    test_corrects = 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc='Testing'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            test_corrects += torch.sum(preds == labels).item()

    test_accuracy = test_corrects / len(test_loader.dataset)
    print(f'Test Accuracy: {test_accuracy:.4f}')

    # Save the trained model
    torch.save(model.state_dict(), 'efficientnet_b0_packed_unpacked1.pth')
    print("Model saved as 'efficientnet_b0_packed_unpacked1.pth'")

# Run the full train / validate / test cycle for the EfficientNet B0 model; the
# function also writes the weights to 'efficientnet_b0_packed_unpacked1.pth'.
train_and_evaluate()

In [None]:
# Checkpoint path used by the inference cells: /kaggle/working/efficientnet_b0_packed_unpacked.pth
# (kept as a comment: a bare path is not valid Python and made this cell fail)

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Class names in index order, with lookups in both directions.
categories = ['Packed', 'Unpacked']
idx_to_category = dict(enumerate(categories))
category_to_idx = {name: position for position, name in idx_to_category.items()}

# Build an EfficientNet B0 without pre-trained weights, give it a
# len(categories)-output head, then fill it from the saved checkpoint.
# NOTE(review): the training cell directly above saves
# 'efficientnet_b0_packed_unpacked1.pth'; this loads the file without the '1'
# suffix (written by the earlier training cell) - confirm which one is intended.
model = models.efficientnet_b0(pretrained=False)  # Load without pre-trained weights
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(categories))
checkpoint_state = torch.load('/kaggle/working/efficientnet_b0_packed_unpacked.pth', map_location=device)
model.load_state_dict(checkpoint_state)
model = model.to(device)
model.eval()

# Preprocessing for a single input image: resize, tensor conversion, normalisation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def predict_image_class(image_path):
    """Classify one image file with the notebook's loaded ``model``.

    Uses the globals ``transform``, ``device``, ``model`` and ``idx_to_category``.

    Args:
        image_path: Path of the image to classify.

    Returns:
        Tuple ``(predicted_label, predicted_category)``: the integer index of the
        highest model output and its name from ``idx_to_category``.
    """
    # Open as RGB, preprocess, and add a leading batch dimension of size 1.
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking; keep the index of the largest output.
    with torch.no_grad():
        scores = model(batch)
        best_index = torch.max(scores, 1)[1]

    predicted_label = best_index.item()
    return predicted_label, idx_to_category[predicted_label]

# Example usage: classify one image.
image_path = '/kaggle/input/grocery-store-dataset/GroceryStoreDataset/dataset/test/Vegetables/Ginger/Ginger_001.jpg'  # Replace with the path to your uploaded image
predicted_label, predicted_category = predict_image_class(image_path)

print("Predicted label:", predicted_label)
print("Predicted category:", predicted_category)

# List every class index next to its name.
print("\nLabelled classes and their actual names:")
for class_index, class_name in idx_to_category.items():
    print(f"Label {class_index}: {class_name}")

# STANDARD SEGMENTATION

In [None]:
from ultralytics import SAM, YOLO
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score

def iou(mask1, mask2):
    """Intersection-over-union of two binary masks.

    Computed directly on boolean arrays rather than through a general-purpose
    metric function: this helper is called for every pair of candidate masks, so
    per-call input validation would dominate the runtime.

    Args:
        mask1: Array-like mask; every non-zero element counts as foreground.
        mask2: Array-like mask with the same number of elements as ``mask1``.

    Returns:
        float between 0.0 and 1.0; 0.0 when both masks are empty.

    Raises:
        ValueError: if the masks differ in their number of elements.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    if first.shape != second.shape:
        # Guard against silent broadcasting of mismatched masks.
        raise ValueError(
            f"Masks must have the same number of elements, got {first.size} and {second.size}"
        )

    union = (first | second).sum()
    if union == 0:
        return 0.0
    return float((first & second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` in which every all-zero pixel is set to 255.

    Args:
        image: Array whose last axis holds three channel values per pixel.

    Returns:
        New array shaped like ``image``; pixels with at least one non-zero channel
        are copied unchanged, all other pixels are 255 in every channel. The input
        is not modified.
    """
    # A pixel counts as content when any of its channels differs from 0.
    has_content = np.any(image != [0, 0, 0], axis=-1)

    # Start from an all-255 canvas and copy the content pixels onto it.
    canvas = np.ones_like(image) * 255
    canvas[has_content] = image[has_content]
    return canvas

def process_sam_results(results, model_name, yolo_model, iou_threshold=0.8, confidence_threshold=0.25):
    """Turn the first segmentation result into de-duplicated, YOLO-checked crops.

    Masks and boxes of ``results[0]`` are walked in parallel. A mask whose IoU
    with an already accepted mask exceeds ``iou_threshold`` is skipped. Each
    accepted object is cut out of the original image, cropped to its box, placed
    on a white background, passed to ``yolo_model`` and written to
    ``"{model_name}object{id}.png"`` in the current directory.

    Args:
        results: Indexable segmentation output; ``results[0]`` must expose
            ``orig_img``, ``masks.data`` and ``boxes.data``.
        model_name: Prefix for the output file names, also stored in each record.
        yolo_model: Callable detector; ``yolo_model(image)[0].boxes.conf`` is read.
        iou_threshold: Masks overlapping an accepted one by more than this are dropped.
        confidence_threshold: A crop is a "product" if any detection confidence
            exceeds this value.

    Returns:
        List of dicts with the keys ``model``, ``object_id``, ``image`` (PIL
        image), ``mask``, ``x1``, ``y1``, ``x2``, ``y2``, ``filename`` and
        ``is_product``.
    """
    first_result = results[0]
    original_image = first_result.orig_img
    masks = first_result.masks.data.cpu().numpy()
    boxes = first_result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        # Skip a mask that overlaps an already accepted object too strongly.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        # Black out everything outside the mask.
        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        # The first four box values are used as x1, y1, x2, y2 corner coordinates.
        x1, y1, x2, y2 = (int(coordinate) for coordinate in box[:4])

        # Crop to the box and replace the blacked-out surroundings with white.
        object_on_white = place_on_white_background(object_image[y1:y2, x1:x2])

        # Run the detector on the crop; any sufficiently confident hit marks a product.
        detections = yolo_model(object_on_white)[0].boxes
        is_product = any(conf > confidence_threshold for conf in detections.conf)

        # NOTE(review): the RGB->BGR conversion assumes orig_img is in RGB order - confirm.
        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename,
            'is_product': is_product
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show the extracted objects in a grid, titled with their product verdict.

    Args:
        objects: Sequence of dicts providing the keys ``image``, ``object_id`` and
            ``is_product``.
        cols: Number of grid columns.

    Returns:
        None. The figure is displayed with ``plt.show()``; for an empty
        ``objects`` a short message is printed instead.
    """
    n = len(objects)
    if n == 0:
        # A grid with zero rows cannot be created, so there is nothing to draw.
        print("No objects to visualize.")
        return

    rows = (n + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # when the grid is 1x1 (otherwise a bare Axes object would be returned).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        title = f"Object {obj['object_id']}\n{'Product' if obj['is_product'] else 'Not Product'}"
        ax.set_title(title, color='green' if obj['is_product'] else 'red')

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load models
sam_model_b = SAM("sam2_b.pt")
yolo_model = YOLO("/kaggle/input/nothing-new/pytorch/default/1/yolov8x-oiv7 (2).pt")  # or the path to your YOLO model

# Load image
img_path = "/kaggle/input/new-tester/Imashe.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(f"Could not read image: {img_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Process with SAM2_b model
results_b = sam_model_b(img)

# Display segmentation results
plt.figure(figsize=(10, 10))
plt.imshow(results_b[0].plot())
plt.title("Segmentation results - SAM2_b")
plt.axis('off')
plt.show()

# Process and visualize objects
objects_b = process_sam_results(results_b, "sam2_b", yolo_model)
visualize_objects(objects_b)

# Create DataFrame: one row per object, without the array/image payloads
df_b = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects_b])

# Display DataFrame
print(df_b)

# Save DataFrame to CSV
df_b.to_csv('segmented_objects_sam2_b.csv', index=False)

# COMPLETE

In [None]:

import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
import pandas as pd

# Ensure GPU usage if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Class names and the two lookup tables between name and integer label.
# presumably the list order matches the label order used in training — verify.
categories =['Fresh', 'Rotten']
category_to_idx = {category: idx for idx, category in enumerate(categories)}
idx_to_category = {idx: category for category, idx in category_to_idx.items()}

# Load the saved model
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=None` — confirm against the installed version.
model = models.efficientnet_b0(pretrained=False)  # Load without pre-trained weights
# Replace the final linear layer so the network outputs one logit per category.
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(categories))
model.load_state_dict(torch.load('/kaggle/input/fresh-model/keras/default/1/efficientnet_b0_fruit_veg (2).pth', map_location=device))
model.to(device)
model.eval()

# Define the transformation for the input image
# Resize to 224x224, convert to a tensor, then normalise each channel with the
# mean/std values listed here.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def predict_image_class(image_path):
    """Classify a single image file with the module-level ``model``.

    The image is opened as RGB, passed through the module-level ``transform``
    and evaluated as a batch of one on ``device``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        Tuple ``(predicted_label, predicted_category)``: the integer index of
        the highest-scoring output and its name from ``idx_to_category``.
    """
    image = Image.open(image_path).convert('RGB')
    batch = transform(image).unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(batch)

    predicted_label = outputs.argmax(dim=1).item()
    predicted_category = idx_to_category[predicted_label]

    return predicted_label, predicted_category


# Example usage:
# Classify one sample image and report both the numeric label and its name.
image_path = '/kaggle/input/woking-dataset/Detection/Fruits/Pear/Rotten/Image1.png'  # Replace with the path to your uploaded image
predicted_label, predicted_category = predict_image_class(image_path)

print("Predicted label:", predicted_label)
print("Predicted category:", predicted_category)

# Display the labelled classes and their actual names
print("\nLabelled classes and their actual names:")
for idx, category in idx_to_category.items():
    print(f"Label {idx}: {category}")

In [None]:
# Scratch cell: prints a fixed string (presumably a kernel liveness check).
print('ello')

In [None]:
!pip install ultralytics

In [None]:
import torch
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

def iou(mask1, mask2):
    """Intersection-over-union (Jaccard index) of two binary masks.

    Any non-zero element counts as foreground. The value is computed directly
    with NumPy: this function runs once per mask pair during de-duplication,
    and ``sklearn.metrics.jaccard_score`` is needlessly heavy for image-sized
    arrays.

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask with the same number of elements as ``mask1``.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.count_nonzero(first | second)
    if union == 0:
        return 0.0
    return float(np.count_nonzero(first & second) / union)

def place_on_white_background(image):
    """Return a copy of ``image`` in which every pure-black pixel is white.

    A pixel counts as foreground when at least one of its three channels is
    non-zero; all other pixels are replaced by 255. The input is not modified.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    canvas = np.ones_like(image) * 255
    # Copy only the foreground pixels onto the white canvas.
    np.copyto(canvas, image, where=is_foreground[..., np.newaxis])
    return canvas

def resize_image(image, min_size=224):
    """Upscale ``image`` so that its shorter side is about ``min_size`` pixels.

    Images whose width and height are both at least ``min_size`` are returned
    unchanged. Otherwise both sides are scaled by the same factor (truncated to
    whole pixels) and resampled with LANCZOS.

    Args:
        image: Object exposing ``size`` as (width, height) and ``resize``.
        min_size: Minimum length required for both sides.
    """
    width, height = image.size
    shortest_side = min(width, height)
    if shortest_side >= min_size:
        return image

    scale = min_size / shortest_side
    target_size = (int(width * scale), int(height * scale))
    return image.resize(target_size, Image.LANCZOS)

def is_product(image, model, processor):
    """Ask the vision-language model whether the crop shows a product.

    Args:
        image: PIL image of one segmented object.
        model: Generation model used to answer the question.
        processor: Processor that builds the chat prompt and decodes tokens.

    Returns:
        True when the model's reply contains "yes", otherwise False.
    """
    image = resize_image(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Is this image showing a packed product or fruits or vegetables ? Answer only yes or no."}
            ]
        }
    ]
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
    inputs = inputs.to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=10)

    # generate() returns the prompt tokens followed by the reply. Decode only
    # the reply: the prompt itself contains "yes or no", so decoding the whole
    # sequence would make the substring check below always succeed.
    prompt_length = inputs["input_ids"].shape[1]
    reply_ids = output_ids[:, prompt_length:]
    output_text = processor.batch_decode(reply_ids, skip_special_tokens=True)[0].strip().lower()
    return "yes" in output_text

def process_sam_results(results, model_name, qwen_model, qwen_processor, iou_threshold=0.8, min_object_size=28):
    """Turn the first SAM result into a list of de-duplicated product crops.

    Each mask is compared with the already accepted objects; masks whose IoU
    with any of them exceeds ``iou_threshold`` are dropped. The remaining
    masks are cut out of the original image, cropped to their box, placed on
    white and kept only if ``is_product`` says yes. Kept crops are written to
    ``{model_name}object{id}.png`` in the working directory.

    Args:
        results: Sequence whose first item exposes ``orig_img``, ``masks.data``
            and ``boxes.data``.
        model_name: Prefix for output file names, also stored per object.
        qwen_model: Model forwarded to ``is_product``.
        qwen_processor: Processor forwarded to ``is_product``.
        iou_threshold: Overlap above which a mask counts as a duplicate.
        min_object_size: Minimum crop height and width in pixels.

    Returns:
        List of dicts with keys model, object_id, image, mask, x1, y1, x2, y2,
        filename and is_product.
    """
    first_result = results[0]
    original_image = first_result.orig_img
    masks = first_result.masks.data.cpu().numpy()
    boxes = first_result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        # Duplicate of an object that was already accepted.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        # Keep only the masked pixels; everything else stays black.
        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = (int(value) for value in box[:4])
        cropped_object = object_image[y1:y2, x1:x2]

        # Skip extremely small objects
        crop_height, crop_width = cropped_object.shape[:2]
        if crop_height < min_object_size or crop_width < min_object_size:
            continue

        object_on_white = place_on_white_background(cropped_object)
        pil_image = Image.fromarray(object_on_white)

        is_product_result = is_product(pil_image, qwen_model, qwen_processor)
        if not is_product_result:
            continue

        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': pil_image,
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename,
            'is_product': is_product_result
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every segmented object in a grid, titled with its object id.

    Args:
        objects: Sequence of dicts, each providing 'image' (anything
            ``imshow`` accepts) and 'object_id'.
        cols: Number of columns in the grid.

    Returns:
        None. Nothing is drawn when ``objects`` is empty (for example when the
        product filter rejected every segment), because ``plt.subplots``
        rejects a grid with zero rows.
    """
    n = len(objects)
    if n == 0:
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (the default would return a bare Axes object).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Blank out the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load models
# SAM segments the image; the Qwen2-VL model and its processor are passed to
# process_sam_results, which uses them to filter the segments.
sam_model = SAM("sam2_b.pt")
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)
qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Load image
# cv2.imread yields BGR channel order; convert to RGB before plotting/inference.
# NOTE(review): imread returns None for an unreadable path, in which case
# cvtColor fails — confirm the path exists in the attached dataset.
img_path = "/kaggle/input/newest-tester-data/download (3).jpg"
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display original image
plt.figure(figsize=(10, 10))
plt.imshow(img)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Process with SAM model
results = sam_model(img)

# Display segmentation results
# results[0].plot() renders the first result as an image array for imshow.
plt.figure(figsize=(10, 10))
plt.imshow(results[0].plot())
plt.title("Segmentation results - SAM")
plt.axis('off')
plt.show()

# Process and visualize objects
objects = process_sam_results(results, "sam", qwen_model, qwen_processor)
visualize_objects(objects)

# Create DataFrame
# 'mask' and 'image' hold array/image objects, so they are left out of the table.
df = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects])

# Display DataFrame
print(df)

# Save DataFrame to CSV
df.to_csv('segmented_products.csv', index=False)

# Complete working

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
import warnings

# Suppress warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

# Ensure GPU usage if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def iou(mask1, mask2):
    """Intersection-over-union (Jaccard index) of two binary masks.

    Any non-zero element counts as foreground. The value is computed directly
    with NumPy: this function runs once per mask pair during de-duplication,
    and ``sklearn.metrics.jaccard_score`` is needlessly heavy for image-sized
    arrays.

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask with the same number of elements as ``mask1``.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.count_nonzero(first | second)
    if union == 0:
        return 0.0
    return float(np.count_nonzero(first & second) / union)

def place_on_white_background(image):
    """Return a copy of ``image`` in which every pure-black pixel is white.

    A pixel counts as foreground when at least one of its three channels is
    non-zero; all other pixels are replaced by 255. The input is not modified.
    """
    white = np.ones_like(image) * 255
    non_black = (image != [0, 0, 0]).any(axis=-1)
    # Broadcast the per-pixel flag over the channel axis to pick the source.
    return np.where(non_black[..., np.newaxis], image, white)

def process_sam_results(results, model_name, iou_threshold=0.99):
    """Turn the first SAM result into a list of de-duplicated object crops.

    Each mask is compared with the already accepted objects; masks whose IoU
    with any of them exceeds ``iou_threshold`` are dropped. Every remaining
    mask is cut out of the original image, cropped to its box, placed on white
    and written to ``{model_name}object{id}.png`` in the working directory.

    Args:
        results: Sequence whose first item exposes ``orig_img``, ``masks.data``
            and ``boxes.data``.
        model_name: Prefix for output file names, also stored per object.
        iou_threshold: Overlap above which a mask counts as a duplicate.

    Returns:
        List of dicts with keys model, object_id, image, mask, x1, y1, x2, y2
        and filename.
    """
    first_result = results[0]
    original_image = first_result.orig_img
    masks = first_result.masks.data.cpu().numpy()
    boxes = first_result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        # Duplicate of an object that was already accepted.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        # Keep only the masked pixels; everything else stays black.
        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = (int(value) for value in box[:4])
        object_on_white = place_on_white_background(object_image[y1:y2, x1:x2])

        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every segmented object in a grid, titled with its object id.

    Args:
        objects: Sequence of dicts, each providing 'image' (anything
            ``imshow`` accepts) and 'object_id'.
        cols: Number of columns in the grid.

    Returns:
        None. Nothing is drawn when ``objects`` is empty, because
        ``plt.subplots`` rejects a grid with zero rows.
    """
    n = len(objects)
    if n == 0:
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (the default would return a bare Axes object).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Blank out the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load and initialize models
sam_model_b = SAM("sam2_b.pt")

# Three EfficientNet-B0 classifiers share one recipe: build the architecture,
# swap the final linear layer for the required number of outputs, load the
# saved weights, move to `device`, and switch to eval mode.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=None` — confirm against the installed version.

# Packed vs. unpacked: 2 outputs.
packed_unpacked_model = models.efficientnet_b0(pretrained=False)
packed_unpacked_model.classifier[1] = nn.Linear(packed_unpacked_model.classifier[1].in_features, 2)
packed_unpacked_model.load_state_dict(torch.load('/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_packed_unpacked (2).pth', map_location=device))
packed_unpacked_model.to(device)
packed_unpacked_model.eval()

# Fruit/vegetable type: 54 outputs, one per entry of fruit_veg_categories below.
fruit_veg_model = models.efficientnet_b0(pretrained=False)
fruit_veg_model.classifier[1] = nn.Linear(fruit_veg_model.classifier[1].in_features, 54)
fruit_veg_model.load_state_dict(torch.load('/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_fruit_veg_1 (1).pth', map_location=device))
fruit_veg_model.to(device)
fruit_veg_model.eval()

# Fresh vs. rotten: 2 outputs.
# NOTE(review): this checkpoint's file name says "fruit_veg" although it is
# loaded as the fresh/rotten model — confirm it is the intended file.
fresh_rotten_model = models.efficientnet_b0(pretrained=False)
fresh_rotten_model.classifier[1] = nn.Linear(fresh_rotten_model.classifier[1].in_features, 2)
fresh_rotten_model.load_state_dict(torch.load('/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_fruit_veg (2).pth', map_location=device))
fresh_rotten_model.to(device)
fresh_rotten_model.eval()

# Vision-language model and processor used by get_product_info.
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)
qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Define transformations and categories
# Shared preprocessing for all three classifiers: resize to 224x224, convert
# to a tensor, then normalise each channel with the mean/std values listed.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Each *_idx_to_category dict maps a predicted class index to its name.
# presumably the list order matches the label order used in training — verify.
packed_unpacked_categories = ['Packed', 'Unpacked']
packed_unpacked_idx_to_category = {idx: category for idx, category in enumerate(packed_unpacked_categories)}

fruit_veg_categories = [
    'Orange', 'Tamarillo', 'Lime', 'Pomegranate', 'Plum', 'Pineapple', 'Apple', 'Dates', 'Papaya', 'Guava',
    'Beetroot', 'Pear', 'Strawberry', 'Blueberry', 'Lulo', 'Avacado', 'Lemon', 'Kaki', 'Peach', 'Grape',
    'Banana', 'Cherry', 'Watermelon', 'Mango', 'Grapefruit', 'Broccoli', 'Capsicum', 'Radish', 'Tomato', 'Turnip',
    'Ginger', 'Zucchini', 'Brinjal', 'Pumpkin', 'Bell Pepper', 'Carrot', 'New Mexico Green Chile', 'Eggplant',
    'Baby Corn', 'Zucchini dark', 'Sweet corn', 'Cabbage', 'Bitter_Gourd', 'Cauliflower', 'Chile Pepper',
    'Sweet Potato', 'Bean', 'Cucumber', 'Bottle Gourd', 'Garlic', 'Peas', 'Onion', 'Potato', 'Spinach'
]
fruit_veg_idx_to_category = {idx: category for idx, category in enumerate(fruit_veg_categories)}

fresh_rotten_categories = ['Fresh', 'Rotten']
fresh_rotten_idx_to_category = {idx: category for idx, category in enumerate(fresh_rotten_categories)}

# Category names for packed products; process_image currently assigns only
# the first entry as a placeholder.
packed_categories = [
    'Staples', 'Snacks & Beverages', 'Packaged Food', 'Personal & Baby Care',
    'Household Care', 'Dairy & Eggs', 'Home & Kitchen'
]

def predict_image_class(model, image_path, idx_to_category):
    """Classify one image file with ``model`` and translate the index to a name.

    The image is opened as RGB, run through the module-level ``transform`` and
    evaluated as a batch of one on ``device``.

    Args:
        model: Classifier returning one row of scores per input image.
        image_path: Path of the image file to classify.
        idx_to_category: Mapping from class index to category name.

    Returns:
        ``(class_index, category_name)``; ``(None, None)`` if anything fails,
        in which case the error is printed.
    """
    try:
        rgb_image = Image.open(image_path).convert('RGB')
        batch = transform(rgb_image).unsqueeze(0).to(device)

        with torch.no_grad():
            scores = model(batch)
            best = torch.max(scores, 1)

        # torch.max along a dim returns (values, indices); keep the index.
        class_index = best[1].item()
        return class_index, idx_to_category[class_index]
    except Exception as e:
        print(f"Error predicting class for {image_path}: {str(e)}")
        return None, None

def get_product_info(image_path, question):
    """Ask the module-level Qwen model a free-form question about an image.

    Args:
        image_path: Path of the image file to show to the model.
        question: Text of the question.

    Returns:
        The model's reply (prompt tokens stripped), or the string
        "Error: Unable to process image" if anything fails; the underlying
        error is printed.
    """
    try:
        image = Image.open(image_path)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question}
                ]
            }
        ]
        text_prompt = qwen_processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = qwen_processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
        # Follow the model's own placement instead of hard-coding "cuda", which
        # fails on CPU-only sessions and was then masked by the except below.
        inputs = inputs.to(qwen_model.device)
        output_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
        # generate() returns prompt + reply; drop the prompt part of each row.
        generated_ids = [full_ids[len(prompt_ids):] for prompt_ids, full_ids in zip(inputs.input_ids, output_ids)]
        output_text = qwen_processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        return output_text[0]
    except Exception as e:
        print(f"Error getting product info for {image_path}: {str(e)}")
        return "Error: Unable to process image"

def process_image(image_path):
    """Segment an image and build inventory tables of packed and unpacked items.

    Steps: load the image, segment it with ``sam_model_b``, save every object
    crop via ``process_sam_results``, classify each crop as packed/unpacked,
    then either classify the produce (type, fresh/rotten) or query the Qwen
    model for product details. Items with the same name are merged and
    counted in ``frequency``; the details of the first occurrence are kept.

    Side effects: writes 'segmented_objects.csv', 'packed_items.csv' and
    'unpacked_items.csv' to the working directory.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(df_packed, df_unpacked, df_objects)``. On any error the message is
        printed and three empty frames are returned (the first two still
        carry their column names).
    """
    packed_columns = ['name', 'expiry_date', 'description', 'frequency', 'category']
    unpacked_columns = ['name', 'frequency', 'condition', 'indepth_condition', 'weight']

    try:
        # Load image
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Unable to load image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Perform initial segmentation
        results_b = sam_model_b(img)
        objects_b = process_sam_results(results_b, "sam2_b")

        # Create DataFrame for segmented objects
        df_objects = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects_b])
        df_objects.to_csv('segmented_objects.csv', index=False)

        # Rows are accumulated in insertion-ordered dicts keyed by item name and
        # converted to DataFrames once at the end, instead of concatenating a
        # one-row frame per object.
        packed_rows = {}
        unpacked_rows = {}

        # Process each segmented object
        for obj in objects_b:
            object_image_path = obj['filename']

            # Determine if the object is packed or unpacked
            _, packed_unpacked_category = predict_image_class(packed_unpacked_model, object_image_path, packed_unpacked_idx_to_category)

            if packed_unpacked_category == 'Unpacked':
                # Classify fruit/vegetable
                _, fruit_veg_category = predict_image_class(fruit_veg_model, object_image_path, fruit_veg_idx_to_category)
                if fruit_veg_category is None:
                    # Classification failed (already reported); do not record
                    # a nameless item.
                    continue

                if fruit_veg_category in unpacked_rows:
                    unpacked_rows[fruit_veg_category]['frequency'] += 1
                    continue

                # Only the first occurrence determines the recorded condition.
                _, fresh_rotten_category = predict_image_class(fresh_rotten_model, object_image_path, fresh_rotten_idx_to_category)
                unpacked_rows[fruit_veg_category] = {
                    'name': fruit_veg_category,
                    'frequency': 1,
                    'condition': fresh_rotten_category,
                    'indepth_condition': 'To be determined',  # Placeholder for now
                    'weight': 'To be determined'  # Placeholder for now
                }

            elif packed_unpacked_category == 'Packed':
                # Get product information using Qwen model
                product_name = get_product_info(object_image_path, "What is the name of the product? NOTE: JUST PROVIDE NAME AS THE ANSWER")

                if product_name in packed_rows:
                    # Known product: count it and skip the remaining, expensive
                    # model queries whose answers would be discarded anyway.
                    packed_rows[product_name]['frequency'] += 1
                    continue

                expiry_date = get_product_info(object_image_path, "What is the expiry date of the product? If not visible, say 'Not visible'")
                description = get_product_info(object_image_path, "Provide a brief description of the product")

                # Determine category (placeholder logic, replace with actual categorization)
                category = packed_categories[0]  # Default to first category

                packed_rows[product_name] = {
                    'name': product_name,
                    'expiry_date': expiry_date,
                    'description': description,
                    'frequency': 1,
                    'category': category
                }

        df_packed = pd.DataFrame(list(packed_rows.values()), columns=packed_columns)
        df_unpacked = pd.DataFrame(list(unpacked_rows.values()), columns=unpacked_columns)

        # Save results
        df_packed.to_csv('packed_items.csv', index=False)
        df_unpacked.to_csv('unpacked_items.csv', index=False)

        return df_packed, df_unpacked, df_objects
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        return pd.DataFrame(columns=packed_columns), pd.DataFrame(columns=unpacked_columns), pd.DataFrame()

# Example usage
# Run the full pipeline on one image and print the three resulting tables.
image_path = "/kaggle/input/newest-tester-data/lay-s-american-style-cream-and-onion-potato-chips-32-g-quick-pantry.jpg"
df_packed, df_unpacked, df_objects = process_image(image_path)

print("Packed items:")
print(df_packed)
print("\nUnpacked items:")
print(df_unpacked)
print("\nAll segmented objects:")
print(df_objects)

# Trying for segmentation

In [None]:
from ultralytics import YOLO, SAM
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from skimage.filters import sobel
from skimage.measure import label, regionprops

def detect_and_segment(image_path, yolo_model, sam_model, confidence_threshold=0.25, iou_threshold=0.5, 
                       min_area=1000, blur_threshold=50, edge_threshold=0.1):
    """Detect objects with YOLO, then refine each detection into a SAM mask.

    For every detection above ``confidence_threshold`` the box is passed to
    SAM as a prompt. A mask is kept only if it passes ``is_mask_valid`` and
    does not overlap an already kept mask by more than ``iou_threshold``.

    Args:
        image_path: Path of the image file to process.
        yolo_model: Detector; called with the image path.
        sam_model: Segmenter; called with the RGB image and a ``bboxes`` prompt.
        confidence_threshold: Minimum detection confidence.
        iou_threshold: Maximum allowed overlap with an already kept mask.
        min_area, blur_threshold, edge_threshold: Forwarded to ``is_mask_valid``.

    Returns:
        List of dicts with keys class_id, confidence, image (masked object
        cropped to the mask's bounding box) and mask.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Read the image first so an unreadable path is reported clearly.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Unable to load image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Step 1: Object Detection with YOLOv8
    results = yolo_model(image_path)[0]
    detections = results.boxes.data.cpu().numpy()

    segmented_objects = []

    for detection in detections:
        x1, y1, x2, y2, conf, class_id = detection
        if conf < confidence_threshold:
            continue

        # Step 2: Apply SAM2 for segmentation
        sam_result = sam_model(image_rgb, bboxes=[[x1, y1, x2, y2]])[0]
        # Boolean indexing below requires a bool array; coerce explicitly, as
        # the other segmentation cells in this notebook do.
        mask = sam_result.masks.data[0].cpu().numpy().astype(bool)

        # Check mask quality
        if not is_mask_valid(mask, min_area, blur_threshold, edge_threshold):
            continue

        # Check for overlap with existing objects
        if any(calculate_iou(mask, existing_obj['mask']) > iou_threshold for existing_obj in segmented_objects):
            continue

        # Extract the object using the mask
        object_image = np.zeros_like(image_rgb)
        object_image[mask] = image_rgb[mask]

        # Crop the object. The max indices are inclusive, so the slice end
        # needs +1; otherwise the last row and column of the object are lost
        # (and a one-pixel-high or -wide mask yields an empty crop).
        y_indices, x_indices = np.where(mask)
        x_min, x_max = np.min(x_indices), np.max(x_indices)
        y_min, y_max = np.min(y_indices), np.max(y_indices)
        cropped_object = object_image[y_min:y_max + 1, x_min:x_max + 1]

        # Add segmented object to the list
        segmented_objects.append({
            'class_id': int(class_id),
            'confidence': conf,
            'image': cropped_object,
            'mask': mask
        })

    return segmented_objects

def is_mask_valid(mask, min_area, blur_threshold, edge_threshold):
    """Apply a chain of quality checks to a segmentation mask.

    The mask is rejected as soon as one check fails:
      1. fewer than ``min_area`` foreground pixels;
      2. variance of the Laplacian of the mask below ``blur_threshold``;
      3. mean Sobel response of the mask below ``edge_threshold``;
      4. largest connected region has eccentricity above 0.95 or solidity
         below 0.5.

    Returns:
        True if every check passes, otherwise False.
    """
    # Check area
    if np.sum(mask) < min_area:
        return False

    # Check for blurriness
    mask_as_uint8 = (mask * 255).astype(np.uint8)
    if cv2.Laplacian(mask_as_uint8, cv2.CV_64F).var() < blur_threshold:
        return False

    # Check for edge strength
    if np.mean(sobel(mask)) < edge_threshold:
        return False

    # Check for compactness and shape regularity
    regions = regionprops(label(mask))
    if not regions:
        return True

    largest_region = max(regions, key=lambda region: region.area)
    too_elongated = largest_region.eccentricity > 0.95
    too_ragged = largest_region.solidity < 0.5
    return not (too_elongated or too_ragged)

def calculate_iou(mask1, mask2):
    """Intersection-over-union of two masks (non-zero means foreground).

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask broadcast-compatible with ``mask1``.

    Returns:
        Ratio of overlapping to combined foreground pixels; 0.0 when both
        masks are empty (instead of dividing by zero and producing NaN).
    """
    intersection = np.logical_and(mask1, mask2)
    union = np.logical_or(mask1, mask2)
    union_area = np.sum(union)
    if union_area == 0:
        return 0.0
    return np.sum(intersection) / union_area

def save_and_display_objects(segmented_objects, yolo_model, output_dir='segmented_objects'):
    """Save every segmented object as a PNG and preview the first 25 in a grid.

    Args:
        segmented_objects: Dicts with 'image' (RGB array), 'class_id' and
            'confidence'.
        yolo_model: Model whose ``names`` maps class ids to readable names.
        output_dir: Directory for the PNG files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    max_displayed = 25  # the preview grid is fixed at 5 x 5

    plt.figure(figsize=(20, 20))
    for i, obj in enumerate(segmented_objects):
        class_name = yolo_model.names[obj['class_id']]

        # Save the object image. Saving is not subject to the display limit:
        # previously the loop stopped after 25 objects, so later objects were
        # never written to disk.
        filename = f"{output_dir}/object_{i}_{class_name}.png"
        cv2.imwrite(filename, cv2.cvtColor(obj['image'], cv2.COLOR_RGB2BGR))

        # Display the object image (limit to 25 images for display)
        if i < max_displayed:
            plt.subplot(5, 5, i+1)
            plt.imshow(obj['image'])
            plt.title(f"{class_name}\nConf: {obj['confidence']:.2f}")
            plt.axis('off')

    plt.tight_layout()
    plt.show()

# Load models
# The YOLO model proposes boxes; the SAM model turns each box into a mask.
yolo_model = YOLO('yolo11x.pt')
sam_model = SAM('sam2_b.pt')

# Process an image
image_path = '/kaggle/input/newest-tester-data/images.jpg'
segmented_objects = detect_and_segment(image_path, yolo_model, sam_model)

# Save and display the segmented objects
save_and_display_objects(segmented_objects, yolo_model)

# Print information about segmented objects
# Objects are numbered from 1 here, while the saved file names count from 0.
for i, obj in enumerate(segmented_objects):
    print(f"Object {i+1}:")
    print(f"  Class: {yolo_model.names[obj['class_id']]}")
    print(f"  Confidence: {obj['confidence']:.2f}")
    print(f"  Image Shape: {obj['image'].shape}")
    print()

# NEW

In [None]:
import torch,gc

# Collect unreachable Python objects first so that any tensors they still hold
# are released; only then can empty_cache() hand the freed GPU blocks back.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import torch
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

# Set the device
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def iou(mask1, mask2):
    """Intersection-over-union (Jaccard index) of two binary masks.

    Any non-zero element counts as foreground. The value is computed directly
    with NumPy: this function runs once per mask pair during de-duplication,
    and ``sklearn.metrics.jaccard_score`` is needlessly heavy for image-sized
    arrays.

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask with the same number of elements as ``mask1``.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.count_nonzero(first | second)
    if union == 0:
        return 0.0
    return float(np.count_nonzero(first & second) / union)

def place_on_white_background(image):
    """Return a copy of ``image`` in which every pure-black pixel is white.

    A pixel counts as foreground when at least one of its three channels is
    non-zero; all other pixels are replaced by 255. The input is not modified.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    canvas = np.ones_like(image) * 255
    # Copy only the foreground pixels onto the white canvas.
    np.copyto(canvas, image, where=is_foreground[..., np.newaxis])
    return canvas

def resize_image(image, min_size=224):
    """Upscale ``image`` so that its shorter side is about ``min_size`` pixels.

    Images whose width and height are both at least ``min_size`` are returned
    unchanged. Otherwise both sides are scaled by the same factor (truncated to
    whole pixels) and resampled with LANCZOS.

    Args:
        image: Object exposing ``size`` as (width, height) and ``resize``.
        min_size: Minimum length required for both sides.
    """
    width, height = image.size
    shortest_side = min(width, height)
    if shortest_side >= min_size:
        return image

    scale = min_size / shortest_side
    target_size = (int(width * scale), int(height * scale))
    return image.resize(target_size, Image.LANCZOS)

def is_product(image, model, processor):
    """Ask the vision-language model whether the crop shows a product.

    Args:
        image: PIL image of one segmented object.
        model: Generation model used to answer the question.
        processor: Processor that builds the chat prompt and decodes tokens.

    Returns:
        True when the model's reply contains "yes", otherwise False.
    """
    image = resize_image(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Is this image showing a product , if its a label or qr code or a tag type or just plain white or anything that isnt an object its a no? Answer only yes or no."}
            ]
        }
    ]
    text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")
    # Send the inputs to wherever the model actually lives rather than to the
    # notebook-level `device`, which may differ under device_map="auto".
    inputs = inputs.to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=10)

    # generate() returns the prompt tokens followed by the reply. Decode only
    # the reply: the prompt itself contains "yes or no", so decoding the whole
    # sequence would make the substring check below always succeed.
    prompt_length = inputs["input_ids"].shape[1]
    reply_ids = output_ids[:, prompt_length:]
    output_text = processor.batch_decode(reply_ids, skip_special_tokens=True)[0].strip().lower()
    return "yes" in output_text

def process_sam_results(results, model_name, qwen_model, qwen_processor, iou_threshold=0.8, min_object_size=28):
    """Turn the first SAM result into a list of de-duplicated product crops.

    Each mask is compared with the already accepted objects; masks whose IoU
    with any of them exceeds ``iou_threshold`` are dropped. The remaining
    masks are cut out of the original image, cropped to their box, placed on
    white and kept only if ``is_product`` says yes. Kept crops are written to
    ``{model_name}object{id}.png`` in the working directory.

    Args:
        results: Sequence whose first item exposes ``orig_img``, ``masks.data``
            and ``boxes.data``.
        model_name: Prefix for output file names, also stored per object.
        qwen_model: Model forwarded to ``is_product``.
        qwen_processor: Processor forwarded to ``is_product``.
        iou_threshold: Overlap above which a mask counts as a duplicate.
        min_object_size: Minimum crop height and width in pixels.

    Returns:
        List of dicts with keys model, object_id, image, mask, x1, y1, x2, y2,
        filename and is_product.
    """
    first_result = results[0]
    original_image = first_result.orig_img
    masks = first_result.masks.data.cpu().numpy()
    boxes = first_result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        # Duplicate of an object that was already accepted.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        # Keep only the masked pixels; everything else stays black.
        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = (int(value) for value in box[:4])
        cropped_object = object_image[y1:y2, x1:x2]

        # Skip extremely small objects
        crop_height, crop_width = cropped_object.shape[:2]
        if crop_height < min_object_size or crop_width < min_object_size:
            continue

        object_on_white = place_on_white_background(cropped_object)
        pil_image = Image.fromarray(object_on_white)

        is_product_result = is_product(pil_image, qwen_model, qwen_processor)
        if not is_product_result:
            continue

        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': pil_image,
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename,
            'is_product': is_product_result
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every segmented object in a grid, titled with its object id.

    Args:
        objects: Sequence of dicts, each providing 'image' (anything
            ``imshow`` accepts) and 'object_id'.
        cols: Number of columns in the grid.

    Returns:
        None. Nothing is drawn when ``objects`` is empty (for example when the
        product filter rejected every segment), because ``plt.subplots``
        rejects a grid with zero rows.
    """
    n = len(objects)
    if n == 0:
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (the default would return a bare Axes object).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Blank out the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load models
# SAM is moved to `device` explicitly; the Qwen2-VL model is loaded in float16
# when CUDA is available (float32 otherwise) with device_map="auto".
sam_model = SAM("sam2_b.pt").to(device)
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Load image
# cv2.imread yields BGR channel order; convert to RGB before plotting/inference.
# NOTE(review): imread returns None for an unreadable path, in which case
# cvtColor fails — confirm the path exists in the attached dataset.
img_path = "/kaggle/input/newest-tester-data/download (4).jpg"
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display original image
plt.figure(figsize=(10, 10))
plt.imshow(img)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Process with SAM model
results = sam_model(img)

# Display segmentation results
# results[0].plot() renders the first result as an image array for imshow.
plt.figure(figsize=(10, 10))
plt.imshow(results[0].plot())
plt.title("Segmentation results - SAM")
plt.axis('off')
plt.show()

# Process and visualize objects
objects = process_sam_results(results, "sam", qwen_model, qwen_processor)
visualize_objects(objects)

# Create DataFrame
# 'mask' and 'image' hold array/image objects, so they are left out of the table.
df = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects])

# Display DataFrame
print(df)

# Save DataFrame to CSV
df.to_csv('segmented_products.csv', index=False)

# 1

In [None]:
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score

def iou(mask1, mask2):
    """Intersection-over-union (Jaccard index) of two binary masks.

    Any non-zero element counts as foreground. The value is computed directly
    with NumPy: this function runs once per mask pair during de-duplication,
    and ``sklearn.metrics.jaccard_score`` is needlessly heavy for image-sized
    arrays.

    Args:
        mask1: Array-like mask.
        mask2: Array-like mask with the same number of elements as ``mask1``.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.count_nonzero(first | second)
    if union == 0:
        return 0.0
    return float(np.count_nonzero(first & second) / union)

def place_on_white_background(image):
    """Return a copy of ``image`` in which every pure-black pixel is white.

    A pixel counts as foreground when at least one of its three channels is
    non-zero; all other pixels are replaced by 255. The input is not modified.
    """
    # All-white canvas with the same shape and dtype as the input.
    white = np.ones_like(image) * 255

    # True wherever the pixel is not pure black.
    non_black = (image != [0, 0, 0]).any(axis=-1)

    # Take the original pixel where it is foreground, white elsewhere.
    return np.where(non_black[..., np.newaxis], image, white)

def process_sam_results(results, model_name, iou_threshold=0.8):
    """Turn the first SAM result into a list of de-duplicated object crops.

    Each mask is compared with the already accepted objects; masks whose IoU
    with any of them exceeds ``iou_threshold`` are dropped. Every remaining
    mask is cut out of the original image, cropped to its box, placed on white
    and written to ``{model_name}object{id}.png`` in the working directory.

    Args:
        results: Sequence whose first item exposes ``orig_img``, ``masks.data``
            and ``boxes.data``.
        model_name: Prefix for output file names, also stored per object.
        iou_threshold: Overlap above which a mask counts as a duplicate.

    Returns:
        List of dicts with keys model, object_id, image, mask, x1, y1, x2, y2
        and filename.
    """
    first_result = results[0]
    original_image = first_result.orig_img
    masks = first_result.masks.data.cpu().numpy()
    boxes = first_result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        # Duplicate of an object that was already accepted.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        # Keep only the masked pixels; everything else stays black.
        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = (int(value) for value in box[:4])

        # Place on white background
        object_on_white = place_on_white_background(object_image[y1:y2, x1:x2])

        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every segmented object in a grid, titled with its object id.

    Args:
        objects: Sequence of dicts, each providing 'image' (anything
            ``imshow`` accepts) and 'object_id'.
        cols: Number of columns in the grid.

    Returns:
        None. Nothing is drawn when ``objects`` is empty, because
        ``plt.subplots`` rejects a grid with zero rows.
    """
    n = len(objects)
    if n == 0:
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (the default would return a bare Axes object).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Blank out the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load image
# cv2.imread yields BGR channel order; convert to RGB before plotting/inference.
# NOTE(review): imread returns None for an unreadable path, in which case
# cvtColor fails — confirm the path exists in the attached dataset.
img_path = '/kaggle/input/new-tester/top-view-grapefruit-with-oranges-pink-background_141793-51393.jpg'
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Process with SAM2_b model
sam_model_b = SAM("sam2_b.pt")
results_b = sam_model_b(img)

# Display segmentation results
# results_b[0].plot() renders the first result as an image array for imshow.
plt.figure(figsize=(10, 10))
plt.imshow(results_b[0].plot())
plt.title("Segmentation results - SAM2_b")
plt.axis('off')
plt.show()

# Process and visualize objects
objects_b = process_sam_results(results_b, "sam2_b")
visualize_objects(objects_b)

# Create DataFrame
# 'mask' and 'image' hold array/image objects, so they are left out of the table.
df_b = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects_b])

# Display DataFrame
print(df_b)

# Save DataFrame to CSV
df_b.to_csv('segmented_objects_sam2_b.csv', index=False)

In [None]:
import torch
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
from sklearn.metrics import jaccard_score
from torchvision.models import resnet50
from torchvision.transforms import functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def iou(mask1, mask2):
    """Return the intersection-over-union (Jaccard index) of two binary masks.

    Both masks are interpreted as boolean (non-zero means foreground) and must
    hold the same number of elements. Computed directly with NumPy, which
    avoids scikit-learn's per-call label validation on full-resolution masks.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.logical_or(first, second).sum()
    if union == 0:
        # Two empty masks: define the overlap as 0 instead of dividing by zero.
        return 0.0
    return float(np.logical_and(first, second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` whose pure-black pixels are replaced by white.

    A pixel counts as background when all three channels are 0; every other
    pixel is copied through unchanged. Genuinely black object pixels are
    therefore whitened as well.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    white_canvas = np.ones_like(image) * 255
    return np.where(is_foreground[..., np.newaxis], image, white_canvas)

def process_sam_results(results, sam_model_name, iou_threshold=0.5, min_object_size=28):
    """Cut every distinct SAM mask out of the image and save it as a PNG crop.

    Masks are visited in order. A mask is dropped when it is empty, when its
    IoU with an already accepted mask exceeds ``iou_threshold``, or when its
    bounding box is smaller than ``min_object_size`` pixels in either dimension.

    Args:
        results: Sequence whose first element exposes ``orig_img`` and
            ``masks.data`` (a tensor of per-object masks).
        sam_model_name: Prefix of the output file names and value of the
            ``'model'`` field.
        iou_threshold: IoU above which two masks count as the same object.
        min_object_size: Minimum crop height and width in pixels.

    Returns:
        List of dicts with keys ``model``, ``object_id``, ``image`` (PIL image
        of the crop on a white background), ``mask``, ``bbox``
        (``[x1, y1, x2, y2]``, inclusive pixel coordinates as plain ints) and
        ``filename``. Each crop is also written to ``filename`` in the current
        working directory.
    """
    if results[0].masks is None:
        # Nothing was segmented, so there are no objects to extract.
        return []

    original_image = results[0].orig_img
    masks = results[0].masks.data.cpu().numpy()

    unique_objects = []
    for mask in masks:
        object_mask = mask.astype(bool)
        if not object_mask.any():
            # An empty mask has no bounding box (min()/max() would raise).
            continue

        # Skip masks that duplicate an object we already kept.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        # Bounding box of the mask; max() is the last pixel that belongs to it.
        ys, xs = np.where(object_mask)
        y1, y2, x1, x2 = int(ys.min()), int(ys.max()), int(xs.min()), int(xs.max())

        # Slice ends are exclusive, so +1 keeps the last row and column.
        cropped_object = object_image[y1:y2 + 1, x1:x2 + 1]

        # Skip extremely small objects
        if cropped_object.shape[0] < min_object_size or cropped_object.shape[1] < min_object_size:
            continue

        object_on_white = place_on_white_background(cropped_object)

        object_id = len(unique_objects)
        object_filename = f"{sam_model_name}object{object_id}.png"
        # The crop is converted with COLOR_RGB2BGR before OpenCV writes it.
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': sam_model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'bbox': [x1, y1, x2, y2],
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every extracted object crop in a grid of subplots.

    Args:
        objects: Sequence of dicts with an ``'image'`` entry (anything
            ``Axes.imshow`` accepts) and an ``'object_id'`` entry used in the
            subplot title.
        cols: Number of columns in the grid.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    n = len(objects)
    if n == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def is_object(image, model):
    """Return True when ``model``'s top-scoring class for ``image`` is not index 0.

    The image is converted to a tensor, normalised with the mean/std constants
    below, batched and moved to ``device`` before a gradient-free forward pass.

    NOTE(review): the check treats class index 0 as "background". Whether the
    classifier passed in actually reserves index 0 for background is not
    established anywhere in this cell - confirm against the model's label set.
    """
    tensor = F.to_tensor(image)
    tensor = F.normalize(tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    batch = tensor.unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(batch)

    # Index of the highest-scoring class for the single image in the batch.
    top_class = logits.argmax(dim=1)
    return top_class.item() != 0

# Load SAM model
sam_model = SAM("sam2_b.pt").to(device)

# Load pre-trained ResNet model for object classification
# eval() switches the network to inference mode.
resnet_model = resnet50(pretrained=True).to(device)
resnet_model.eval()

# Load image
# NOTE: cv2.imread returns None (it does not raise) when the file cannot be
# read; the cvtColor call below would then fail.
img_path = "/kaggle/input/new-tester/flat-sticker-pattern-fruits-white-background_1156689-7863.jpg"
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display original image
plt.figure(figsize=(10, 10))
plt.imshow(img)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Process with SAM model
results = sam_model(img)

# Display segmentation results (the annotated image returned by plot())
plt.figure(figsize=(10, 10))
plt.imshow(results[0].plot())
plt.title("Segmentation results - SAM")
plt.axis('off')
plt.show()

# Process SAM results
# Also writes one PNG per kept object to the working directory.
objects = process_sam_results(results, "sam")

print(f"Number of objects detected by SAM: {len(objects)}")

# Visualize objects from SAM
visualize_objects(objects)

# Filter objects using ResNet
# Keeps the crops for which is_object() returns True (top class index != 0).
filtered_objects = [obj for obj in objects if is_object(obj['image'], resnet_model)]

print(f"Number of objects after filtering: {len(filtered_objects)}")

# Visualize filtered objects
visualize_objects(filtered_objects)

# Create DataFrame
# The 'mask' and 'image' entries are dropped so only metadata is tabulated.
df = pd.DataFrame([{k: v for k, v in obj.items() if k not in ['mask', 'image']} for obj in filtered_objects])

# Display DataFrame
print(df)

# Save DataFrame to CSV
df.to_csv('segmented_objects.csv', index=False)

In [None]:
import torch
from transformers import DetrForObjectDetection, DetrImageProcessor
from ultralytics import SAM
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def detect_and_segment(image_path, detr_model, detr_processor, sam_model, confidence_threshold=0.7):
    """Detect objects with DETR, then segment each detection with SAM.

    Args:
        image_path: Path of the image file to process.
        detr_model: DETR object-detection model.
        detr_processor: Image processor matching ``detr_model``.
        sam_model: SAM model that accepts ``bboxes=[...]`` prompts.
        confidence_threshold: Minimum DETR score for a detection to be kept.

    Returns:
        Tuple ``(image_with_boxes, segmented_objects)``. ``image_with_boxes``
        is an RGB array with a green mask overlay, a box and a label per
        detection. ``segmented_objects`` is a list of dicts with keys
        ``class_id``, ``class_name``, ``confidence``, ``bbox`` and ``mask``
        (boolean array).
    """
    # Force 3-channel RGB so grayscale, palette and RGBA files are handled too,
    # and close the file handle as soon as the pixels are loaded.
    with Image.open(image_path) as pil_image:
        image = pil_image.convert("RGB")
    image_np = np.array(image)

    # Step 1: Object Detection with DETR (inference only, so no gradients)
    inputs = detr_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = detr_model(**inputs)

    # Post-process DETR outputs; PIL's size is (width, height), so reverse it.
    target_sizes = torch.tensor([image.size[::-1]])
    results = detr_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=confidence_threshold)[0]

    segmented_objects = []
    image_with_boxes = image_np.copy()

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        confidence = score.item()
        if confidence < confidence_threshold:
            continue

        class_id = label.item()
        class_name = detr_model.config.id2label[class_id]
        box = [round(coord, 2) for coord in box.tolist()]

        # Step 2: Apply SAM for segmentation, prompted with the detection box
        sam_result = sam_model(image_np, bboxes=[box])[0]
        if sam_result.masks is None or len(sam_result.masks.data) == 0:
            # SAM produced no mask for this box; skip the detection.
            continue
        # Boolean dtype is required for the mask-indexing below.
        mask = sam_result.masks.data[0].cpu().numpy().astype(bool)

        # Add segmented object to the list
        segmented_objects.append({
            'class_id': class_id,
            'class_name': class_name,
            'confidence': confidence,
            'bbox': box,
            'mask': mask
        })

        # Visualize the segmentation
        colored_mask = np.zeros_like(image_np)
        colored_mask[mask] = [0, 255, 0]  # Green color for the mask
        image_with_boxes = cv2.addWeighted(image_with_boxes, 1, colored_mask, 0.5, 0)

        # Draw bounding box
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv2.rectangle(image_with_boxes, top_left, bottom_right, (255, 0, 0), 2)

        # Add label just above the box
        label_text = f"{class_name}: {confidence:.2f}"
        cv2.putText(image_with_boxes, label_text, (top_left[0], top_left[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    return image_with_boxes, segmented_objects

# Load models
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
detr_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
sam_model = SAM('sam2_b.pt')  # or 'sam2_l.pt' for a larger model

# Process an image
# NOTE: placeholder path - point this at a real image before running the cell.
image_path = '/path/to/your/image.jpg'
result_image, segmented_objects = detect_and_segment(image_path, detr_model, detr_processor, sam_model)

# Display the original image
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.imshow(Image.open(image_path))
plt.title("Original Image")
plt.axis('off')

# Display the result image
plt.subplot(1, 2, 2)
plt.imshow(result_image)
plt.title("Detected and Segmented Objects")
plt.axis('off')
plt.tight_layout()
plt.show()

# Save the result image
output_path = 'result_image.jpg'
cv2.imwrite(output_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))

# Print information about segmented objects
for i, obj in enumerate(segmented_objects):
    print(f"Object {i+1}:")
    print(f"  Class: {obj['class_name']}")
    print(f"  Confidence: {obj['confidence']:.2f}")
    print(f"  Bounding Box: {obj['bbox']}")
    print(f"  Mask Shape: {obj['mask'].shape}")
    print()

# Visualize individual segmented objects
num_objects = len(segmented_objects)
if num_objects == 0:
    # Without detections the grid would have zero rows (a zero-height figure).
    print("No segmented objects to visualize.")
else:
    cols = 3
    rows = (num_objects + cols - 1) // cols
    plt.figure(figsize=(15, 5 * rows))

    for i, obj in enumerate(segmented_objects):
        plt.subplot(rows, cols, i + 1)

        # Create a masked image; boolean dtype is required for mask indexing.
        # The pixels come from the annotated result image, not the raw input.
        object_mask = obj['mask'].astype(bool)
        masked_image = np.zeros_like(result_image)
        masked_image[object_mask] = result_image[object_mask]

        plt.imshow(masked_image)
        plt.title(f"{obj['class_name']} ({obj['confidence']:.2f})")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
!pip install ultralytics

In [None]:
import cv2
import numpy as np
from ultralytics import SAM
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu

def preprocess_image(image):
    """Blur a BGR image and return a contrast-enhanced grayscale version of it.

    A 5x5 Gaussian blur is applied first to reduce noise, the result is
    converted to grayscale, and CLAHE (clip limit 2.0, 8x8 tiles) is applied
    to that grayscale image.
    """
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    return equalizer.apply(gray)

def is_valid_object(mask, min_area=100, min_solidity=0.5):
    """Decide whether the largest connected region of ``mask`` looks like an object.

    Args:
        mask: Binary mask to analyse.
        min_area: Minimum area of the largest connected region.
        min_solidity: Minimum solidity of that region.

    Returns:
        False when the mask has no region; otherwise whether the largest
        region meets both the area and the solidity requirement.
    """
    components = regionprops(label(mask))
    if len(components) == 0:
        return False

    # Only the largest connected component is judged.
    biggest = max(components, key=lambda region: region.area)
    area_ok = biggest.area >= min_area
    return area_ok and biggest.solidity >= min_solidity

def extract_object(image, mask):
    """Return the masked object on a white background, cropped to its bounding box.

    Args:
        image: ``(H, W, C)`` image array.
        mask: ``(H, W)`` mask; non-zero entries mark object pixels.

    Returns:
        Array covering the mask's bounding box (both end rows/columns
        included); pixels outside the mask are 255.

    Raises:
        ValueError: If ``mask`` contains no object pixels.
    """
    object_mask = np.asarray(mask).astype(bool)
    ys, xs = np.nonzero(object_mask)
    if ys.size == 0:
        raise ValueError("extract_object() received an empty mask")

    # Keep image pixels inside the mask, white everywhere else.
    white_bg = np.ones_like(image) * 255
    object_image = np.where(object_mask[:, :, None], image, white_bg)

    # Slice ends are exclusive, so +1 keeps the last row and column of the object.
    return object_image[ys.min():ys.max() + 1, xs.min():xs.max() + 1]

def segment_and_filter_objects(image_path, sam_model):
    """Segment an image with SAM and return the crops of all valid objects.

    Args:
        image_path: Path of the image file to read.
        sam_model: SAM model; called with the RGB image.

    Returns:
        List of RGB crops (one per mask accepted by ``is_valid_object``), each
        on a white background.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Generate SAM masks
    sam_result = sam_model(image_rgb)[0]
    if sam_result.masks is None:
        # Nothing was segmented.
        return []
    masks = sam_result.masks.data.cpu().numpy()

    # Filter and extract objects
    return [extract_object(image_rgb, mask) for mask in masks if is_valid_object(mask)]

def display_objects(objects, title):
    """Plot the given object images in a grid with five columns.

    Args:
        objects: Sequence of images accepted by ``plt.imshow``.
        title: Figure-level title.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    if len(objects) == 0:
        # An empty list would give a zero-row grid and a zero-height figure.
        print(f"{title}: no objects to display.")
        return

    cols = 5
    rows = (len(objects) + cols - 1) // cols
    plt.figure(figsize=(15, 3 * rows))
    plt.suptitle(title, fontsize=16)

    for position, obj in enumerate(objects, start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(obj)
        plt.axis('off')
        plt.title(f"Object {position}")

    plt.tight_layout()
    plt.show()

# Load SAM model
sam_model = SAM('sam2_b.pt')

# Process image
image_path = '/kaggle/input/new-tester/f.jpg'
all_objects = segment_and_filter_objects(image_path, sam_model)

# Display all extracted objects
display_objects(all_objects, "All Extracted Objects")

# Apply additional filtering
final_objects = []
for obj in all_objects:
    if obj.size == 0:
        # An empty crop cannot be converted or thresholded.
        continue
    gray = cv2.cvtColor(obj, cv2.COLOR_RGB2GRAY)
    if gray.min() == gray.max():
        # A uniform crop has no object/background split for Otsu to find.
        continue
    thresh = threshold_otsu(gray)
    # The crops sit on a white (255) background, so the background falls in
    # the bright Otsu class; the object is the part at or below the threshold.
    object_pixels = gray <= thresh
    if np.sum(object_pixels) / object_pixels.size > 0.1:  # Ensure object covers at least 10% of the image
        final_objects.append(obj)

# Display final filtered objects
display_objects(final_objects, "Final Filtered Objects")

# Save final objects
for i, obj in enumerate(final_objects):
    cv2.imwrite(f'object_{i+1}.png', cv2.cvtColor(obj, cv2.COLOR_RGB2BGR))

print(f"Extracted {len(all_objects)} initial objects.")
print(f"Filtered down to {len(final_objects)} final objects.")
print("Final objects have been saved as PNG files.")

In [None]:
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
from sklearn.metrics import jaccard_score
from scipy.ndimage import gaussian_gradient_magnitude

def iou(mask1, mask2):
    """Return the intersection-over-union (Jaccard index) of two binary masks.

    Both masks are interpreted as boolean (non-zero means foreground) and must
    hold the same number of elements. Computed directly with NumPy, which
    avoids scikit-learn's per-call label validation on full-resolution masks.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.logical_or(first, second).sum()
    if union == 0:
        # Two empty masks: define the overlap as 0 instead of dividing by zero.
        return 0.0
    return float(np.logical_and(first, second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` whose pure-black pixels are replaced by white.

    A pixel counts as background when all three channels are 0; every other
    pixel is copied through unchanged. Genuinely black object pixels are
    therefore whitened as well.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    white_canvas = np.ones_like(image) * 255
    return np.where(is_foreground[..., np.newaxis], image, white_canvas)

def filter_by_area(mask, min_area, max_area):
    """Tell whether the mask's summed value lies strictly between the two bounds.

    For a 0/1 (or boolean) mask the sum is the number of foreground pixels.
    """
    pixel_total = np.sum(mask)
    return pixel_total > min_area and pixel_total < max_area

def filter_by_contour(mask, min_solidity=0.8, min_aspect_ratio=0.2, max_aspect_ratio=5):
    """Check the shape of the largest external contour of ``mask``.

    Args:
        mask: Binary mask (cast to ``uint8`` for OpenCV).
        min_solidity: Lower bound (exclusive) on contour area / convex-hull area.
        min_aspect_ratio: Lower bound (exclusive) on bounding-box width / height.
        max_aspect_ratio: Upper bound (exclusive) on bounding-box width / height.

    Returns:
        True when the largest contour passes both tests; False when the mask
        has no contour or the contour is degenerate (zero hull area).
    """
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return False

    contour = max(contours, key=cv2.contourArea)
    hull_area = cv2.contourArea(cv2.convexHull(contour))
    if hull_area == 0:
        # A single pixel or a straight line has no area: solidity is undefined.
        return False
    solidity = float(cv2.contourArea(contour)) / hull_area

    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = float(w) / h if h != 0 else 0

    return (solidity > min_solidity and
            min_aspect_ratio < aspect_ratio < max_aspect_ratio)

def is_sharp(image, threshold=100):
    """Return True when the mean gradient magnitude of ``image`` exceeds ``threshold``.

    Args:
        image: RGB image array.
        threshold: Minimum mean Gaussian gradient magnitude (sigma=2).

    Returns:
        bool. An empty image is reported as not sharp.
    """
    if image.size == 0:
        # Nothing to measure (and cvtColor rejects empty input).
        return False

    # Work in float: scipy.ndimage filters default to the input dtype, so a
    # uint8 image would have its gradients computed and stored as uint8.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY).astype(np.float64)
    gradient_magnitude = gaussian_gradient_magnitude(gray, sigma=2)
    return bool(np.mean(gradient_magnitude) > threshold)

def is_object_like(mask, edge_ratio_threshold=0.1):
    """Return True when the mask's edge pixels exceed a fraction of all its pixels.

    Args:
        mask: Binary mask; non-zero entries are foreground.
        edge_ratio_threshold: Minimum ratio of Canny edge pixels to
            ``mask.size`` (the total number of mask elements).

    Returns:
        bool. An empty mask array is reported as not object-like.
    """
    if mask.size == 0:
        return False

    # Canny's 100/200 thresholds assume an 8-bit intensity range; a 0/1 image
    # never produces gradients that large, so scale the mask to 0/255 first.
    binary = mask.astype(bool).astype(np.uint8) * 255
    edges = cv2.Canny(binary, 100, 200)
    edge_ratio = np.count_nonzero(edges) / mask.size
    return bool(edge_ratio > edge_ratio_threshold)

def process_sam_results(results, model_name, confidence_threshold=0.5, min_area=500, max_area=50000, iou_threshold=0.5):
    """Filter SAM detections, de-duplicate them by IoU and save the crops.

    A detection is kept only if its confidence reaches
    ``confidence_threshold`` and it passes ``filter_by_area``,
    ``filter_by_contour``, ``is_object_like`` and ``is_sharp``. When two
    surviving masks overlap with IoU above ``iou_threshold``, the one with the
    higher confidence wins (the earlier one on a tie).

    Args:
        results: Sequence whose first element exposes ``orig_img``,
            ``masks.data``, ``boxes.data`` and ``boxes.conf``.
        model_name: Prefix of the output file names and value of ``'model'``.
        confidence_threshold: Minimum detection confidence.
        min_area: Lower bound passed to ``filter_by_area``.
        max_area: Upper bound passed to ``filter_by_area``.
        iou_threshold: IoU above which two masks count as the same object.

    Returns:
        List of dicts with keys ``model``, ``object_id``, ``image``, ``mask``,
        ``x1``, ``y1``, ``x2``, ``y2``, ``filename`` and ``confidence``. One
        PNG per returned object is written to the working directory.
    """
    result = results[0]
    if result.masks is None or result.boxes is None:
        # Nothing was segmented.
        return []

    original_image = result.orig_img
    masks = result.masks.data.cpu().numpy()
    boxes = result.boxes.data.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()

    kept = []
    for mask, box, confidence in zip(masks, boxes, confidences):
        if confidence < confidence_threshold:
            continue

        if not filter_by_area(mask, min_area, max_area):
            continue

        if not filter_by_contour(mask):
            continue

        if not is_object_like(mask):
            continue

        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = map(int, box[:4])
        cropped_object = object_image[y1:y2, x1:x2]
        if cropped_object.size == 0:
            # Degenerate box: there is no crop to score or save.
            continue

        if not is_sharp(cropped_object):
            continue

        overlapping = [obj for obj in kept if iou(mask, obj['mask']) > iou_threshold]
        if any(obj['confidence'] >= confidence for obj in overlapping):
            # An equally or more confident duplicate is already kept.
            continue
        # Drop the weaker duplicates by identity; comparing the dicts with ==
        # (as list.remove does) would compare their NumPy arrays.
        kept = [obj for obj in kept if not any(obj is dup for dup in overlapping)]

        kept.append({
            'crop': place_on_white_background(cropped_object),
            'mask': mask,
            'box': (x1, y1, x2, y2),
            'confidence': float(confidence),
        })

    # Ids and files are assigned only after de-duplication, so a replaced
    # object can never share an id or filename with its successor.
    unique_objects = []
    for object_id, obj in enumerate(kept):
        object_filename = f"{model_name}object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(obj['crop'], cv2.COLOR_RGB2BGR))
        x1, y1, x2, y2 = obj['box']
        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(obj['crop']),
            'mask': obj['mask'],
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'filename': object_filename,
            'confidence': obj['confidence']
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every extracted object crop in a grid of subplots.

    Args:
        objects: Sequence of dicts with an ``'image'`` entry (anything
            ``Axes.imshow`` accepts) and an ``'object_id'`` entry used in the
            subplot title.
        cols: Number of columns in the grid.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    n = len(objects)
    if n == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load image
# NOTE: cv2.imread returns None (it does not raise) when the file cannot be
# read; the cvtColor call below would then fail.
img_path = "/kaggle/input/new-tester/f.jpg"
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Process with SAM2_b model
sam_model_b = SAM("sam2_b.pt")
results_b = sam_model_b(img)

# Display segmentation results (the annotated image returned by plot())
plt.figure(figsize=(10, 10))
plt.imshow(results_b[0].plot())
plt.title("Segmentation results - SAM2_b")
plt.axis('off')
plt.show()

# Process and visualize objects with enhanced filtering
# These arguments override the function defaults for confidence_threshold and min_area.
objects_b = process_sam_results(results_b, "sam2_b", confidence_threshold=0.6, min_area=70, max_area=50000, iou_threshold=0.5)
visualize_objects(objects_b)

# Create DataFrame
# The 'mask' and 'image' entries are dropped so only scalar metadata is tabulated.
df_b = pd.DataFrame([{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects_b])

# Display DataFrame
print(df_b)

# Save DataFrame to CSV
df_b.to_csv('segmented_objects_sam2_b.csv', index=False)

In [None]:
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score

def iou(mask1, mask2):
    """Return the intersection-over-union (Jaccard index) of two binary masks.

    Both masks are interpreted as boolean (non-zero means foreground) and must
    hold the same number of elements. Computed directly with NumPy, which
    avoids scikit-learn's per-call label validation on full-resolution masks.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.logical_or(first, second).sum()
    if union == 0:
        # Two empty masks: define the overlap as 0 instead of dividing by zero.
        return 0.0
    return float(np.logical_and(first, second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` whose pure-black pixels are replaced by white.

    A pixel counts as background when all three channels are 0; every other
    pixel is copied through unchanged. Genuinely black object pixels are
    therefore whitened as well.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    white_canvas = np.ones_like(image) * 255
    return np.where(is_foreground[..., np.newaxis], image, white_canvas)

def process_sam_results(results, model_name, iou_threshold=0.8, conf_threshold=0.3, min_area=70):
    """Extract confident, large-enough, de-duplicated SAM objects and save their crops.

    Args:
        results: Sequence whose first element exposes ``orig_img``,
            ``masks.data`` and ``boxes.data`` (rows whose first four values
            are the box corners and whose fifth value is read as confidence).
        model_name: Prefix of the output file names and value of ``'model'``.
        iou_threshold: IoU above which a mask counts as a duplicate of an
            already kept one.
        conf_threshold: Minimum confidence.
        min_area: Minimum summed mask value (pixel count for a 0/1 mask).

    Returns:
        List of dicts with keys ``model``, ``object_id``, ``image``, ``mask``,
        ``x1``, ``y1``, ``x2``, ``y2``, ``confidence``, ``area`` and
        ``filename``. One PNG per returned object is written to the working
        directory.
    """
    result = results[0]
    if result.masks is None or result.boxes is None:
        # Nothing was segmented.
        return []

    original_image = result.orig_img
    masks = result.masks.data.cpu().numpy()
    boxes = result.boxes.data.cpu().numpy()

    unique_objects = []
    for mask, box in zip(masks, boxes):
        if box[4] < conf_threshold:
            continue

        area = np.sum(mask)
        if area < min_area:
            continue

        # Skip masks that duplicate an object we already kept.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        object_mask = mask.astype(bool)
        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        x1, y1, x2, y2 = map(int, box[:4])
        cropped_object = object_image[y1:y2, x1:x2]
        if cropped_object.size == 0:
            # Degenerate box: cv2.imwrite cannot save an empty image.
            continue

        object_on_white = place_on_white_background(cropped_object)

        object_id = len(unique_objects)
        object_filename = f"{model_name}_object{object_id}.png"
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'x1': x1,
            'y1': y1,
            'x2': x2,
            'y2': y2,
            'confidence': box[4],
            'area': area,
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every extracted object crop, with its confidence, in a grid of subplots.

    Args:
        objects: Sequence of dicts with ``'image'``, ``'object_id'`` and
            ``'confidence'`` entries.
        cols: Number of columns in the grid.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    n = len(objects)
    if n == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}\nConf: {obj['confidence']:.2f}")

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def process_image(img_path, model, conf_threshold=0.3, min_area=100):
    """Segment one image, show the results and save the object table as CSV.

    Args:
        img_path: Path of the image file to read.
        model: Segmentation model; called with the RGB image. Its class name
            is used in the plot title, the object file names and the CSV name.
        conf_threshold: Forwarded to ``process_sam_results``.
        min_area: Forwarded to ``process_sam_results``.

    Returns:
        Tuple ``(objects, df)``: the list returned by ``process_sam_results``
        and a DataFrame of its entries without ``'mask'`` and ``'image'``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    results = model(img)
    model_name = model.__class__.__name__

    plt.figure(figsize=(10, 10))
    plt.imshow(results[0].plot())
    plt.title(f"Segmentation results - {model_name}")
    plt.axis('off')
    plt.show()

    objects = process_sam_results(results, model_name, conf_threshold=conf_threshold, min_area=min_area)
    visualize_objects(objects)

    # Masks and images are not tabular data, so leave them out of the table.
    rows = [{k: v for k, v in obj.items() if k != 'mask' and k != 'image'} for obj in objects]
    df = pd.DataFrame(rows)
    print(df)

    csv_filename = f'segmented_objects_{model_name}.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Results saved to {csv_filename}")

    return objects, df

# Load SAM2_b model
sam_model_b = SAM("sam2_b.pt")

# Process single image
# NOTE: any failure is only printed; objects_b and df_b are then not assigned
# by this cell.
img_path = "/kaggle/input/newest-tester-data/images (1).jpg"
try:
    objects_b, df_b = process_image(img_path, sam_model_b, conf_threshold=0.3, min_area=100)
except Exception as e:
    print(f"An error occurred while processing the image: {e}")

# If you want to process multiple images in a directory:
# image_dir = "/path/to/image/directory"
# for img_file in os.listdir(image_dir):
#     if img_file.endswith(('.jpg', '.png', '.jpeg')):
#         img_path = os.path.join(image_dir, img_file)
#         print(f"Processing {img_file}")
#         try:
#             objects_b, df_b = process_image(img_path, sam_model_b, conf_threshold=0.3, min_area=100)
#         except Exception as e:
#             print(f"An error occurred while processing {img_file}: {e}")

In [None]:
import torch
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
from sklearn.metrics import jaccard_score
from torchvision.models import resnet50
from torchvision.transforms import functional as F

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def iou(mask1, mask2):
    """Return the intersection-over-union (Jaccard index) of two binary masks.

    Both masks are interpreted as boolean (non-zero means foreground) and must
    hold the same number of elements. Computed directly with NumPy, which
    avoids scikit-learn's per-call label validation on full-resolution masks.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.logical_or(first, second).sum()
    if union == 0:
        # Two empty masks: define the overlap as 0 instead of dividing by zero.
        return 0.0
    return float(np.logical_and(first, second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` whose pure-black pixels are replaced by white.

    A pixel counts as background when all three channels are 0; every other
    pixel is copied through unchanged. Genuinely black object pixels are
    therefore whitened as well.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    white_canvas = np.ones_like(image) * 255
    return np.where(is_foreground[..., np.newaxis], image, white_canvas)

def process_sam_results(results, sam_model_name, iou_threshold=0.5, min_object_size=50):
    """Cut every distinct SAM mask out of the image and save it as a PNG crop.

    Masks are visited in order. A mask is dropped when it is empty, when its
    IoU with an already accepted mask exceeds ``iou_threshold``, or when its
    bounding box is smaller than ``min_object_size`` pixels in either dimension.

    Args:
        results: Sequence whose first element exposes ``orig_img`` and
            ``masks.data`` (a tensor of per-object masks).
        sam_model_name: Prefix of the output file names and value of the
            ``'model'`` field.
        iou_threshold: IoU above which two masks count as the same object.
        min_object_size: Minimum crop height and width in pixels.

    Returns:
        List of dicts with keys ``model``, ``object_id``, ``image`` (PIL image
        of the crop on a white background), ``mask``, ``bbox``
        (``[x1, y1, x2, y2]``, inclusive pixel coordinates as plain ints) and
        ``filename``. Each crop is also written to ``filename`` in the current
        working directory.
    """
    if results[0].masks is None:
        # Nothing was segmented, so there are no objects to extract.
        return []

    original_image = results[0].orig_img
    masks = results[0].masks.data.cpu().numpy()

    unique_objects = []
    for mask in masks:
        object_mask = mask.astype(bool)
        if not object_mask.any():
            # An empty mask has no bounding box (min()/max() would raise).
            continue

        # Skip masks that duplicate an object we already kept.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        # Bounding box of the mask; max() is the last pixel that belongs to it.
        ys, xs = np.where(object_mask)
        y1, y2, x1, x2 = int(ys.min()), int(ys.max()), int(xs.min()), int(xs.max())

        # Slice ends are exclusive, so +1 keeps the last row and column.
        cropped_object = object_image[y1:y2 + 1, x1:x2 + 1]

        # Skip extremely small objects
        if cropped_object.shape[0] < min_object_size or cropped_object.shape[1] < min_object_size:
            continue

        object_on_white = place_on_white_background(cropped_object)

        object_id = len(unique_objects)
        object_filename = f"{sam_model_name}object{object_id}.png"
        # The crop is converted with COLOR_RGB2BGR before OpenCV writes it.
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': sam_model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'bbox': [x1, y1, x2, y2],
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every extracted object crop in a grid of subplots.

    Args:
        objects: Sequence of dicts with an ``'image'`` entry (anything
            ``Axes.imshow`` accepts) and an ``'object_id'`` entry used in the
            subplot title.
        cols: Number of columns in the grid.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    n = len(objects)
    if n == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def is_object(image, model):
    """Return True when ``model``'s top-scoring class for ``image`` is not index 0.

    The image is converted to a tensor, normalised with the mean/std constants
    below, batched and moved to ``device`` before a gradient-free forward pass.

    NOTE(review): the check treats class index 0 as "background". Whether the
    classifier passed in actually reserves index 0 for background is not
    established anywhere in this cell - confirm against the model's label set.
    """
    tensor = F.to_tensor(image)
    tensor = F.normalize(tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    batch = tensor.unsqueeze(0).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(batch)

    # Index of the highest-scoring class for the single image in the batch.
    top_class = logits.argmax(dim=1)
    return top_class.item() != 0

# Load SAM model
sam_model = SAM("sam2_b.pt").to(device)

# NOTE: no classifier is loaded in this cell, so the is_object() helper
# defined above is not used below.


# Load image
# NOTE: cv2.imread returns None (it does not raise) when the file cannot be
# read; the cvtColor call below would then fail.
img_path = "/kaggle/input/newest-tester-data/images (1).jpg"
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Display original image
plt.figure(figsize=(10, 10))
plt.imshow(img)
plt.title("Original Image")
plt.axis('off')
plt.show()

# Process with SAM model
results = sam_model(img)

# Display segmentation results (the annotated image returned by plot())
plt.figure(figsize=(10, 10))
plt.imshow(results[0].plot())
plt.title("Segmentation results - SAM")
plt.axis('off')
plt.show()

# Process SAM results
# Also writes one PNG per kept object to the working directory.
objects = process_sam_results(results, "sam")

print(f"Number of objects detected by SAM: {len(objects)}")

# Visualize objects from SAM
visualize_objects(objects)

# No classifier-based filtering is applied here: every object returned by
# process_sam_results goes into the table.

# Create DataFrame
# The 'mask' and 'image' entries are dropped so only metadata is tabulated.
df = pd.DataFrame([{k: v for k, v in obj.items() if k not in ['mask', 'image']} for obj in objects])

# Display DataFrame
print(df)
# Save DataFrame to CSV
df.to_csv('segmented_objects.csv', index=False)

In [None]:
import torch,gc
# Collect unreachable Python objects first so the tensors they hold are
# released, then return the now-unused cached blocks to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

In [None]:
!pip install ultralytics

In [None]:
!pip install ultralytics

In [None]:

import torch
import torch.nn as nn
from torchvision import transforms, models
from ultralytics import SAM
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import jaccard_score
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
import warnings
import re
from datetime import datetime

# Suppress warnings
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Ensure GPU usage if available (falls back to the CPU otherwise)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Helper functions
def iou(mask1, mask2):
    """Return the intersection-over-union (Jaccard index) of two binary masks.

    Both masks are interpreted as boolean (non-zero means foreground) and must
    hold the same number of elements. Computed directly with NumPy, which
    avoids scikit-learn's per-call label validation on full-resolution masks.

    Returns:
        float in [0, 1]; 0.0 when both masks are empty.
    """
    first = np.asarray(mask1).astype(bool).ravel()
    second = np.asarray(mask2).astype(bool).ravel()
    union = np.logical_or(first, second).sum()
    if union == 0:
        # Two empty masks: define the overlap as 0 instead of dividing by zero.
        return 0.0
    return float(np.logical_and(first, second).sum() / union)

def place_on_white_background(image):
    """Return a copy of ``image`` whose pure-black pixels are replaced by white.

    A pixel counts as background when all three channels are 0; every other
    pixel is copied through unchanged. Genuinely black object pixels are
    therefore whitened as well.
    """
    is_foreground = (image != [0, 0, 0]).any(axis=-1)
    white_canvas = np.ones_like(image) * 255
    return np.where(is_foreground[..., np.newaxis], image, white_canvas)

def process_sam_results(results, model_name, iou_threshold=0.8, min_object_size=50):
    """Cut every distinct SAM mask out of the image and save it as a PNG crop.

    Masks are visited in order. A mask is dropped when it is empty, when its
    IoU with an already accepted mask exceeds ``iou_threshold``, or when its
    bounding box is smaller than ``min_object_size`` pixels in either dimension.

    Args:
        results: Sequence whose first element exposes ``orig_img`` and
            ``masks.data`` (a tensor of per-object masks).
        model_name: Prefix of the output file names and value of the
            ``'model'`` field.
        iou_threshold: IoU above which two masks count as the same object.
        min_object_size: Minimum crop height and width in pixels.

    Returns:
        List of dicts with keys ``model``, ``object_id``, ``image`` (PIL image
        of the crop on a white background), ``mask``, ``bbox``
        (``[x1, y1, x2, y2]``, inclusive pixel coordinates as plain ints) and
        ``filename``. Each crop is also written to ``filename`` in the current
        working directory.
    """
    if results[0].masks is None:
        # Nothing was segmented, so there are no objects to extract.
        return []

    original_image = results[0].orig_img
    masks = results[0].masks.data.cpu().numpy()

    unique_objects = []
    for mask in masks:
        object_mask = mask.astype(bool)
        if not object_mask.any():
            # An empty mask has no bounding box (min()/max() would raise).
            continue

        # Skip masks that duplicate an object we already kept.
        if any(iou(mask, kept['mask']) > iou_threshold for kept in unique_objects):
            continue

        object_image = np.zeros_like(original_image)
        object_image[object_mask] = original_image[object_mask]

        # Bounding box of the mask; max() is the last pixel that belongs to it.
        ys, xs = np.where(object_mask)
        y1, y2, x1, x2 = int(ys.min()), int(ys.max()), int(xs.min()), int(xs.max())

        # Slice ends are exclusive, so +1 keeps the last row and column.
        cropped_object = object_image[y1:y2 + 1, x1:x2 + 1]

        # Skip extremely small objects
        if cropped_object.shape[0] < min_object_size or cropped_object.shape[1] < min_object_size:
            continue

        object_on_white = place_on_white_background(cropped_object)

        object_id = len(unique_objects)
        object_filename = f"{model_name}object{object_id}.png"
        # The crop is converted with COLOR_RGB2BGR before OpenCV writes it.
        cv2.imwrite(object_filename, cv2.cvtColor(object_on_white, cv2.COLOR_RGB2BGR))

        unique_objects.append({
            'model': model_name,
            'object_id': object_id,
            'image': Image.fromarray(object_on_white),
            'mask': mask,
            'bbox': [x1, y1, x2, y2],
            'filename': object_filename
        })

    return unique_objects

def visualize_objects(objects, cols=5):
    """Show every extracted object crop in a grid of subplots.

    Args:
        objects: Sequence of dicts with an ``'image'`` entry (anything
            ``Axes.imshow`` accepts) and an ``'object_id'`` entry used in the
            subplot title.
        cols: Number of columns in the grid.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    n = len(objects)
    if n == 0:
        # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
        print("No objects to display.")
        return

    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of Axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3), squeeze=False)
    axs = axs.flatten()

    for ax, obj in zip(axs, objects):
        ax.imshow(obj['image'])
        ax.axis('off')
        ax.set_title(f"Object {obj['object_id']}")

    # Hide the unused cells of the last row.
    for ax in axs[n:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Load models
sam_model = SAM("sam2_b.pt")


def _load_efficientnet_b0(weights_path, num_classes):
    """Build an EfficientNet-B0 with a ``num_classes``-way head and load a checkpoint.

    The final classifier layer is replaced before the state dict is loaded;
    the model is then moved to ``device`` and put in eval mode.
    """
    model = models.efficientnet_b0(pretrained=False)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device)
    model.eval()
    return model


# Packed vs. unpacked classifier (2 classes)
packed_unpacked_model = _load_efficientnet_b0(
    '/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_packed_unpacked (2).pth', 2)

# Fruit / vegetable type classifier (54 classes)
fruit_veg_model = _load_efficientnet_b0(
    '/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_fruit_veg_1 (1).pth', 54)

# Fresh vs. rotten classifier (2 classes)
# NOTE(review): the checkpoint file name says "fruit_veg", not "fresh_rotten" -
# confirm this is the intended weights file.
fresh_rotten_model = _load_efficientnet_b0(
    '/kaggle/input/models-loaded/keras/default/1/efficientnet_b0_fruit_veg (2).pth', 2)

# Vision-language model and its processor
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    torch_dtype="auto",
    device_map="auto",
)
qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Define categories and transformations
# Each *_idx_to_category dict maps a class index to its label, using the
# position of the label in the corresponding list.
# NOTE(review): the list order presumably has to match the label order used
# when the classifiers were trained - confirm before editing or re-sorting.
packed_unpacked_categories = ['Packed', 'Unpacked']
packed_unpacked_idx_to_category = {idx: category for idx, category in enumerate(packed_unpacked_categories)}

# 54 fruit/vegetable labels (same count as the 54-way fruit_veg classifier head).
fruit_veg_categories = [
    'Orange', 'Tamarillo', 'Lime', 'Pomegranate', 'Plum', 'Pineapple', 'Apple', 'Dates', 'Papaya', 'Guava',
    'Beetroot', 'Pear', 'Strawberry', 'Blueberry', 'Lulo', 'Avacado', 'Lemon', 'Kaki', 'Peach', 'Grape',
    'Banana', 'Cherry', 'Watermelon', 'Mango', 'Grapefruit', 'Broccoli', 'Capsicum', 'Radish', 'Tomato', 'Turnip',
    'Ginger', 'Zucchini', 'Brinjal', 'Pumpkin', 'Bell Pepper', 'Carrot', 'New Mexico Green Chile', 'Eggplant',
    'Baby Corn', 'Zucchini dark', 'Sweet corn', 'Cabbage', 'Bitter_Gourd', 'Cauliflower', 'Chile Pepper',
    'Sweet Potato', 'Bean', 'Cucumber', 'Bottle Gourd', 'Garlic', 'Peas', 'Onion', 'Potato', 'Spinach'
]
fruit_veg_idx_to_category = {idx: category for idx, category in enumerate(fruit_veg_categories)}

fresh_rotten_categories = ['Fresh', 'Rotten']
fresh_rotten_idx_to_category = {idx: category for idx, category in enumerate(fresh_rotten_categories)}

# Product groups for packed items.
packed_categories = [
    'Staples', 'Snacks & Beverages', 'Packaged Food', 'Personal & Baby Care',
    'Household Care', 'Dairy & Eggs', 'Home & Kitchen'
]

# Classifier preprocessing: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def predict_image_class(model, image_path, idx_to_category):
    """Classify one image file and return its label index and category name.

    Args:
        model: Classifier applied to the transformed image.
        image_path: Path of the image file to classify.
        idx_to_category: Mapping from class index to category name.

    Returns:
        Tuple ``(predicted_label, predicted_category)``, or ``(None, None)``
        when anything fails (the error is printed, not raised).
    """
    try:
        # The context manager closes the file handle once the pixels have been
        # converted into a tensor.
        with Image.open(image_path) as img:
            image = transform(img.convert('RGB')).unsqueeze(0).to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(image)
            _, predicted_label = torch.max(outputs, 1)

        predicted_label = predicted_label.item()
        predicted_category = idx_to_category[predicted_label]

        return predicted_label, predicted_category
    except Exception as e:
        print(f"Error predicting class for {image_path}: {str(e)}")
        return None, None

def get_product_info(image_path, question):
    """Ask the Qwen vision-language model a question about one image.

    Builds a single-turn chat prompt containing the image and ``question``,
    generates up to 1024 new tokens and returns only the newly generated text
    (the prompt tokens are stripped before decoding).

    Args:
        image_path: Path of the image file to show to the model.
        question: Text question to ask about the image.

    Returns:
        The decoded answer string, or ``"Error: Unable to process image"`` if
        anything fails (the error is printed, not raised).
    """
    try:
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question}
                ]
            }
        ]
        text_prompt = qwen_processor.apply_chat_template(messages, add_generation_prompt=True)

        # Keep the file open only while the processor reads the pixels.
        with Image.open(image_path) as image:
            inputs = qwen_processor(text=[text_prompt], images=[image], padding=True, return_tensors="pt")

        # Follow the model's own placement instead of hard-coding "cuda", so the
        # call also works on CPU or when the model lives on another device.
        inputs = inputs.to(qwen_model.device)

        output_ids = qwen_model.generate(**inputs, max_new_tokens=1024)

        # generate() returns prompt + continuation; drop the prompt part of each sequence.
        generated_ids = [
            full_sequence[len(prompt_ids):]
            for prompt_ids, full_sequence in zip(inputs.input_ids, output_ids)
        ]
        output_text = qwen_processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        return output_text[0]
    except Exception as e:
        # Deliberate best-effort: callers receive a sentinel string instead of an exception.
        print(f"Error getting product info for {image_path}: {str(e)}")
        return "Error: Unable to process image"

def is_valid_date(date_str):
    """Return True if ``date_str`` parses as a date in any supported format.

    Supported layouts: day/month/year, year/month/day and month/day/year with
    ``/`` or ``-`` separators, ``"%d %b %Y"`` / ``"%d %B %Y"`` (e.g.
    ``"31 Dec 2024"``), two-digit-year day/month/year, and compact ``%Y%m%d``.

    Leading and trailing whitespace is ignored. Non-string input (e.g. ``None``)
    and empty strings are reported as invalid instead of raising.

    Args:
        date_str: Candidate date text.

    Returns:
        bool: True when at least one format matches, otherwise False.
    """
    # strptime raises TypeError (not ValueError) for non-strings; treat as invalid.
    if not isinstance(date_str, str):
        return False

    candidate = date_str.strip()
    if not candidate:
        return False

    date_formats = (
        "%d/%m/%Y", "%d-%m-%Y", "%Y/%m/%d", "%Y-%m-%d",
        "%m/%d/%Y", "%m-%d-%Y", "%d %b %Y", "%d %B %Y",
        "%d/%m/%y", "%d-%m-%y", "%Y%m%d",
    )
    for date_format in date_formats:
        try:
            datetime.strptime(candidate, date_format)
        except ValueError:
            continue
        return True
    return False

def process_single_object(image_path):
    """Build a one-row DataFrame describing the object shown in ``image_path``.

    The image is first classified as packed or unpacked:

    * ``'Packed'``: the Qwen model is asked for name, expiry date, description
      and category; columns are ``name, expiry_date, description, category,
      type, frequency``.
    * Unpacked: the fruit/vegetable and fresh/rotten classifiers are run;
      columns are ``name, condition, type, frequency, indepth_condition,
      weight`` (the last two are placeholders).
    * Classification failed (``predict_image_class`` returned ``None``): a row
      with ``type == 'Unknown'`` is returned instead of mislabeling the item
      as unpacked produce.

    Args:
        image_path: Path of the image file to analyze.

    Returns:
        pandas.DataFrame with exactly one row.
    """
    # Check if packed or unpacked
    _, packed_unpacked_category = predict_image_class(
        packed_unpacked_model, image_path, packed_unpacked_idx_to_category
    )

    if packed_unpacked_category is None:
        # predict_image_class signals failure with (None, None). Without this
        # guard the item would fall through to the unpacked branch below.
        return pd.DataFrame({
            'name': [None],
            'type': ['Unknown'],
            'frequency': [1]
        })

    if packed_unpacked_category == 'Packed':
        # Get product information using Qwen model; dict order fixes both the
        # order of the model calls and the order of the resulting columns.
        questions = {
            'name': "What is the name of the product? NOTE: JUST PROVIDE NAME AS THE ANSWER",
            'expiry_date': "What is the expiry date of the product? If not visible, say 'Not visible'",
            'description': "Provide a brief description of the product",
            'category': "Classify the product into one of these categories: Staples, Snacks & Beverages, Packaged Food, Personal & Baby Care, Household Care, Dairy & Eggs, Home & Kitchen",
        }
        answers = {
            column: [get_product_info(image_path, question)]
            for column, question in questions.items()
        }
        return pd.DataFrame({**answers, 'type': ['Packed'], 'frequency': [1]})

    # Classify fruit/vegetable
    _, fruit_veg_category = predict_image_class(fruit_veg_model, image_path, fruit_veg_idx_to_category)

    # Determine if fresh or rotten
    _, fresh_rotten_category = predict_image_class(fresh_rotten_model, image_path, fresh_rotten_idx_to_category)

    return pd.DataFrame({
        'name': [fruit_veg_category],
        'condition': [fresh_rotten_category],
        'type': ['Unpacked'],
        'frequency': [1],
        'indepth_condition': ['To be determined'],  # Placeholder
        'weight': ['To be determined']  # Placeholder
    })

def process_multiple_objects(image_path, is_front):
    """Segment an image into objects and describe each one.

    The image is read with OpenCV, converted from BGR to RGB and passed to
    ``sam_model``; ``process_sam_results`` turns the result into per-object
    records. Each record is run through ``process_single_object`` and tagged
    with its bounding box and which side of the scene it came from.

    Args:
        image_path: Path of the multi-object image.
        is_front: True if the image shows the front side, False for the back.

    Returns:
        pandas.DataFrame with one row per segmented object. When nothing is
        segmented, an empty frame with the columns ``name, type, frequency,
        bbox, side`` is returned so downstream column lookups still work.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    # Perform segmentation
    img = cv2.imread(image_path)
    if img is None:
        # imread signals a missing/unreadable file by returning None; fail with
        # a clear message instead of an opaque cvtColor assertion.
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = sam_model(img)
    # Each record is indexed below by 'filename' and 'bbox'.
    # NOTE(review): presumably 'filename' is a cropped image written by
    # process_sam_results -- confirm against that helper.
    objects = process_sam_results(results, "sam")

    side = 'Front' if is_front else 'Back'
    df_list = []
    for obj in objects:
        obj_df = process_single_object(obj['filename'])
        # Wrap in a list so the whole bbox lands in the single row's cell.
        obj_df['bbox'] = [obj['bbox']]
        obj_df['side'] = [side]
        df_list.append(obj_df)

    if not df_list:
        # pd.concat([]) raises ValueError; return an empty, well-formed frame instead.
        return pd.DataFrame(columns=['name', 'type', 'frequency', 'bbox', 'side'])

    return pd.concat(df_list, ignore_index=True)

def main():
    """Interactive entry point: analyze a front/back image pair and save CSVs.

    Prompts for the image mode (``single`` or ``multiple``; case and
    surrounding whitespace are ignored) and for the front and back image
    paths, runs the matching pipeline, then writes ``combined_items.csv``,
    ``packed_items.csv`` and ``unpacked_items.csv`` to the current directory
    and prints the three tables.

    Returns:
        None. An unrecognized mode prints a message and returns before any
        further prompt.
    """
    image_type = input(
        "Is this a single object image or multiple objects image? (single/multiple): "
    ).strip().lower()

    if image_type not in ('single', 'multiple'):
        print("Invalid input. Please choose 'single' or 'multiple'.")
        return

    # Both modes need the same two paths; strip stray whitespace from pasted input.
    front_image_path = input("Enter the path to the front image: ").strip()
    back_image_path = input("Enter the path to the back image: ").strip()

    if image_type == 'single':
        front_df = process_single_object(front_image_path)
        front_df['side'] = 'Front'
        back_df = process_single_object(back_image_path)
        back_df['side'] = 'Back'

        combined_df = pd.concat([front_df, back_df], ignore_index=True)
    else:
        front_df = process_multiple_objects(front_image_path, is_front=True)
        back_df = process_multiple_objects(back_image_path, is_front=False)

        combined_df = pd.concat([front_df, back_df], ignore_index=True)

        # Instead of grouping by 'bbox', we'll create a unique identifier
        combined_df['object_id'] = range(len(combined_df))
        # TODO: merge front and back rows that refer to the same physical object
        # (e.g. by bbox overlap). The previous groupby on the unique object_id
        # produced an unused copy of combined_df and has been removed.

    # Create separate DataFrames for packed and unpacked items
    packed_df = combined_df[combined_df['type'] == 'Packed']
    unpacked_df = combined_df[combined_df['type'] == 'Unpacked']

    # Save results
    combined_df.to_csv('combined_items.csv', index=False)
    packed_df.to_csv('packed_items.csv', index=False)
    unpacked_df.to_csv('unpacked_items.csv', index=False)

    print("Processing complete. Results saved to CSV files.")
    print("\nCombined items:")
    print(combined_df)
    print("\nPacked items:")
    print(packed_df)
    print("\nUnpacked items:")
    print(unpacked_df)

    
# Start the interactive pipeline only when this code runs as the main program
# (the guard is false when the code is imported as a module).
if __name__ == "__main__":
    main()