In [10]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxeswithdamagesanddents56.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxeswithdamagesanddents30.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/damagedfoodpackagingbox59.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxesthataredamaged234.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxesthataredamaged245.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/damagedfoodpackagingbox244.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxesthataredamaged18.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packagingboxesthataredamaged109.jpeg
/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages/damaged/packag

In [11]:
# Install required packages
!pip install ultralytics roboflow scikit-learn opencv-python




In [15]:
import os
import shutil
from sklearn.model_selection import train_test_split
import cv2
import numpy as np
from pathlib import Path

def prepare_dataset():
    # Get file paths
    damaged_files = [f for f in paths if 'damaged' in f.lower()]
    intact_files = [f for f in paths if 'intact' in f.lower()]
    
    # Create directory structure
    for split in ['train', 'val', 'test']:
        for cls in ['damaged', 'intact']:
            os.makedirs(f'dataset/{split}/{cls}', exist_ok=True)
    
    # Split data
    for files, cls in [(damaged_files, 'damaged'), (intact_files, 'intact')]:
        # First split into train and remaining
        train_files, temp = train_test_split(files, train_size=0.7, random_state=42)
        # Split remaining into val and test
        val_files, test_files = train_test_split(temp, test_size=0.5, random_state=42)
        
        # Copy files to respective directories
        for split, file_list in [
            ('train', train_files),
            ('val', val_files),
            ('test', test_files)
        ]:
            for file in file_list:
                dest = f'dataset/{split}/{cls}/{os.path.basename(file)}'
                shutil.copy(file, dest)

def create_yaml():
    """Create YAML file for YOLOv8 training"""
    yaml_content = {
        'path': 'dataset',
        'train': 'train',
        'val': 'val',
        'test': 'test',
        'names': {
            0: 'damaged',
            1: 'intact'
        },
        'nc': 2
    }
    
    with open('data.yaml', 'w') as f:
        yaml.dump(yaml_content, f)

In [16]:
from ultralytics import YOLO

def train_model():
    # Load the model
    model = YOLO('yolov8n-cls.pt')
    
    # Train the model
    results = model.train(
        data='data.yaml',
        epochs=50,
        imgsz=640,
        batch=32,
        name='package_damage_detector',
        pretrained=True,
        optimizer='Adam',  # Using Adam optimizer
        lr0=0.001,        # Initial learning rate
        augment=True,     # Use data augmentation
        dropout=0.2,      # Add dropout for regularization
        mixup=0.1,        # Apply mixup augmentation
        fliplr=0.5,       # Random horizontal flips
        scale=0.5,        # Random scale augmentation
        translate=0.2,    # Random translations
    )
    
    return model

In [17]:
def predict_damage(model_path, image_path, conf_threshold=0.5):
    """
    Predict if a package is damaged
    """
    model = YOLO(model_path)
    results = model(image_path)[0]
    
    # Get prediction
    class_id = results.probs.top1
    confidence = results.probs.top1conf.item()
    class_name = results.names[class_id]
    
    return {
        'class': class_name,
        'confidence': f"{confidence:.2%}",
        'is_damaged': class_name == 'damaged' and confidence >= conf_threshold
    }

In [19]:
def evaluate_model(model, test_dir):
    """
    Evaluate model performance on test set
    """
    import glob
    from sklearn.metrics import classification_report, confusion_matrix
    
    test_images = []
    true_labels = []
    predictions = []
    
    # Get all test images
    for cls in ['damaged', 'intact']:
        images = glob.glob(f"{test_dir}/{cls}/*.jpg")
        test_images.extend(images)
        true_labels.extend([cls] * len(images))
    
    # Make predictions
    for img_path in test_images:
        pred = predict_damage(model, img_path)
        predictions.append(pred['class'])
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(true_labels, predictions))
    
    # Print confusion matrix
    print("\nConfusion Matrix:")
    print(confusion_matrix(true_labels, predictions))

# Run evaluation
evaluate_model(model, 'processed_dataset/test')

NameError: name 'model' is not defined