<a href="https://www.kaggle.com/code/ismetsemedov/damaged-parcels?scriptVersionId=216581050" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install required packages
!pip install ultralytics roboflow scikit-learn opencv-python torch

In [None]:
from ultralytics import YOLO
import os
import torch
from sklearn.model_selection import train_test_split
import shutil

def create_yolo_dataset(base_dir='/kaggle/working'):
    """Create the empty YOLO dataset directory skeleton under ``base_dir``.

    Makes ``dataset/images/{train,val}`` and ``dataset/labels/{train,val}``.
    Directories that already exist are left as they are, so the call can be
    repeated safely.

    Args:
        base_dir: Root directory that will contain the ``dataset`` folder.
            Defaults to the Kaggle working directory.
    """
    for kind in ('images', 'labels'):
        for split in ('train', 'val'):
            os.makedirs(os.path.join(base_dir, 'dataset', kind, split), exist_ok=True)

def prepare_data():
    """Split the source images 80/20 and copy them into the YOLO dataset.

    Images are read from the ``damaged`` and ``intact`` folders of the Kaggle
    input dataset. Each class is split independently into train and
    validation sets (``test_size=0.2``, ``random_state=42``). Every image is
    copied to ``/kaggle/working/dataset/images/<split>`` and a matching label
    file is written through ``create_label``.

    Class ids are 0 for damaged and 1 for intact.

    Raises:
        FileNotFoundError: If a source directory or a destination directory
            does not exist.
    """
    source_root = '/kaggle/input/damaged-and-intact-packages/damaged-and-intact-packages'
    base_dir = '/kaggle/working'
    image_exts = ('.jpg', '.jpeg')

    # (source directory, class id) for each class.
    classes = (
        (os.path.join(source_root, 'damaged'), 0),
        (os.path.join(source_root, 'intact'), 1),
    )

    # Resolve every split before copying anything, so a missing source
    # directory fails before the dataset is partially written.
    planned = []
    for src_dir, class_id in classes:
        # os.listdir returns names in arbitrary order; sorting makes the
        # seeded split reproducible. Lower-casing also accepts .JPG/.JPEG.
        files = sorted(
            name for name in os.listdir(src_dir)
            if name.lower().endswith(image_exts)
        )
        train_files, val_files = train_test_split(
            files, test_size=0.2, random_state=42
        )
        planned.append((src_dir, class_id, train_files, val_files))

    # NOTE(review): an image name present in both class folders would be
    # overwritten by the later class within a split -- confirm names are unique.
    for split_index, split in enumerate(('train', 'val')):
        dest_dir = os.path.join(base_dir, 'dataset', 'images', split)
        for src_dir, class_id, train_files, val_files in planned:
            split_files = (train_files, val_files)[split_index]
            for img_file in split_files:
                shutil.copy(
                    os.path.join(src_dir, img_file),
                    os.path.join(dest_dir, img_file),
                )
                create_label(img_file, split, class_id)

def create_label(img_file, split, class_id, base_dir='/kaggle/working'):
    """Write the label file for one image.

    The file is ``<base_dir>/dataset/labels/<split>/<image stem>.txt`` and
    holds the single line ``<class_id> 0.5 0.5 1.0 1.0``.

    Args:
        img_file: Image file name; its extension is replaced by ``.txt``.
        split: Dataset split sub-folder, e.g. ``'train'`` or ``'val'``.
        class_id: Integer class id written as the first field.
        base_dir: Root directory containing the ``dataset`` folder.
            Defaults to the Kaggle working directory.
    """
    label_dir = os.path.join(base_dir, 'dataset', 'labels', split)
    # Create the folder on demand so the function also works on its own.
    os.makedirs(label_dir, exist_ok=True)

    stem = os.path.splitext(img_file)[0]
    label_path = os.path.join(label_dir, f'{stem}.txt')
    # NOTE(review): presumably YOLO's normalized "x_center y_center width
    # height", i.e. one box covering the whole image -- confirm.
    with open(label_path, 'w', encoding='utf-8') as f:
        f.write(f'{class_id} 0.5 0.5 1.0 1.0\n')

def create_yaml(base_dir='/kaggle/working'):
    """Write the ``dataset.yaml`` config that points at the image folders.

    The file is written to ``<base_dir>/dataset.yaml``. It lists the train
    and validation image directories, the class count (2) and the class
    names ``damaged`` and ``intact``.

    Args:
        base_dir: Root directory containing the ``dataset`` folder; the YAML
            file is written directly inside it. Defaults to the Kaggle
            working directory.
    """
    images_dir = os.path.join(base_dir, 'dataset', 'images')
    # Built line by line so the file has no stray leading blank line or
    # trailing whitespace-only line.
    yaml_content = (
        f"train: {os.path.join(images_dir, 'train')}\n"
        f"val: {os.path.join(images_dir, 'val')}\n"
        "\n"
        "nc: 2\n"
        "names: ['damaged', 'intact']\n"
    )

    with open(os.path.join(base_dir, 'dataset.yaml'), 'w', encoding='utf-8') as f:
        f.write(yaml_content)

def train_model(data='/kaggle/working/dataset.yaml', epochs=50, imgsz=640,
                batch=16, workers=4):
    """Load ``yolov8n.pt`` and train it on the prepared dataset.

    Training runs on CUDA when ``torch.cuda.is_available()`` is true and on
    the CPU otherwise.

    Args:
        data: Path to the dataset YAML config.
        epochs: Number of training epochs.
        imgsz: Training image size passed to ``model.train``.
        batch: Batch size.
        workers: Number of data-loading workers.

    Returns:
        The ``YOLO`` model object after ``train`` has been called on it.
    """
    model = YOLO('yolov8n.pt')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # The value returned by train() is not used, so it is not bound.
    model.train(
        data=data,
        epochs=epochs,
        imgsz=imgsz,
        batch=batch,
        workers=workers,
        device=device,
    )
    return model

def _run_pipeline():
    """Build the dataset on disk, write its YAML config, then train."""
    for prepare_step in (create_yolo_dataset, prepare_data, create_yaml):
        prepare_step()
    return train_model()


if __name__ == "__main__":
    # Bind the trained model to a module-level name.
    model = _run_pipeline()