In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import os
import yaml
import json

In [2]:
import os
import json
import shutil
from collections import defaultdict

# Input locations: the COCO image folder and the three COCO-format annotation files.
coco_path = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
train_json = '/kaggle/input/semis-od-coco-10/instances_train2017_labeled.json'
val_json = '/kaggle/input/semis-od-coco-10/instances_val2017.json'
unlabeled_json = '/kaggle/input/semis-od-coco-10/instances_train2017_unlabeled.json'
# Root of the YOLO-style dataset that the rest of this cell builds.
output_path = '/kaggle/working/datasets/coco'

# images/{train,val} and labels/{train,val} under output_path, plus the list-file folder.
for _kind in ('images', 'labels'):
    for _split in ('train', 'val'):
        os.makedirs(os.path.join(output_path, _kind, _split), exist_ok=True)
os.makedirs('/kaggle/working/data/coco', exist_ok=True)


def _load_json(path):
    """Open ``path`` and return the object decoded by ``json.load``."""
    with open(path) as handle:
        return json.load(handle)


train_annotations = _load_json(train_json)
val_annotations = _load_json(val_json)
unlabeled_annotations = _load_json(unlabeled_json)

# Category id -> category name, taken from the labeled training annotations.
categories = {entry['id']: entry['name'] for entry in train_annotations['categories']}

# Convert COCO bbox to YOLO format
def convert_bbox(size, box):
    """Normalise a corner-anchored box into a centre-anchored one.

    Args:
        size: Indexable whose first two items are the image width and height.
        box: Indexable ``(x, y, w, h)`` where ``(x, y)`` is the box corner the
            width and height extend from.

    Returns:
        Tuple ``(x_center, y_center, width, height)``; horizontal values are
        scaled by ``1 / size[0]`` and vertical values by ``1 / size[1]``.
    """
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]
    box_w = box[2]
    box_h = box[3]
    x_center = (box[0] + box_w / 2.0) * inv_width
    y_center = (box[1] + box_h / 2.0) * inv_height
    return (x_center, y_center, box_w * inv_width, box_h * inv_height)

# Process labeled annotations
def process_annotations(annotations, split):
    """Write YOLO label files and copy the images for one labeled split.

    Args:
        annotations: Decoded COCO-style dict; its ``'images'`` and
            ``'annotations'`` lists are read.
        split: Folder name used below ``images/`` and ``labels/`` inside
            ``output_path`` (for example ``'train'``).

    Every annotation becomes one ``"<class> <x> <y> <w> <h>"`` line in
    ``labels/<split>/<image stem>.txt``. Exact duplicates (same class and box
    on the same image id) are written once. Each label file is written from
    scratch in a single pass, so re-running the cell does not append the same
    lines again to files left over from an earlier run. Only images that own
    at least one annotation are copied from ``coco_path``.

    Raises:
        KeyError: If an annotation references an image id that is missing from
            ``annotations['images']``.
    """
    images = {image['id']: image for image in annotations['images']}
    seen_labels = defaultdict(set)   # image id -> (label, x, y, w, h) tuples already emitted
    label_lines = defaultdict(list)  # label file path -> output lines, in annotation order
    annotated_files = {}             # image file names with annotations (dict used as ordered set)

    for ann in annotations['annotations']:
        image = images[ann['image_id']]
        bbox = convert_bbox((image['width'], image['height']), ann['bbox'])
        # YOLO class ids start at 0; the fixed offset assumes the smallest
        # category id in these annotation files is 2 -- TODO confirm.
        label = ann['category_id'] - 2
        if label < 0 or label > 7:
            # Diagnostic only: the annotation is still written, as before.
            print(label)
        bbox_tuple = (label, *bbox)  # Label plus box, used to detect duplicates

        if bbox_tuple not in seen_labels[image['id']]:
            seen_labels[image['id']].add(bbox_tuple)
            label_path = os.path.join(output_path, 'labels', split, f"{os.path.splitext(image['file_name'])[0]}.txt")
            label_lines[label_path].append(f"{label} " + " ".join(map(str, bbox)) + '\n')

        annotated_files[image['file_name']] = None

    # One truncating write per label file instead of one append per annotation.
    for label_path, lines in label_lines.items():
        with open(label_path, 'w') as f:
            f.writelines(lines)

    # Copy each annotated image once, skipping files that are already in place.
    for file_name in annotated_files:
        target_image_path = os.path.join(output_path, 'images', split, file_name)
        if not os.path.exists(target_image_path):
            shutil.copy(os.path.join(coco_path, file_name), target_image_path)

# Process unlabeled annotations
def process_unlabeled_annotations(annotations, split):
    """Create an empty label file and copy the image for every listed image.

    Args:
        annotations: Decoded COCO-style dict; only its ``'images'`` list is read.
        split: Folder name used below ``images/`` and ``labels/`` inside
            ``output_path``.
    """
    # Keyed by id so that a repeated image id is handled once (last entry wins).
    unique_images = {entry['id']: entry for entry in annotations['images']}
    labels_dir = os.path.join(output_path, 'labels', split)
    images_dir = os.path.join(output_path, 'images', split)

    for entry in unique_images.values():
        file_name = entry['file_name']
        stem = os.path.splitext(file_name)[0]

        # Opening in 'w' mode creates the label file or truncates it to empty.
        with open(os.path.join(labels_dir, f"{stem}.txt"), 'w') as label_file:
            label_file.write('')

        # Copy the image unless the destination already exists.
        destination = os.path.join(images_dir, file_name)
        if not os.path.exists(destination):
            shutil.copy(os.path.join(coco_path, file_name), destination)

# Convert the labeled train and val splits
for _annotations, _split in ((train_annotations, 'train'), (val_annotations, 'val')):
    process_annotations(_annotations, _split)

# The unlabeled split needs its own images/ and labels/ folders before it is processed
for _kind in ('images', 'labels'):
    os.makedirs(os.path.join(output_path, _kind, 'unlabeled'), exist_ok=True)
process_unlabeled_annotations(unlabeled_annotations, 'unlabeled')

# Create .yaml file
# Image folders referenced by the dataset description; 'test' reuses the val folder.
_train_dir = os.path.join(output_path, 'images', 'train')
_val_dir = os.path.join(output_path, 'images', 'val')
_target_dir = os.path.join(output_path, 'images', 'unlabeled')

yaml_content = f"""
train: {_train_dir}
val: {_val_dir}
test: {_val_dir}
target: {_target_dir}

nc: {len(categories)}
names: {list(categories.values())}
"""

_yaml_path = os.path.join(output_path, 'dataset.yaml')
with open(_yaml_path, 'w') as yaml_file:
    yaml_file.write(yaml_content)
    
# Create .txt files for training and target datasets
train_txt_path = '/kaggle/working/data/coco/train2017_p10.0_labeled_data.txt'
unlabeled_txt_path = '/kaggle/working/data/coco/train2017_p10.0_unlabeled_data.txt'
val_txt_path = '/kaggle/working/data/coco/val2017.txt'


def _write_image_list(txt_path, split):
    """Write one ``datasets/coco/images/<split>/<file>`` line per file in the split's image folder.

    Args:
        txt_path: Destination list file; overwritten if it exists.
        split: Image sub-folder of ``output_path`` to enumerate.
    """
    image_dir = os.path.join(output_path, 'images', split)
    # os.listdir returns names in arbitrary order; sorting keeps the list file
    # identical from one run to the next.
    file_names = sorted(os.listdir(image_dir))
    with open(txt_path, 'w') as f:
        # The written paths are relative (no leading slash).
        f.writelines(f"datasets/coco/images/{split}/{file_name}\n" for file_name in file_names)


for _txt_path, _split in (
    (train_txt_path, 'train'),
    (unlabeled_txt_path, 'unlabeled'),
    (val_txt_path, 'val'),
):
    _write_image_list(_txt_path, _split)


In [3]:
def read_yaml(file_path):
    """Parse the YAML file at ``file_path`` with ``yaml.safe_load`` and return the result."""
    with open(file_path, 'r') as stream:
        return yaml.safe_load(stream)

# Example usage: parse the generated dataset.yaml and print the resulting dict
# as a quick check that the file is valid YAML.
file_path = '/kaggle/working/datasets/coco/dataset.yaml'  # Replace with your file path
yaml_data = read_yaml(file_path)
print(yaml_data)

{'train': '/kaggle/working/datasets/coco/images/train', 'val': '/kaggle/working/datasets/coco/images/val', 'test': '/kaggle/working/datasets/coco/images/val', 'target': '/kaggle/working/datasets/coco/images/unlabeled', 'nc': 8, 'names': ['bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat']}


In [4]:
# !rm -rf /kaggle/working/efficientteacher
# %cd /kaggle/working/
# !ls /kaggle/working

In [5]:
# Clone only when the checkout is missing, so re-running this cell does not
# fail with "destination path already exists".
![ -d efficientteacher ] || git clone https://github.com/Min-KiD/efficientteacher.git

Cloning into 'efficientteacher'...
remote: Enumerating objects: 482, done.
remote: Counting objects: 100% (154/154), done.
remote: Compressing objects: 100% (113/113), done.
remote: Total 482 (delta 117), reused 41 (delta 41), pack-reused 328
Receiving objects: 100% (482/482), 1.97 MiB | 10.73 MiB/s, done.
Resolving deltas: 100% (255/255), done.


In [6]:
# Absolute path so the cell still works when re-run from inside the repository.
%cd /kaggle/working/efficientteacher

/kaggle/working/efficientteacher


In [7]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -r requirements.txt

Collecting onnx-simplifier>=0.3.6 (from -r requirements.txt (line 27))
  Downloading onnx_simplifier-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting thop (from -r requirements.txt (line 37))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading onnx_simplifier-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, onnx-simplifier
Successfully installed onnx-simplifier-0.4.36 thop-0.1.1.post2209072238


In [8]:
# Return to /kaggle/working before training. The generated list files contain
# relative "datasets/coco/images/..." paths -- presumably resolved against the
# current working directory by the trainer; TODO confirm.
%cd /kaggle/working/

/kaggle/working


In [9]:
# Launch train.py through torchrun with 2 worker processes on this node
# (--nproc_per_node 2), rendezvous port 9527, and the 10%-labeled COCO YOLOv5-L
# SSOD config shipped in the cloned repository.
# NOTE(review): assumes two GPUs are available in this session -- confirm.
!torchrun \
    --nproc_per_node 2 \
    --master_port 9527 \
    /kaggle/working/efficientteacher/train.py \
    --cfg /kaggle/working/efficientteacher/configs/ssod/coco-standard/yolov5l_coco_ssod_10_percent.yaml 

Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
2024-06-12 05:10:01.850969: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-12 05:10:01.850969: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-12 05:10:01.851048: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-12 05:10:01.851096: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-12 05:10:01.947020: E external/local_xla/xla/strea