In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import os
import yaml
import json

In [2]:
def combine_coco_jsons(json_path1, json_path2, output_path):
    """Merge two COCO-style annotation files into one with fresh, contiguous ids.

    Images are renumbered starting at 1: all images of ``json_path1`` first,
    then all images of ``json_path2``. Annotations are renumbered the same way
    and their ``image_id`` is rewritten to the new id of the image they
    belonged to in their *own* file. ``categories`` is taken from the first
    file only.

    Args:
        json_path1: Path to the first COCO JSON file (must contain
            ``images`` and ``categories``).
        json_path2: Path to the second COCO JSON file (must contain
            ``images``).
        output_path: Destination path of the combined JSON (written with
            ``indent=4``).

    Raises:
        KeyError: If a required key is missing, or an annotation references
            an image id that does not exist in the same file.
    """
    with open(json_path1, 'r') as f:
        data1 = json.load(f)

    with open(json_path2, 'r') as f:
        data2 = json.load(f)

    combined_data = {
        "images": [],
        "annotations": [],
        "categories": data1["categories"]
    }

    new_image_id = 1
    new_annotation_id = 1
    for data in (data1, data2):
        # The old->new id map is kept per file. A single shared map would let
        # an image id reused by the second file overwrite the first file's
        # entry, silently re-attaching the first file's annotations to the
        # wrong image.
        image_id_map = {}
        for image in data["images"]:
            image_id_map[image["id"]] = new_image_id
            image["id"] = new_image_id
            combined_data["images"].append(image)
            new_image_id += 1

        # A file without an "annotations" key (e.g. a purely unlabeled split)
        # simply contributes no annotations.
        for annotation in data.get("annotations", []):
            annotation["id"] = new_annotation_id
            annotation["image_id"] = image_id_map[annotation["image_id"]]
            combined_data["annotations"].append(annotation)
            new_annotation_id += 1

    with open(output_path, 'w') as f:
        json.dump(combined_data, f, indent=4)

In [3]:
import shutil

# Input locations: the COCO train2017 images and the labeled / unlabeled / val annotation files.
source_data_path = '/kaggle/input/coco-2017-dataset/coco2017/train2017'
source_train_label_annotations = '/kaggle/input/semis-od-coco-10/instances_train2017_labeled.json'
source_train_unlabel_annotations = '/kaggle/input/semis-od-coco-10/instances_train2017_unlabeled.json'
source_val_annotations = '/kaggle/input/semis-od-coco-10/instances_val2017.json'

# Merge the labeled and unlabeled train annotations into one file in the working directory.
source_train_annotations = '/kaggle/working/instances_train2017.json'
combine_coco_jsons(
    source_train_label_annotations,
    source_train_unlabel_annotations,
    source_train_annotations,
)

# Output layout: datasets/coco/{annotations,train2017,val2017}
destination_path = 'datasets/coco'
annotations_path, train_images_path, val_images_path = (
    os.path.join(destination_path, subdir)
    for subdir in ('annotations', 'train2017', 'val2017')
)

# Create every output directory (no error if it already exists).
for output_dir in (annotations_path, train_images_path, val_images_path):
    os.makedirs(output_dir, exist_ok=True)

# Place the merged train annotations and the val annotations under annotations/.
for annotation_src, annotation_name in (
    (source_train_annotations, 'instances_train2017.json'),
    (source_val_annotations, 'instances_val2017.json'),
):
    shutil.copy(annotation_src, os.path.join(annotations_path, annotation_name))

# Function to get image filenames from annotations
def get_image_filenames(annotation_file):
    """Return a ``{file_name: id}`` dict built from the ``images`` list of a COCO JSON file.

    If the same ``file_name`` occurs more than once, the id of the last
    occurrence is kept.
    """
    with open(annotation_file, 'r') as handle:
        image_entries = json.load(handle)['images']

    filename_to_id = {}
    for entry in image_entries:
        filename_to_id[entry['file_name']] = entry['id']
    return filename_to_id

# Filename lookups for the labeled-train, unlabeled-train and val splits
train_label_images = get_image_filenames(source_train_label_annotations)
train_unlabel_images = get_image_filenames(source_train_unlabel_annotations)
val_images = get_image_filenames(source_val_annotations)

# Route each file in the source directory to train2017/ or val2017/.
# Train membership (labeled or unlabeled) takes precedence over val;
# files listed in none of the annotation files are skipped.
for image_filename in os.listdir(source_data_path):
    if image_filename in train_label_images or image_filename in train_unlabel_images:
        target_dir = train_images_path
    elif image_filename in val_images:
        target_dir = val_images_path
    else:
        continue
    shutil.copy(
        os.path.join(source_data_path, image_filename),
        os.path.join(target_dir, image_filename),
    )

print("Dataset organized successfully.")

Dataset organized successfully.


In [4]:
# Sanity check: number of entries copied into the validation image directory.
val_image_count = len(os.listdir(val_images_path))
print(val_image_count)

1232


In [5]:
# Install detectron2 into the kernel's environment, pinned to the commit that
# the recorded run of this notebook resolved, so a re-run builds the same code.
%pip install 'git+https://github.com/facebookresearch/detectron2.git@e8806d607403cf0f2634d4c5ac464109fdc7d4af'

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-71c76fzy
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-71c76fzy
  Resolved https://github.com/facebookresearch/detectron2.git to commit e8806d607403cf0f2634d4c5ac464109fdc7d4af
  Preparing metadata (setup.py) ... [?25ldone
Collecting pycocotools>=2.0.2 (from detectron2==0.6)
  Downloading pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.p

In [6]:
# Clone the training code; skip the clone when the checkout already exists so
# the cell can be re-run without git failing on the existing directory.
!test -d unbiased-teacher-v2 || git clone https://github.com/Min-KiD/unbiased-teacher-v2.git

Cloning into 'unbiased-teacher-v2'...
remote: Enumerating objects: 139, done.[K
remote: Counting objects: 100% (78/78), done.[K
remote: Compressing objects: 100% (65/65), done.[K
remote: Total 139 (delta 38), reused 13 (delta 13), pack-reused 61[K
Receiving objects: 100% (139/139), 2.07 MiB | 24.62 MiB/s, done.
Resolving deltas: 100% (40/40), done.


In [7]:
# %cd unbiased-teacher-v2/

In [8]:
# from detectron2.data import DatasetCatalog, MetadataCatalog

# # Define the dataset names
# dataset_names = ["coco_train", "coco_val"]

# # Remove existing dataset registrations if they exist
# for name in dataset_names:
#     if name in DatasetCatalog:
#         del DatasetCatalog[name]
#     if name in MetadataCatalog:
#         del MetadataCatalog[name]

In [9]:
# from detectron2.data.datasets import register_coco_instances
# train_ann_file = "/kaggle/working/datasets/coco/annotations/instances_train2017.json"
# val_ann_file = "/kaggle/working/datasets/coco/annotations/instances_val2017.json"
# train_path = "/kaggle/working/datasets/coco/train2017"
# val_path = "/kaggle/working/datasets/coco/val2017"

# register_coco_instances("coco_train", {}, train_ann_file, train_path)
# register_coco_instances("coco_val", {}, val_ann_file, val_path)

In [10]:
# !rm -rf /kaggle/working/unbiased-teacher-v2
# %cd /kaggle/working/
# !ls /kaggle/working

In [11]:
# Launch training on 2 GPUs with the Faster R-CNN config, overriding batch
# sizes, LR step / max iterations, initial weights and dataset names on the
# command line. The config flag is spelled out in full (`--config-file`)
# instead of relying on argparse prefix matching of `--config`.
!python /kaggle/working/unbiased-teacher-v2/train_net.py \
    --num-gpus 2 \
    --resume \
    --config-file /kaggle/working/unbiased-teacher-v2/configs/Faster-RCNN/coco-standard/faster_rcnn_R_50_FPN_ut2_sup10_run0.yaml \
    SOLVER.IMG_PER_BATCH_LABEL 8 SOLVER.IMG_PER_BATCH_UNLABEL 8 \
    SOLVER.STEPS "(2000,)" SOLVER.MAX_ITER 2000 \
    MODEL.WEIGHTS /kaggle/input/unbiasedweight2000iters/model_final700.pth \
    DATASETS.TRAIN "('coco_train',)" DATASETS.TEST "('coco_val',)"
#epochs = 6

Command Line Args: Namespace(config_file='/kaggle/working/unbiased-teacher-v2/configs/Faster-RCNN/coco-standard/faster_rcnn_R_50_FPN_ut2_sup10_run0.yaml', resume=True, eval_only=False, num_gpus=2, num_machines=1, machine_rank=0, dist_url='tcp://127.0.0.1:49152', opts=['SOLVER.IMG_PER_BATCH_LABEL', '8', 'SOLVER.IMG_PER_BATCH_UNLABEL', '8', 'SOLVER.STEPS', '(2000,)', 'SOLVER.MAX_ITER', '2000', 'MODEL.WEIGHTS', '/kaggle/input/unbiasedweight2000iters/model_final700.pth', 'DATASETS.TRAIN', "('coco_train',)", 'DATASETS.TEST', "('coco_val',)"])
[32m[06/12 03:30:09 detectron2]: [0mRank of current process: 0. World size: 2
[32m[06/12 03:30:10 detectron2]: [0mEnvironment info:
-------------------------------  ------------------------------------------------------------------------------
sys.platform                     linux
Python                           3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]
numpy                            1.26.4
detectron2