In [1]:
import os
import torch

# Restrict this process to physical GPU 2. PCI_BUS_ID ordering makes the index
# follow the same numbering that `nvidia-smi` reports. Both variables have to
# be set before the first CUDA call in this kernel, otherwise they are ignored.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Check if GPU is available (falls back to CPU when it is not)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Check number of GPUs visible to this process
print(f"Number of GPUs: {torch.cuda.device_count()}")

# Check GPU name. get_device_name() raises when CUDA is unavailable, so only
# query it on the CUDA path; the CPU fallback above must not crash the cell.
if device.type == "cuda":
    print(f"GPU name: {torch.cuda.get_device_name()}")
else:
    print("GPU name: n/a (CUDA not available)")

# Check PyTorch version
print(f"PyTorch version: {torch.__version__}")

Using device: cuda
Number of GPUs: 1
GPU name: NVIDIA A100-SXM4-40GB
PyTorch version: 2.5.1+cu121


In [2]:
# Shell escape: run nvidia-smi to list the GPUs on this machine with their memory use and utilisation
!nvidia-smi

Tue Dec  3 14:40:44 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.154.05             Driver Version: 535.154.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  | 00000000:07:00.0 Off |                    0 |
| N/A   36C    P0             230W / 400W |  11590MiB / 40960MiB |     98%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-40GB          On  | 00000000:0F:00.0 Off |  

Adjust bounding boxes and segmentations

In [1]:
import json

In [2]:
def load_coco_annotations(coco_json_path):
    """Read the JSON file at ``coco_json_path`` and return the parsed object.

    Args:
        coco_json_path: Path of the JSON annotation file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    with open(coco_json_path, mode='r') as json_file:
        return json.load(json_file)

def adjust_segmentations(annotations, image_width, image_height):
    """Clamp polygon segmentation vertices to the image rectangle, in place.

    Every polygon is a flat ``[x0, y0, x1, y1, ...]`` list. Each x is clamped
    to ``[0, image_width]`` and each y to ``[0, image_height]``.

    Args:
        annotations: Iterable of annotation dicts. Annotations without a
            ``'segmentation'`` key are skipped.
        image_width: Upper bound for x coordinates.
        image_height: Upper bound for y coordinates.

    Returns:
        The same ``annotations`` object that was passed in (its polygon lists
        are modified in place).
    """
    for annotation in annotations:
        segmentation = annotation.get('segmentation')
        # Only a list of polygons can be clamped vertex by vertex. A dict-style
        # (RLE) segmentation has no vertices, so it is left untouched instead
        # of crashing on its string keys.
        if not isinstance(segmentation, list):
            continue
        for segment in segmentation:
            if not isinstance(segment, list):
                continue
            # Stopping at len - 1 visits the same indices for even-length
            # polygons and skips a dangling x with no matching y.
            for i in range(0, len(segment) - 1, 2):
                segment[i] = max(0, min(segment[i], image_width))
                segment[i + 1] = max(0, min(segment[i + 1], image_height))
    return annotations

def adjust_bounding_boxes(annotations, image_width, image_height):
    """Clip each annotation's ``bbox`` to the image and to its polygon extent, in place.

    Boxes use the ``[x, y, width, height]`` layout. Each box is intersected
    with the image rectangle and, when the annotation carries polygon
    segmentation, with the min/max extent of the polygon vertices. The
    clipping is done on corner coordinates so that moving the top-left corner
    inwards never pushes the opposite edge outwards.

    Args:
        annotations: Iterable of annotation dicts, each with a ``'bbox'`` key.
        image_width: Width of the image the annotations belong to.
        image_height: Height of the image the annotations belong to.

    Returns:
        The same ``annotations`` object that was passed in; every ``'bbox'``
        is replaced by the clipped ``[x, y, width, height]`` list. Width and
        height are never negative.

    Raises:
        KeyError: If an annotation has no ``'bbox'`` entry.
    """
    for annotation in annotations:
        x, y, w, h = annotation['bbox']

        # Clip against the image using corners (left/top/right/bottom).
        left = min(max(0, x), image_width)
        top = min(max(0, y), image_height)
        right = min(x + w, image_width)
        bottom = min(y + h, image_height)

        # Ensure bounding box does not extend beyond segmentation boundaries.
        # Only polygon lists contribute; dict-style (RLE) or empty
        # segmentations carry no vertices and leave the box as clipped above.
        segmentation = annotation.get('segmentation')
        if isinstance(segmentation, list):
            polygons = [seg for seg in segmentation if isinstance(seg, (list, tuple))]
            # len - 1 ignores a dangling x without a y in odd-length polygons.
            all_x = [seg[i] for seg in polygons for i in range(0, len(seg) - 1, 2)]
            all_y = [seg[i + 1] for seg in polygons for i in range(0, len(seg) - 1, 2)]
            if all_x:
                left, right = max(left, min(all_x)), min(right, max(all_x))
                top, bottom = max(top, min(all_y)), min(bottom, max(all_y))

        # A box that lies completely outside collapses to zero size instead of
        # getting a negative width/height.
        annotation['bbox'] = [left, top, max(0, right - left), max(0, bottom - top)]
    return annotations

def ensure_annotations_within_boundaries(coco_data):
    """Clamp every annotation's polygons and bbox to the size of its own image.

    Args:
        coco_data: Dict with an ``'images'`` list (each entry providing
            ``'id'``, ``'width'`` and ``'height'``) and an ``'annotations'``
            list (each entry providing ``'image_id'``).

    Returns:
        The same ``coco_data`` object; its annotations are modified in place.

    Raises:
        KeyError: If an annotation refers to an ``image_id`` that is not
            present in ``coco_data['images']``.
    """
    size_by_image_id = {}
    for image in coco_data['images']:
        size_by_image_id[image['id']] = (image['width'], image['height'])

    for annotation in coco_data['annotations']:
        width, height = size_by_image_id[annotation['image_id']]
        # Both helpers take a list and edit the annotation dicts in place, so
        # the single annotation is wrapped and the returned list is not needed.
        wrapped = [annotation]
        adjust_segmentations(wrapped, width, height)
        adjust_bounding_boxes(wrapped, width, height)
    return coco_data

def save_coco_annotations(coco_data, output_path):
    """Serialise ``coco_data`` as JSON into ``output_path``.

    The file is opened in write mode, so an existing file is overwritten.

    Args:
        coco_data: JSON-serialisable object to store.
        output_path: Destination file path.
    """
    with open(output_path, mode='w') as out_file:
        json.dump(coco_data, fp=out_file)

In [3]:
# Training split: input annotations and the path for the boundary-adjusted output
coco_json_path = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj_instances_train.json"
output_coco_json_path = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_train.json"

# Load the annotations and clamp them to their image boundaries
coco_data = ensure_annotations_within_boundaries(load_coco_annotations(coco_json_path))

# Save the adjusted COCO annotations
save_coco_annotations(coco_data, output_coco_json_path)

In [4]:
# Validation split: input annotations and the path for the boundary-adjusted output
coco_json_path = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj_instances_val.json"
output_coco_json_path = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_val.json"

# Load the annotations and clamp them to their image boundaries
coco_data = ensure_annotations_within_boundaries(load_coco_annotations(coco_json_path))

# Save the adjusted COCO annotations
save_coco_annotations(coco_data, output_coco_json_path)

Match V1 and V3 Category Names

In [8]:
file_v1 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/v1_instances_train.json"
file_v3 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_train.json"

# Load the V1 training annotations
with open(file_v1, 'r') as f1:
    v1_data = json.load(f1)

# Load the V3 training annotations (the boundary-adjusted "adj01" file)
with open(file_v3, 'r') as f3:
    v3_data = json.load(f3)

In [9]:
v1_categories = v1_data['categories']
v3_categories = v3_data['categories']

# Print both category tables ordered by id so the V1 and V3 names can be
# compared line by line. The V3 heading starts with a newline to leave a blank
# line between the two tables.
for heading, categories in (("V1 Data", v1_categories), ("\nV3 Data", v3_categories)):
    sorted_categories = sorted(categories, key=lambda x: x['id'])
    print(heading)
    for category in sorted_categories:
        print(f"ID: {category['id']}, Category: {category['name']}")

V1 Data
ID: 1, Category: Coke Zero Bottled
ID: 2, Category: Eden Cheese
ID: 3, Category: KitKat
ID: 4, Category: Nescafe 3-in-1 Twin Pack
ID: 5, Category: Alaska Classic 377g Can
ID: 6, Category: Simply Pure Canola Oil
ID: 7, Category: Purefoods Corned Beef
ID: 8, Category: Whole Bulb of Garlic
ID: 9, Category: Lucky Me Pansit Canton
ID: 10, Category: UFC Banana Ketchup
ID: 11, Category: Whole Lemon
ID: 12, Category: Nestle All Purpose Cream 250ml
ID: 13, Category: Lady's Choice Real Mayonnaise 220 ml jar
ID: 14, Category: Skippy Peanut Butter
ID: 15, Category: Royal Pasta
ID: 16, Category: Del Monte Pineapple Juice
ID: 17, Category: Rebisco Crackers
ID: 18, Category: 555 Sardines
ID: 19, Category: Sunsilk Shampoo
ID: 20, Category: Dove Lavender Soap
ID: 21, Category: Silver Swan Soy Sauce - 385 mL
ID: 22, Category: Colgate (Advanced White) Value Pack (2 Tubes)
ID: 23, Category: Century Tuna
ID: 24, Category: GreenCross Alcohol

V3 Data
ID: 1, Category: Coke Zero Bottled
ID: 2, Categor

In [8]:
# Update Category Names: each V1 category takes the name of the first V3
# category that has the same id; V1 categories without a V3 match are kept as is.
for v1_category in v1_data['categories']:
    matching_v3 = next(
        (candidate for candidate in v3_data['categories'] if candidate['id'] == v1_category['id']),
        None,
    )
    if matching_v3 is not None:
        v1_category['name'] = matching_v3['name']

In [None]:
# Write the updated V1 training data to disk as JSON
filename = '/data/students/mary/mlops-exercises/ME6/dataset/json_files/v1_instances_train.json'
with open(filename, 'w') as out_file:
    json.dump(v1_data, out_file)

In [6]:
file_v1 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/v1_instances_val.json"
file_v3 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_val.json"

# Load the V1 validation annotations
with open(file_v1, 'r') as f1:
    v1_data = json.load(f1)

# Load the V3 validation annotations (the boundary-adjusted "adj01" file)
with open(file_v3, 'r') as f3:
    v3_data = json.load(f3)

In [7]:
v1_categories = v1_data['categories']
v3_categories = v3_data['categories']

# Print both category tables ordered by id so the V1 and V3 names can be
# compared line by line. The V3 heading starts with a newline to leave a blank
# line between the two tables.
for heading, categories in (("V1 Data", v1_categories), ("\nV3 Data", v3_categories)):
    sorted_categories = sorted(categories, key=lambda x: x['id'])
    print(heading)
    for category in sorted_categories:
        print(f"ID: {category['id']}, Category: {category['name']}")

V1 Data
ID: 1, Category: Coke Zero Bottled
ID: 2, Category: Eden Cheese
ID: 3, Category: KitKat
ID: 4, Category: Nescafe 3-in-1 Twin Pack
ID: 5, Category: Alaska Classic 377g Can
ID: 6, Category: Simply Pure Canola Oil
ID: 7, Category: Purefoods Corned Beef
ID: 8, Category: Whole Bulb of Garlic
ID: 9, Category: Lucky Me Pansit Canton
ID: 10, Category: UFC Banana Ketchup
ID: 11, Category: Whole Lemon
ID: 12, Category: Nestle All Purpose Cream 250ml
ID: 13, Category: Lady's Choice Real Mayonnaise 220 ml jar
ID: 14, Category: Skippy Peanut Butter
ID: 15, Category: Royal Pasta
ID: 16, Category: Del Monte Pineapple Juice
ID: 17, Category: Rebisco Crackers
ID: 18, Category: 555 Sardines
ID: 19, Category: Sunsilk Shampoo
ID: 20, Category: Dove Lavender Soap
ID: 21, Category: Silver Swan Soy Sauce - 385 mL
ID: 22, Category: Colgate (Advanced White) Value Pack (2 Tubes)
ID: 23, Category: Century Tuna
ID: 24, Category: GreenCross Alcohol

V3 Data
ID: 1, Category: Coke Zero Bottled
ID: 2, Categor

In [12]:
# Update Category Names: each V1 category takes the name of the first V3
# category that has the same id; V1 categories without a V3 match are kept as is.
for v1_category in v1_data['categories']:
    matching_v3 = next(
        (candidate for candidate in v3_data['categories'] if candidate['id'] == v1_category['id']),
        None,
    )
    if matching_v3 is not None:
        v1_category['name'] = matching_v3['name']

In [13]:
# Write the updated V1 validation data to disk as JSON
filename = '/data/students/mary/mlops-exercises/ME6/dataset/json_files/v1_instances_val.json'
with open(filename, 'w') as out_file:
    json.dump(v1_data, out_file)

Merge JSON Files

In [8]:
file_v1 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/v1_instances_train.json"
file_v3 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_train.json"

# Load the V1 training annotations
with open(file_v1, 'r') as f1:
    v1_data = json.load(f1)

# Load the V3 training annotations (the boundary-adjusted "adj01" file)
with open(file_v3, 'r') as f3:
    v3_data = json.load(f3)

In [9]:
# Merge V1 and V3 into one COCO dict: images and annotations are concatenated
# (V1 first), the category list is taken from V3 only.
#
# COCO tooling indexes images and annotations by `id`, so ids must stay unique
# after the merge. When a V3 id clashes with a V1 id, all V3 ids of that kind
# are shifted past the largest V1 id, and each V3 annotation's `image_id`
# follows its image. With no clash both offsets are 0 and the result is the
# plain concatenation. Shifted entries are copies; v3_data is not modified.
v1_image_ids = {image['id'] for image in v1_data['images']}
v3_image_ids = {image['id'] for image in v3_data['images']}
image_id_offset = 0
if v1_image_ids & v3_image_ids:
    image_id_offset = max(v1_image_ids) - min(v3_image_ids) + 1

v1_annotation_ids = {annotation['id'] for annotation in v1_data['annotations']}
v3_annotation_ids = {annotation['id'] for annotation in v3_data['annotations']}
annotation_id_offset = 0
if v1_annotation_ids & v3_annotation_ids:
    annotation_id_offset = max(v1_annotation_ids) - min(v3_annotation_ids) + 1

merged_data = {
    'images': v1_data['images'] + [
        {**image, 'id': image['id'] + image_id_offset}
        for image in v3_data['images']
    ],
    'categories': v3_data['categories'],
    'annotations': v1_data['annotations'] + [
        {
            **annotation,
            'id': annotation['id'] + annotation_id_offset,
            'image_id': annotation['image_id'] + image_id_offset,
        }
        for annotation in v3_data['annotations']
    ],
}

In [10]:
# Destination of the merged training annotations
filename = '/data/students/mary/mlops-exercises/ME6/dataset__v3/merged_instances_train.json'

# Write the merged result as JSON indented by 4 spaces
with open(filename, 'w') as merged_file:
    json.dump(merged_data, merged_file, indent=4)

print("JSON files merged successfully.")

JSON files merged successfully.


In [11]:
file_v1 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/v1_instances_val.json"
file_v3 = "/data/students/mary/mlops-exercises/ME6/dataset__v3/adj01_instances_val.json"

# Load the V1 validation annotations
with open(file_v1, 'r') as f1:
    v1_data = json.load(f1)

# Load the V3 validation annotations (the boundary-adjusted "adj01" file)
with open(file_v3, 'r') as f3:
    v3_data = json.load(f3)

In [12]:
# Merge V1 and V3 into one COCO dict: images and annotations are concatenated
# (V1 first), the category list is taken from V3 only.
#
# COCO tooling indexes images and annotations by `id`, so ids must stay unique
# after the merge. When a V3 id clashes with a V1 id, all V3 ids of that kind
# are shifted past the largest V1 id, and each V3 annotation's `image_id`
# follows its image. With no clash both offsets are 0 and the result is the
# plain concatenation. Shifted entries are copies; v3_data is not modified.
v1_image_ids = {image['id'] for image in v1_data['images']}
v3_image_ids = {image['id'] for image in v3_data['images']}
image_id_offset = 0
if v1_image_ids & v3_image_ids:
    image_id_offset = max(v1_image_ids) - min(v3_image_ids) + 1

v1_annotation_ids = {annotation['id'] for annotation in v1_data['annotations']}
v3_annotation_ids = {annotation['id'] for annotation in v3_data['annotations']}
annotation_id_offset = 0
if v1_annotation_ids & v3_annotation_ids:
    annotation_id_offset = max(v1_annotation_ids) - min(v3_annotation_ids) + 1

merged_data = {
    'images': v1_data['images'] + [
        {**image, 'id': image['id'] + image_id_offset}
        for image in v3_data['images']
    ],
    'categories': v3_data['categories'],
    'annotations': v1_data['annotations'] + [
        {
            **annotation,
            'id': annotation['id'] + annotation_id_offset,
            'image_id': annotation['image_id'] + image_id_offset,
        }
        for annotation in v3_data['annotations']
    ],
}

In [13]:
# Destination of the merged validation annotations
filename = '/data/students/mary/mlops-exercises/ME6/dataset__v3/merged_instances_val.json'

# Write the merged result as JSON indented by 4 spaces
with open(filename, 'w') as merged_file:
    json.dump(merged_data, merged_file, indent=4)

print("JSON files merged successfully.")

JSON files merged successfully.
