# 03_Separate_Labels_Files

Script to separate the labels files into subsets for each tower type.


In [2]:
!pip install geopandas

Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 3.4 MB/s eta 0:00:01
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[K     |████████████████████████████████| 7.8 MB 129.5 MB/s eta 0:00:01
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.1 MB)
[K     |████████████████████████████████| 16.1 MB 63.2 MB/s eta 0:00:01
[?25hCollecting shapely>=1.7
  Downloading shapely-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 57.3 MB/s eta 0:00:01
[?25hCollecting pandas>=1.0.0
  Downloading pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)
[K     |████████████████████████████████| 12.2 MB 65.0 MB/s eta 0:00:01
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (

In [2]:
import os
import sys
import json
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np

In [3]:
# Directory holding the label JSON files. Defaults to the original location but
# can be overridden through the TOWER_LABELS_DIR environment variable, so the
# notebook is not tied to one machine's absolute path.
base_path = os.environ.get("TOWER_LABELS_DIR", "/workspace/data/")

# Every file name below is relative, so make the data directory the working directory.
os.chdir(base_path)

In [4]:
base_path  # echo the configured data directory as the cell output

'/workspace/data/'

## Change all category labels for the gridtracer sample data to 'tower'

### for train

In [4]:
# Load the original training labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_train.json") as jf:
    labels_train = json.load(jf)

In [5]:
# Collapse every training annotation onto category id 0
for ann in labels_train["annotations"]:
    ann.update(category_id=0)

In [6]:
# Keep only the entry named "DT" in the category list ...
labels_train["categories"] = list(
    filter(lambda category: category["name"] == "DT", labels_train["categories"])
)

# ... and relabel that first (remaining) entry as the generic "tower" class.
# Indexing [0] raises IndexError if no "DT" category was present.
labels_train["categories"][0]["name"] = "tower"

In [7]:
# Persist the single-class training labels
with open("labels_tower_train.json", "w") as out_file:
    json.dump(labels_train, out_file)

### for val

In [8]:
# Load the original validation labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_val.json") as jf:
    labels_val = json.load(jf)

In [9]:
# Collapse every validation annotation onto category id 0
for ann in labels_val["annotations"]:
    ann.update(category_id=0)

In [10]:
# Keep only the entry named "DT" in the category list ...
labels_val["categories"] = list(
    filter(lambda category: category["name"] == "DT", labels_val["categories"])
)

# ... and relabel that first (remaining) entry as the generic "tower" class.
# Indexing [0] raises IndexError if no "DT" category was present.
labels_val["categories"][0]["name"] = "tower"

In [11]:
# Persist the single-class validation labels
with open("labels_tower_val.json", "w") as out_file:
    json.dump(labels_val, out_file)

### for test

In [5]:
# Load the original test labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_test.json") as jf:
    labels_test = json.load(jf)

In [6]:
# Collapse every test annotation onto category id 0
for ann in labels_test["annotations"]:
    ann.update(category_id=0)

In [7]:
# Keep only the entry named "DT" in the category list ...
labels_test["categories"] = list(
    filter(lambda category: category["name"] == "DT", labels_test["categories"])
)

# ... and relabel that first (remaining) entry as the generic "tower" class.
# Indexing [0] raises IndexError if no "DT" category was present.
labels_test["categories"][0]["name"] = "tower"

In [9]:
# Persist the single-class test labels
with open("labels_tower_test.json", "w") as out_file:
    json.dump(labels_test, out_file)

## Remove Other Towers (OT) from the train labels file (this category only exists in the training data)

In [11]:
# Start from a fresh copy of the original training labels. The in-memory
# `labels_train` was collapsed to a single "tower" class earlier in this
# notebook (every category_id set to 0), so filtering that object would not
# remove any Other Tower annotation on a top-to-bottom run.
with open("labels_train.json") as jf:
    labels_train = json.load(jf)

# Drop every annotation whose category_id is 1 (the OT category)
labels_train['annotations'] = [ann for ann in labels_train['annotations'] if ann['category_id'] != 1]

# Save the modified dictionary back to a JSON file
with open('labels_train_new.json', 'w') as f:
    json.dump(labels_train, f)

In [12]:
# List the categories currently present in the train labels
for category in labels_train['categories']:
    print(category)

{'id': 0, 'name': 'DT', 'supercategory': None}
{'id': 1, 'name': 'OT', 'supercategory': None}
{'id': 2, 'name': 'TT', 'supercategory': None}


In [13]:
# Drop the entry with id 1 from the category list as well
labels_train['categories'] = list(
    filter(lambda category: category['id'] != 1, labels_train['categories'])
)

# Overwrite the cleaned train labels file with the updated dictionary
with open("labels_train_new.json", "w") as out_file:
    json.dump(labels_train, out_file)

## Create label JSON files for train, val, and test containing only transmission towers

### for train

In [14]:
# Reload the cleaned training labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_train_new.json") as jf:
    labels_train = json.load(jf)

In [15]:
# List the categories present before filtering
for category in labels_train['categories']:
    print(category)

{'id': 0, 'name': 'DT', 'supercategory': None}
{'id': 2, 'name': 'TT', 'supercategory': None}


In [16]:
# Keep only transmission towers: discard every annotation whose category_id is 0
# (the DT category), then discard the id-0 entry from the category list.
labels_train['annotations'] = [
    annotation
    for annotation in labels_train['annotations']
    if annotation['category_id'] != 0
]

labels_train['categories'] = [
    category for category in labels_train['categories'] if category['id'] != 0
]


In [17]:
# Confirm which categories remain
for category in labels_train['categories']:
    print(category)

{'id': 2, 'name': 'TT', 'supercategory': None}


In [18]:
# Write the transmission-tower-only training labels.
# NOTE(review): the remaining category keeps id 2 here, whereas the val/test TT
# outputs in this notebook show id 1 - confirm downstream code copes with that.
with open("labels_train_TT.json", "w") as out_file:
    json.dump(labels_train, out_file)

### for val

In [19]:
# Reload the original validation labels; the context manager guarantees the
# file handle is closed once parsing has finished.
with open("labels_val.json") as jf:
    labels_val = json.load(jf)

In [20]:
# Keep only transmission towers: discard every annotation whose category_id is 0,
# then discard the id-0 entry from the category list.
labels_val['annotations'] = [
    annotation
    for annotation in labels_val['annotations']
    if annotation['category_id'] != 0
]

labels_val['categories'] = [
    category for category in labels_val['categories'] if category['id'] != 0
]


In [21]:
# Confirm which categories remain
for category in labels_val['categories']:
    print(category)

{'id': 1, 'name': 'TT', 'supercategory': None}


In [22]:
# Write the transmission-tower-only validation labels
with open("labels_val_TT.json", "w") as out_file:
    json.dump(labels_val, out_file)

### for test

In [24]:
# Reload the original test labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_test.json") as jf:
    labels_test = json.load(jf)

In [25]:
# Keep only transmission towers: discard every annotation whose category_id is 0,
# then discard the id-0 entry from the category list.
labels_test['annotations'] = [
    annotation
    for annotation in labels_test['annotations']
    if annotation['category_id'] != 0
]

labels_test['categories'] = [
    category for category in labels_test['categories'] if category['id'] != 0
]

In [26]:
# Confirm which categories remain
for category in labels_test['categories']:
    print(category)

{'id': 1, 'name': 'TT', 'supercategory': None}


In [27]:
# Write the transmission-tower-only test labels
with open("labels_test_TT.json", "w") as out_file:
    json.dump(labels_test, out_file)

## Let's do the same for only the distribution towers

### for train

In [30]:
# Reload the cleaned training labels; the context manager guarantees the file
# handle is closed once parsing has finished.
with open("labels_train_new.json") as jf:
    labels_train = json.load(jf)

In [31]:
# Keep only distribution towers: discard every annotation whose category_id is 2
# (the TT category in the cleaned train file), then discard the id-2 category entry.
labels_train['annotations'] = [
    annotation
    for annotation in labels_train['annotations']
    if annotation['category_id'] != 2
]

labels_train['categories'] = [
    category for category in labels_train['categories'] if category['id'] != 2
]

In [32]:
# Confirm which categories remain
for category in labels_train['categories']:
    print(category)

{'id': 0, 'name': 'DT', 'supercategory': None}


In [33]:
# Write the distribution-tower-only training labels
with open("labels_train_DT.json", "w") as out_file:
    json.dump(labels_train, out_file)

### for val

In [3]:
# Reload the original validation labels; the context manager guarantees the
# file handle is closed once parsing has finished.
with open("labels_val.json") as jf:
    labels_val = json.load(jf)

In [4]:
# Keep only distribution towers (DT = 0, TT = 1): discard every annotation whose
# category_id is 1, then discard the id-1 entry from the category list.
labels_val['annotations'] = [
    annotation
    for annotation in labels_val['annotations']
    if annotation['category_id'] != 1
]

labels_val['categories'] = [
    category for category in labels_val['categories'] if category['id'] != 1
]


In [5]:
# Write the distribution-tower-only validation labels
with open("labels_val_DT.json", "w") as out_file:
    json.dump(labels_val, out_file)

### for test

In [3]:
# Read the unfiltered test labels (labels_test.json), not labels_test_DT.json:
# the latter is the output file written at the end of this section. The context
# manager guarantees the file handle is closed once parsing has finished.
with open("labels_test.json") as jf:
    labels_test = json.load(jf)

In [4]:
# Keep only distribution towers: discard every annotation whose category_id is 1,
# then discard the id-1 entry from the category list.
labels_test['annotations'] = [
    annotation
    for annotation in labels_test['annotations']
    if annotation['category_id'] != 1
]

labels_test['categories'] = [
    category for category in labels_test['categories'] if category['id'] != 1
]

In [5]:
# Write the distribution-tower-only test labels
with open("labels_test_DT.json", "w") as out_file:
    json.dump(labels_test, out_file)

## Remap category IDs in the cleaned train labels file (labels_train_new.json)

In [5]:
def remap_category(json_file_path, old_id, new_id):
    """Replace one category id with another throughout a labels JSON file.

    Reads ``json_file_path``, rewrites every ``categories[*]["id"]`` and every
    ``annotations[*]["category_id"]`` equal to ``old_id`` to ``new_id``, and
    writes the result back to the same path. Entries carrying any other id are
    left untouched. Returns ``None``.
    """
    with open(json_file_path, 'r') as src:
        payload = json.load(src)

    # (top-level list, key holding the id) pairs; categories are handled first,
    # then annotations.
    for section, id_key in (('categories', 'id'), ('annotations', 'category_id')):
        for entry in payload[section]:
            if entry[id_key] == old_id:
                entry[id_key] = new_id

    # Overwrite the input file with the remapped content
    with open(json_file_path, 'w') as dst:
        json.dump(payload, dst)

# Renumber category id 2 to 1 in the cleaned train labels; the file is rewritten in place.
# NOTE(review): labels_train_TT.json was written earlier in this notebook with id 2
# and is not touched by this call - confirm whether it needs the same remap.
remap_category('labels_train_new.json', 2, 1)