In [None]:
# Mount Google Drive in the Colab runtime; every path used below lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# NOTE(review): the output saved with this cell looks like a listing of the whole
# batch_1 directory rather than of this single JSON file -- presumably stale; re-run to confirm.
!ls "/content/drive/MyDrive/tmas/batch_1/train_batch1.json"

README.dataset.txt   test	       TMAS.v1i.coco.zip  train_batch1	     valid
README.roboflow.txt  test_batch1.json  train		  train_batch1.json  valid_batch1.json


In [1]:
import re
import os
import pandas as pd
from pathlib import Path
import json
import numpy as np
import shutil

In [2]:
# Root of the project data on the mounted Drive.
# NOTE(review): this path spells the Drive folder "My Drive", while the JSON/output
# paths in the later cells use "MyDrive"; the saved outputs show both resolving, but
# a single spelling would be less surprising -- confirm and unify.
data_dir = Path("/content/drive/My Drive/tmas")
# Folder that is passed to read_json_and_find_images as the tree to scan for plate images.
plate_images_folder =  data_dir / "CRYPTIC" / "plate-images-20240423"

In [6]:
def extract_image_name(file_name):
    """Translate a filtered plate PNG name into the corresponding ``.jpg`` name.

    A name of the form ``<stem>-UKMYC5-filtered.png`` or
    ``<stem>-UKMYC6-filtered.png`` yields ``<stem>.jpg``. Any other name
    yields ``None``.
    """
    # The pattern is anchored at both ends, so the whole name must fit the form.
    found = re.match(r"^(.+)-UKMYC[56]-filtered\.png$", file_name)
    if found is None:
        return None
    stem = found.group(1)
    return f"{stem}.jpg"

def rename_image(file_name):
    """Strip the ``-UKMYC5-filtered`` / ``-UKMYC6-filtered`` suffix from a PNG name.

    ``<stem>-UKMYC5-filtered.png`` (or the ``UKMYC6`` variant) becomes
    ``<stem>.png``. Names that do not have that form give ``None``.
    """
    hit = re.match(r"^(.+)-UKMYC[56]-filtered\.png$", file_name)
    return hit.group(1) + '.png' if hit else None

def read_json_and_find_images(json_path, folder_path, output_folder):
    """Copy every filtered plate image whose name is listed in a JSON file.

    The tree under ``folder_path`` is walked exactly two levels deep
    (``<folder_path>/<main folder>/<subfolder>/<file>``). Each file ending in
    ``-filtered.png`` is translated to its ``.jpg`` name with
    ``extract_image_name``; when that name is listed in the JSON file, the PNG
    is copied into ``output_folder`` under the name given by ``rename_image``.

    Each listed image is copied at most once: if a second source file maps to
    an image that was already copied, it is skipped and reported instead of
    silently overwriting the first copy.

    Args:
        json_path: Path of a JSON file holding the wanted image names
            (assumed to be a list of ``"<stem>.jpg"`` strings).
        folder_path: Root directory that contains the main folders.
        output_folder: Destination directory; created when it does not exist.

    Returns:
        None. Progress, skipped duplicates and missing names are printed.
    """
    # exist_ok avoids the separate exists() check (and the race it implies).
    os.makedirs(output_folder, exist_ok=True)

    # Read the JSON file to get the list of image names
    with open(json_path, 'r', encoding='utf-8') as file:
        image_names = json.load(file)
    print("Number of image names in JSON:", len(image_names))

    # A set gives constant-time lookups; the raw list would be rescanned per file.
    wanted = {name for name in image_names if isinstance(name, str)}
    # Image names that have already been copied during this call.
    copied = set()

    # Loop through the main folders (e.g., "UKMYC5", "UKMYC6")
    for main_folder in os.listdir(folder_path):
        main_folder_path = os.path.join(folder_path, main_folder)
        print("Check", main_folder_path)
        if not os.path.isdir(main_folder_path):
            continue

        # Loop through each subfolder (e.g., "14", "21")
        for subfolder in os.listdir(main_folder_path):
            subfolder_path = os.path.join(main_folder_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            for filename in os.listdir(subfolder_path):
                if not filename.endswith('-filtered.png'):
                    continue

                # None means the name lacks the -UKMYC5/6- part; rename_image
                # would return None for it too, so it can never be copied.
                searchname = extract_image_name(filename)
                if searchname is None or searchname not in wanted:
                    continue

                src_path = os.path.join(subfolder_path, filename)
                if searchname in copied:
                    # Same destination name as an earlier copy: keep the first one.
                    print(f"Skipping duplicate {src_path}")
                    continue

                dst_path = os.path.join(output_folder, rename_image(filename))
                print(f"Copying {src_path} to {dst_path}")
                shutil.copy(src_path, dst_path)
                copied.add(searchname)

    # Surface names that were requested but never found instead of ignoring them.
    missing = wanted - copied
    if missing:
        print(f"Warning: {len(missing)} of {len(wanted)} listed images were not found under {folder_path}")



In [7]:
# Training split: copy the images named in train_batch1.json into train_batch_1.
json_path = '/content/drive/MyDrive/tmas/batch_1/train_batch1.json'
output_folder = '/content/drive/MyDrive/tmas/batch_1/train_batch_1'
read_json_and_find_images(json_path, plate_images_folder, output_folder)

Number of image names in JSON: 2411
Check /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/04-00829-706270-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/train_batch_1/04-00829-706270-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/03-JPN-R2012-00013-03-JPN-R2012-00013-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/train_batch_1/03-JPN-R2012-00013-03-JPN-R2012-00013-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/04-00163-631936-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/train_batch_1/04-00163-631936-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/06-06TB_0032-06MIL1277-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/train_batch_1/06-06TB_0032-06MIL1277-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate

In [8]:
import os

def count_images_in_folder(folder_path):
    """Return how many entries directly inside ``folder_path`` end in ``.png``.

    Only the entry names are inspected and subfolders are not descended into.
    When ``folder_path`` does not exist, a notice is printed and 0 is returned.
    """
    if os.path.exists(folder_path):
        # Case-sensitive suffix test on each directory entry name.
        return sum(1 for entry in os.listdir(folder_path) if entry.endswith('.png'))

    print(f"The folder {folder_path} does not exist.")
    return 0


In [9]:
# Sanity check: count the .png files now in the training output folder, for
# comparison with the "Number of image names in JSON" line printed by the copy cell.
output_folder = '/content/drive/MyDrive/tmas/batch_1/train_batch_1'
num_images = count_images_in_folder(output_folder)
print(f"Number of images in the folder {output_folder}: {num_images}")

Number of images in the folder /content/drive/MyDrive/tmas/batch_1/train_batch_1: 2411


In [10]:
# Validation split: copy the images named in valid_batch1.json into valid_batch_1.
valid_json_path = '/content/drive/MyDrive/tmas/batch_1/valid_batch1.json'
valid_output_folder = '/content/drive/MyDrive/tmas/batch_1/valid_batch_1'
read_json_and_find_images(valid_json_path, plate_images_folder, valid_output_folder)

Number of image names in JSON: 804
Check /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/04-00824-708449-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/valid_batch_1/04-00824-708449-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/06-06TB_0290-06MIL0881-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/valid_batch_1/06-06TB_0290-06MIL0881-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/05-PTAN-0253-TAN-576-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/valid_batch_1/05-PTAN-0253-TAN-576-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/06-06TB_0542-06MIL1220-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/valid_batch_1/06-06TB_0542-06MIL1220-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMY

In [11]:
# Sanity check: count the .png files in the validation output folder.
# NOTE(review): despite the "valid_test" in its name, this variable holds the
# validation folder (valid_batch_1), not the test folder.
valid_test_output_folder = '/content/drive/MyDrive/tmas/batch_1/valid_batch_1'
num_images = count_images_in_folder(valid_test_output_folder)
print(f"Number of images in the folder {valid_test_output_folder}: {num_images}")

Number of images in the folder /content/drive/MyDrive/tmas/batch_1/valid_batch_1: 804


In [None]:
# Test split: copy the images named in test_batch1.json into test_batch_1.
test_json_path = '/content/drive/MyDrive/tmas/batch_1/test_batch1.json'
test_output_folder = '/content/drive/MyDrive/tmas/batch_1/test_batch_1'
read_json_and_find_images(test_json_path, plate_images_folder, test_output_folder)

Number of image names in JSON: 803
Check /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/04-00878-709531-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/test_batch_1/04-00878-709531-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/10-KD01544988-KD01544988-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/test_batch_1/10-KD01544988-KD01544988-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/06-A43861-06MIL1298-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/test_batch_1/06-A43861-06MIL1298-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/08-02TB2786-26537-1-21-UKMYC5-filtered.png to /content/drive/MyDrive/tmas/batch_1/test_batch_1/08-02TB2786-26537-1-21.png
Copying /content/drive/My Drive/tmas/CRYPTIC/plate-images-20240423/UKMYC5/21/06-A43

In [None]:
# Sanity check: count the .png files in the test output folder.
# NOTE(review): no output is saved for this cell -- run it to confirm the count
# matches the number of names reported by the test copy cell.
test_output_folder = '/content/drive/MyDrive/tmas/batch_1/test_batch_1'
num_images = count_images_in_folder(test_output_folder)
print(f"Number of images in the folder {test_output_folder}: {num_images}")

In [5]:
# Quick manual check of rename_image on a UKMYC6 file name.
# NOTE(review): this cell's execution count (5) is lower than that of the cell
# defining rename_image (6), so it was run out of order; on a fresh
# Restart & Run All it executes after the definition, as laid out here.
print(rename_image('10-YA00064211-YA00064211-1-14-UKMYC6-filtered.png'))

10-YA00064211-YA00064211-1-14.png
