**API - Documentation of scoring function**

import the libraries 

In [2]:
import math
import os
import re
import cv2

In [3]:
# Predefined ArUco dictionary: 4x4-bit markers, 250 distinct ids.
dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_250)
# DetectorParameters has to be instantiated (note the call); the default
# settings are used and passed to the detector explicitly.
parameters = cv2.aruco.DetectorParameters()
detector = cv2.aruco.ArucoDetector(dictionary, parameters)

**Gather or download the existing metadata, and load it**

*Metadata description*

5K images of ArUco tags, covering 5 ids and 10 different capture conditions (angle, distance, light)

In [4]:
# Every dataset folder lives under the same root; the folder names follow the
# a_<angle>_d_<distance> scheme. Note that no data_3 is defined here.
_DATASET_ROOT = r'D:\AI research internship\opencv_scripts'

data_1 = _DATASET_ROOT + r'\a_70_d_4'
data_2 = _DATASET_ROOT + r'\a_70_d_10'
data_4 = _DATASET_ROOT + r'\a_28_d_10'
data_5 = _DATASET_ROOT + r'\a_5_d_4'
data_6 = _DATASET_ROOT + r'\a_50_d_10'
data_7 = _DATASET_ROOT + r'\a_50_d_4'
data_8 = _DATASET_ROOT + r'\a_70_d_20'
data_9 = _DATASET_ROOT + r'\a_50_d_20'
data_10 = _DATASET_ROOT + r'\a_28_d_20'
data_11 = _DATASET_ROOT + r'\a_5_d_20'

**Load and prepare the data:**

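1. - clean_string function - normalises the name of an image so that only its id numbers remain

clean_string(string) - accepts the name of an image, extracts every number in it and, when there is more than one number, drops the last one. It returns the count of the remaining numbers together with those numbers joined by `_` (for example `3_17_10count` returns `(2, '3_17')`, and `3id_10count` returns `(1, '3')`)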

2. - loading the large data in batches saves your GPU and PC from a crash

>load_images_in_batches(directory, batch_size, batch_index=0) - param-1 is the path of the metadata, param-2 is the batch size (how many images you want to have per batch), and param-3 is the starting batch index, which defaults to 0 (batch_index=0)


In [5]:
def clean_string(string):
    """Reduce an image name to the numbers it carries.

    Every run of digits in ``string`` is collected. When two or more runs are
    found, the last one is discarded (presumably the per-image counter —
    confirm against the file naming scheme); a single run is kept as is.

    Returns a tuple ``(count, cleaned_string)`` where ``count`` is how many
    numbers were kept and ``cleaned_string`` is those numbers joined by ``_``.
    """
    numbers = re.findall(r'\d+', string)
    kept = numbers[:-1] if len(numbers) > 1 else numbers
    return len(kept), '_'.join(kept)

In [6]:
def load_images_in_batches(directory, batch_size, batch_index=0):
    """Lazily load the images of a folder, one batch at a time.

    Parameters
    ----------
    directory : str
        Folder whose entries are read with ``cv2.imread``.
    batch_size : int
        Maximum number of directory entries per batch; must be at least 1.
    batch_index : int, optional
        Value the batch counter starts from. The counter is incremented
        before a batch is announced, so with the default of 0 the first
        batch is reported as batch 1.

    Yields
    ------
    dict
        Maps each file name (without its extension) to the loaded image.
        Entries that cannot be read as an image are left out, and a batch
        that ends up empty is not yielded.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be at least 1, got {batch_size}')

    image_files = os.listdir(directory)
    total_images = len(image_files)
    # Round up so that a trailing, partially filled batch is still produced.
    num_batches = math.ceil(total_images / batch_size)

    for i in range(num_batches):
        batch_index += 1
        start_index = i * batch_size
        batch_files = image_files[start_index:start_index + batch_size]

        batch_images = {}
        for file in batch_files:
            image = cv2.imread(os.path.join(directory, file))
            if image is None:
                # cv2.imread signals an unreadable entry (sub-folder, non-image
                # file, corrupt image) by returning None instead of raising;
                # keeping it would crash the processing pipeline later on.
                print(f"Skipping '{file}': it could not be read as an image.")
                continue
            image_name = os.path.splitext(file)[0]  # name without extension
            batch_images[image_name] = image

        print()
        print('Batch number: ', batch_index)
        print()

        if not batch_images:
            # Nothing usable in this slice of the directory listing.
            continue
        yield batch_images


**Get an array with ids we expect to get**

>get_original_id(image_dict)  - accepts param-1 which is an image_dictionary 

Example: 

for batch in load_images_in_batches(directory, batch_size):
                        original_ids = get_original_id(batch)

In [7]:
def get_original_id(image_dict):
    """Derive the expected marker ids of every image from its name.

    Each key of ``image_dict`` is passed through ``clean_string``; the numbers
    it keeps are taken as the ids expected in that image.

    Parameters
    ----------
    image_dict : dict
        Maps image names to images; only the keys are used.

    Returns
    -------
    list of set of int
        One set per key, in the iteration order of ``image_dict``. A name that
        carries no number yields an empty set, so the list always has one
        entry per image and stays aligned with per-image predictions.
    """
    expected_ids = []

    for image_name in image_dict:
        count, cleaned = clean_string(image_name)
        if count == 0:
            print(f"Key '{image_name}' does not match the pattern.")
            expected_ids.append(set())
            continue
        # `cleaned` is the kept numbers joined by '_'; parsing it directly
        # puts no limit on how many digits an id may have.
        expected_ids.append({int(part) for part in cleaned.split('_')})

    print('Original ids')
    print(expected_ids)
    return expected_ids

**Create an image processing pipeline, and collect the predicted ids into an array of sets**
Each id or group of ids predicted for an image is stored in a set, and these sets are stored in an array.


In [8]:
def img_p_ppln(image_dict):
    """Run the image-processing pipeline and collect the detected marker ids.

    Every image goes through: grayscale conversion, inversion, CLAHE,
    Gaussian blur, adaptive mean thresholding and a fixed binary threshold,
    after which the module-level ``detector`` looks for markers.

    Parameters
    ----------
    image_dict : dict
        Maps image names to images; only the images are used. The grayscale
        conversion uses ``cv2.COLOR_BGR2GRAY``, so BGR input is assumed.

    Returns
    -------
    list of set of int
        One set of detected ids per image, in the iteration order of
        ``image_dict``; an empty set when nothing was detected.
    """
    # The CLAHE settings are the same for every image, so build it once.
    clahe = cv2.createCLAHE(clipLimit=4, tileGridSize=(16, 16))

    predictions = []
    for img in image_dict.values():
        transformation = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        transformation = cv2.bitwise_not(transformation)
        transformation = clahe.apply(transformation)

        transformation = cv2.GaussianBlur(transformation, (21, 21), 0)

        transformation = cv2.adaptiveThreshold(transformation, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 37, 1)

        _, transformation = cv2.threshold(transformation, 150, 255, cv2.THRESH_BINARY)
        _, ids, _ = detector.detectMarkers(transformation)

        p_id_set = set()
        if ids is not None:
            for inner_arr in ids:
                for i in inner_arr:
                    # Store plain ints so the sets print and compare like the
                    # expected-id sets rather than holding NumPy scalars.
                    p_id_set.add(int(i))

        predictions.append(p_id_set)

    print('F_2 predictions: ')
    print(predictions)
    return predictions

**Function that classifies each predicted id as TP, FP, FN or TN**

>classify(original_ids, predicted_ids) - > needs 2 parameters, param-1 - original_ids, and param-2 predicted_ids. 

Output: totals of TP, TN, FP, FN and FP2, the total score, and the total number of images

In [9]:
def classify(original_ids, predicted_ids):
    """Compare expected and predicted id sets image by image.

    The two lists are walked in parallel (``zip``), one pair per image.

    When ids are expected for the image:
      * every id present in both sets adds one to TP, and the image adds one
        to the score if at least one id was matched;
      * a non-empty prediction that differs from the expectation adds one to
        FP (e.g. {40} -> {13});
      * an empty prediction adds one to FN (e.g. {20} -> set()).

    When no id is expected for the image:
      * an empty prediction adds one to TN and one to the score;
      * anything else adds one to FP2 (e.g. set() -> {17}).

    Returns the tuple ``(TP, TN, FP, FN, FP2, scores, total)`` where ``total``
    is ``len(original_ids)``.
    """
    nothing = set()
    image_total = len(original_ids)
    true_pos = true_neg = false_pos = false_neg = false_pos_on_empty = 0
    hits = 0

    for expected, found in zip(original_ids, predicted_ids):
        if expected == nothing:
            # No marker should be visible in this image.
            if expected == found:
                true_neg += 1
                hits += 1
            else:
                false_pos_on_empty += 1
            continue

        # At least one marker is expected.
        shared = expected & found
        true_pos += len(shared)
        if len(shared) > 0:
            hits += 1

        if found == nothing:
            false_neg += 1
        elif expected != found:
            false_pos += 1

    return true_pos, true_neg, false_pos, false_neg, false_pos_on_empty, hits, image_total
    

**Function info - prints detailed information on how the image processing pipeline (img_p_ppln) performed**

In [10]:
def info(TP, TN, FP, FN, FP2, scores, total):
    """Print a summary of the counters produced by ``classify``.

    Parameters
    ----------
    TP, TN, FP, FN, FP2 : int
        True-positive, true-negative, false-positive, false-negative and
        false-positive-on-empty-image counters.
    scores : int
        Number of images counted as correctly predicted.
    total : int
        Number of images that were evaluated.

    Precision is ``TP / (TP + FP + FP2)`` and recall is ``TP / (TP + FN)``;
    each falls back to 0 when its denominator is 0. The score percentage is
    ``scores / total * 100`` and is reported as 0.0 when ``total`` is 0.
    """
    predicted_positive = TP + FP + FP2
    precision = TP / predicted_positive if predicted_positive != 0 else 0

    actual_positive = TP + FN
    recall = TP / actual_positive if actual_positive != 0 else 0

    # An empty batch has nothing to score; avoid dividing by zero.
    score_pct = (scores / total) * 100 if total != 0 else 0.0

    print(f'Predicted: Score:{score_pct}% | TP:{TP}, FN:{FN}, TN:{TN}, FP1:{FP}, FP2:{FP2} | precision:{precision}, recall:{recall}')
    # NOTE(review): TP counts matched ids, not images, so for images with
    # several ids this can exceed `total` — confirm whether `scores` was meant.
    print(f'Out of {total} images, {TP} were predicted accurately')

**Score function** 
Scores the pipeline. Param-1 is any customized image processing pipeline, as long as it returns the predicted ids as an array of sets (example: [{3,99},set(),{3}]); param-2 is the directory (path) of the images; param-3 is the folder name. 

In [11]:
# Short folder label for every dataset path, following the
# a_<angle>_d_<distance> scheme of the folder names themselves.
name = {
    data_1: 'a_70_d_4',
    data_2: 'a_70_d_10',
    data_4: 'a_28_d_10',
    data_5: 'a_5_d_4',
    data_6: 'a_50_d_10',
    data_7: 'a_50_d_4',
    data_8: 'a_70_d_20',
    data_9: 'a_50_d_20',
    data_10: 'a_28_d_20',  # matches the folder name (no trailing 't')
    data_11: 'a_5_d_20',
}
        
# Human-readable capture conditions for every dataset path. Despite its name,
# this dict is keyed by the dataset path and holds the description as value.
path = {
    data_1: 'Angle: 70, Distance: 4, Lighting: low day light at 5:48 PM',
    data_2: 'Angle: 70, Distance: 10, Lighting: low day light at 7:50 PM',
    data_4: 'Angle: 28, Distance: 10, Lighting: low day light at 8:48 PM',
    data_5: 'Angle: 5, Distance: 4, Lighting: low day light at 3:58 PM',
    data_6: 'Angle: 50, Distance: 10, Lighting: bright artificial and day light at 7:53 ',
    data_7: 'Angle: 50, Distance: 4, Lighting: bright artificial light low evening light at 7:01 PM',
    data_8: 'Angle: 70, Distance: 20, Lighting: low day light at 7:41 PM',
    data_9: 'Angle: 50, Distance: 20, Lighting: bright artificial light at 9:32 PM',
    data_10: 'Angle: 28, Distance: 20, Lighting: bright artificial light at 9:49 PM',
    data_11: 'Angle: 5, Distance: 20, Lighting: bright artificial light at 10:08 PM',
}

In [12]:
def score(ppln, directory, folder_name, batch_size=150):
    """Score an image-processing pipeline on every image of a folder.

    The folder is loaded batch by batch; for each batch the expected ids are
    derived from the image names, the pipeline produces its predictions, and
    the resulting counters are printed with ``info``.

    Parameters
    ----------
    ppln : callable
        Takes a dict of image name -> image and returns a list with one set
        of predicted ids per image (example: ``[{3, 99}, set(), {3}]``).
    directory : str
        Folder containing the images.
    folder_name : str
        Accepted for interface compatibility; not used by this function.
    batch_size : int, optional
        Number of images loaded per batch (150 by default).
    """
    for batch in load_images_in_batches(directory, batch_size):
        original_ids = get_original_id(batch)
        predicted_ids = ppln(batch)

        TP, TN, FP, FN, FP2, scores, total = classify(original_ids, predicted_ids)
        info(TP, TN, FP, FN, FP2, scores, total)


In [13]:
# Views over the condition descriptions and over the dataset paths they belong to.
values, keys = path.values(), path.keys()
def loop_print(values, keys):
    """Score ``img_p_ppln`` on each dataset and print a header per dataset.

    Parameters
    ----------
    values : iterable of str
        Description of the capture conditions of each dataset.
    keys : iterable of str
        Dataset directory for each entry of ``values``, in the same order.
        Every key must also be present in the ``name`` dictionary.
    """
    # Iterate over the arguments that were passed in, not over a global.
    for value, key in zip(values, keys):
        folder_name = name[key]  # folder label registered for this path
        print()
        print(f'Folder: {folder_name}')
        print()
        print('Conditions:', value)
        print()
        score(img_p_ppln, key, folder_name)

In [14]:
# Evaluate the pipeline on every dataset registered in `path`.
values = path.values()
keys = path.keys()
loop_print(values, keys)


Folder: a_70_d_4

Conditions: Angle: 70, Distance: 4, Lighting: low day light at 5:48 PM


Batch number:  1

Original ids
[{23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {23}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}, {3}]
F_2 predictions: 
[set(), set(

"\nfor value, key in zip(path.values(), path.keys()):\n          folder_name = name[key]  # Get the corresponding folder name from the name dictionary\n          print()\n          print(f'Folder: {folder_name}')\n          print()\n          print('Conditions:', value)\n          print()\n          score(img_p_ppln, key, folder_name)\n"