In [1]:
## add ignore warnings for now, will remove and debug once full algorithm is complete
import warnings
warnings.filterwarnings("ignore")

## import packages/libraries
from time import perf_counter
import numpy as np
import pandas as pd
import math
from operator import itemgetter
from multiprocessing import Pool, cpu_count
from itertools import product
import sys
import sqlite3

## append filepath to allow files to be called from within project folder
sys.path.append('/home/gerard/Desktop/capstone_project/patoms')
sys.path.append('/home/gerard/Desktop/capstone_project')

## call locally created functions
from snapshot_2d_pattern_v2 import patoms2d
from snapshot_3d_pattern_v6 import patoms3d
from pattern_2d_compare_v2 import pattern_compare_2d
from pattern_3d_compare_v4 import pattern_compare_3d

In [19]:
## start the wall-clock timer used to assess how long the whole process takes
s = perf_counter()

## open the 2D pattern database: new patterns are stored here and compared
## against the patterns that already exist in it
con2d = sqlite3.connect("database_2d.db")
cur2d = con2d.cursor()

## synthetic test data for algorithm development, seeded so every run
## produces the same array: one frame of 720 rows by 1280 columns
np.random.seed(42)
rand_array = np.random.random((1, 720, 1280))
## axis lengths, in the array's own axis order (frames, rows, columns)
z_len, y_len, x_len = rand_array.shape

## minimum similarity scores for two patterns to be treated as a match
dist_sim_threshold = 0.85
centroid_sim_threshold_x = centroid_sim_threshold_y = 0.85

In [4]:
def merge_lists_with_common_elements(nested_lists):
    """Merge sublists that share at least one element into disjoint groups.

    Merging is transitive: if a sublist overlaps several groups that were
    built earlier, all of those groups are fused into one. (Merging into
    only the first overlapping group would leave e.g. ``[1, 2]`` and
    ``[3, 4]`` separate even after ``[2, 3]`` links them.)

    Parameters
    ----------
    nested_lists : iterable of iterables of hashable items
        The lists to merge. The input is not modified.

    Returns
    -------
    list of list
        Pairwise-disjoint groups without duplicate elements. Groups appear in
        the order their first member list was seen, and elements inside a
        group keep first-seen order. An empty sublist never overlaps anything
        and therefore yields its own empty group.
    """
    # Each group is a dict used as an insertion-ordered set of its members.
    groups = []

    for sublist in nested_lists:
        incoming = dict.fromkeys(sublist)
        target = None  # first existing group that overlaps `incoming`
        kept = []

        for group in groups:
            if incoming.keys() & group.keys():
                if target is None:
                    target = group
                    kept.append(group)
                else:
                    # Existing groups are disjoint, so a second overlap means
                    # `incoming` bridges two groups: absorb this one as well.
                    target.update(group)
            else:
                kept.append(group)

        if target is None:
            # No overlap found: the sublist starts a new group.
            kept.append(incoming)
        else:
            target.update(incoming)
        groups = kept

    return [list(group) for group in groups]

In [None]:
# Ingest the data frame by frame.
for frame in range(rand_array.shape[0]):
    #################################################################################
    ####################### FIRST TASK: FIND PATTERNS IN FRAME ######################
    #################################################################################
    similar_pattern_groups_list_new = []
    # Find the patterns ("patoms") in this frame.
    frame_patoms = patoms2d(x_len, y_len, rand_array[frame, :, :], frame)
    # Per the original author's note, each patom is laid out as
    #   [[norm_x, norm_y], [pattern_centroid_x, pattern_centroid_y], patom_ind, frame_ind, patom_time]
    # Only elements [0] and [1] of each patom are read in this cell.
    num_patoms = len(frame_patoms)
    #################################################################################
    ########## SECOND TASK: COMPARE ALL PATTERNS IN FRAME TO THEMSELVES ############# -- might not be needed in future iterations but lets see
    #################################################################################
    ## Compare every patom against every other patom in the frame, so that similar
    ## patoms can be grouped before they are compared against existing patterns.
    atime = perf_counter()
    patom_indexes = list(product(range(num_patoms), range(num_patoms)))
    items = [(frame_patoms[i[0]][0], frame_patoms[i[0]][1], frame_patoms[i[1]][0], frame_patoms[i[1]][1]) for i in patom_indexes]
    with Pool(processes=cpu_count()) as pool:
        ## One result per index pair, in the same order as patom_indexes.
        res = pool.starmap(pattern_compare_2d, items)
    # Example result (from the original author's note): [1.0, (0.05407046509274386, 0.03361332388786884)]
    # and it is read below as [distance_similarity, (centroid_similarity_x, centroid_similarity_y)].
    #print("Time to compare 2D patterns with multiprocessing (secs):", (perf_counter()-atime))

    # Index pairs of patoms in this frame that are similar to one another.
    match_list_new = []
    for ix, sim in enumerate(res):
        ## Skip a patom compared against itself.
        if patom_indexes[ix][0] == patom_indexes[ix][1]:
            continue
        ## Keep the pair only if every similarity measure reaches its threshold.
        if (sim[1][0] >= centroid_sim_threshold_x) and (sim[1][1] >= centroid_sim_threshold_y) and (sim[0] >= dist_sim_threshold):
            match_list_new.append([patom_indexes[ix][0], patom_indexes[ix][1]])

    # Merge index pairs that share a patom into groups of similar patoms.
    if match_list_new:
        similar_pattern_groups = merge_lists_with_common_elements(match_list_new)
        similar_pattern_groups_list_new.append(similar_pattern_groups)

    # Work out which patoms belong to no similarity group.
    # similar_pattern_groups_list_new is a list of "lists of groups", so it has to be
    # flattened twice to reach the patom indices; flattening only once leaves whole
    # groups in the collection and the integer membership test can never match.
    flat_groups = {
        patom_ix
        for groups in similar_pattern_groups_list_new
        for group in groups
        for patom_ix in group
    }
    # Always defined, including when no similar groups were found (previously that
    # case raised NameError on the line that builds patom_index_list).
    remaining_patoms = [j for j in range(num_patoms) if j not in flat_groups]

    # Remaining patoms plus grouped patoms need to be compared against the reference
    # tables, or else stored in a new table.
    patom_index_list = similar_pattern_groups_list_new + remaining_patoms

    #############################################################################################################################################################
    ########## THIRD TASK: STORE NEW PATTERNS IN EMPTY TABLES AND ADD OLD (PATTERNS SIMILAR TO PREVIOUSLY RECEIVED DATA) IN THEIR RESPECTIVE TABLES #############
    #############################################################################################################################################################
    ## Intended: get all non-empty reference data tables from the database.
    # NOTE(review): as written the query selects tables whose name does NOT contain
    # 'ref', and count(*) counts rows of sqlite_master rather than rows of the listed
    # tables, so it does not filter out empty tables. Left unchanged pending a
    # decision on the intended table naming/schema.
    ref = nonref = cur2d.execute("select name from (SELECT name, count(*) as rc FROM sqlite_master WHERE type='table' AND name NOT LIKE '%ref%' group by name) where rc > 0;")
    tables_ref = ref.fetchall()  # list of 1-tuples holding table names
    # If any tables came back, compare the newly acquired patterns against them.
    if tables_ref:
        similar_pattern_groups_list_ref = []
        table_names_ref = [ref_name for (ref_name,) in tables_ref]
        # Every (patom index, table index) combination.
        patom_ref_indexes = list(product(range(num_patoms), range(len(table_names_ref))))
        # NOTE(review): table_names_ref holds table-name strings, so the [..][0] and
        # [..][1] lookups below pass single characters of the name to
        # pattern_compare_2d instead of stored pattern data. The reference patterns
        # still need to be loaded from the tables; the schema is not visible here.
        items = [(frame_patoms[i[0]][0], frame_patoms[i[0]][1], table_names_ref[i[1]][0], table_names_ref[i[1]][1]) for i in patom_ref_indexes]
        with Pool(processes=cpu_count()) as pool:
            ## One result per index pair, in the same order as patom_ref_indexes.
            res = pool.starmap(pattern_compare_2d, items)
        #print("Time to compare 2D patterns with multiprocessing (secs):", (perf_counter()-atime))

        # (patom index, table index) pairs whose similarity measures reach the thresholds.
        # The two indices live in different index spaces, so there is no "self
        # comparison" to skip here, and the pair must come from patom_ref_indexes
        # (patom_indexes has a different length and meaning).
        match_list_ref = []
        for ix, sim in enumerate(res):
            if (sim[1][0] >= centroid_sim_threshold_x) and (sim[1][1] >= centroid_sim_threshold_y) and (sim[0] >= dist_sim_threshold):
                match_list_ref.append([patom_ref_indexes[ix][0], patom_ref_indexes[ix][1]])

        # Merge pairs that share an index.
        # NOTE(review): patom and table indices share the same integers, so this merge
        # can join unrelated pairs (patom 1 with table 1) — confirm the intended grouping.
        if match_list_ref:
            similar_pattern_groups = merge_lists_with_common_elements(match_list_ref)
            similar_pattern_groups_list_ref.append(similar_pattern_groups)

    # If no tables came back, the patterns should be written to new tables.
    else:
        # TODO: not implemented yet. `pass` keeps the cell syntactically valid; the
        # bare `else:` with only comments after it was a syntax error.
        pass
    
    ## if there is at least one non-empty reference table then we compare current patterns against reference pattern
    # if table_names_ref:
    #     for i in table_frame_patoms:
    #         for j in table_names_ref:
    #             if 

    ## get all non-empty non reference data tables from the database
    # nonref = cur2d.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE '%ref%';")
    # tables_nonref = nonref.fetchall()  # List of tuples with table names
    # table_names_nonref = []
    # # Loop through each table and check if it's empty
    # for (table_name,) in tables_nonref:  # Unpack the tuple
    #     cur2d.execute(f"SELECT COUNT(*) FROM {table_name};")
    #     row_count = cur2d.fetchone()[0]  # Get the row count
    #     ## perform comparison on tables that have data in the database
    #     if row_count == 0:
    #         table_names_nonref.append(table_name)
    #     else:
    #         pass
    ## sort table names (in case they aren't)
    #table_names_nonref = sorted(table_names_nonref)
    
    # ## insert data into empty tables
    # for jx, j in enumerate(similar_pattern_groups):
    #     # now that I have the empty table I need to write all the patterns from each pattern group into it
    #     for patom in j:
    #         cur2d.executemany(f"INSERT INTO {table_names[jx]}(frame, norm_x_dist, norm_y_dist, pat_cent_x, pat_cent_y, patom_ind) VALUES (?,?,?,?,?,?,?,?)", table_frame_patoms[patom])

Time to get 2D patterns with multiprocessing (secs): 0.3625822459871415
Time to get 2D patterns with multiprocessing (secs): 0.35410050899372436


In [23]:
# Inspect the within-frame match pairs collected by the ingest loop.
# The loop stores them in `match_list_new`; the bare name `match_list` is never
# assigned in this notebook and only resolved through leftover kernel state.
match_list_new

[]

In [7]:
## build function to get the average of each of the patterns in each table
## the reference table should have the most common of x, y positions, the most common x and y centroid positions
# ref = cur2d.execute("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE '%ref%';")
# tables_ref = ref.fetchall()  # List of tuples with table names
# # Loop through each table delete rows and re-insert new averaged values from patterns just ingested by system
# for (ref_name,) in tables_ref:  # Unpack the tuple 
#     if ref_name.replace('_ref','') in tables_ref:
# #         cur2d.execute(f"DELETE FROM {ref_name};")
# #         cur2d.execute(f"INSERT INTO {ref_name}(avg_norm_x_pos, avg_norm_y_pos, avg_norm_dist) \
# #                         SELECT AVG(norm_x_pos) AS norm_x_pos, AVG(norm_y_pos) as norm_y_pos, AVG(norm_dist) as norm_dist FROM {ref_name.replace('_ref','')}")
# #     else:
# #         pass

# patom = [[norm_x, norm_y], [pattern_centroid_x, pattern_centroid_y], ind]

In [8]:
## get all non-empty non reference data tables from the database
# nonref = cur2d.execute("select name from (SELECT name, count(*) as rc FROM sqlite_master WHERE type='table' AND name NOT LIKE '%ref%' group by name) where rc > 0;")
# tables_nonref = nonref.fetchall()  # List of tuples with table names
# table_names_nonref = []
# # Loop through each table and create reference table
# for (table_name,) in tables_nonref[0]: 
#     # ref table requires avg pat cent x, y, the avg num of rows, which consists of the most frequent x, y positions
#     ref_table = cur2d.execute(f"""
#                     select
#                         *
#                         from
#                         (
#                         select 
#                             patom_time, --single value
#                             count(*) over() as rc, 
#                             pat_cent_x, --single value
#                             pat_cent_y, -- single value
#                             norm_x_dist, -- not sure if this is list of single values entered in rows
#                             count(round(norm_x_dist,4)) over() as x_dist_freq, 
#                             norm_y_dist,
#                             count(round(norm_y_dist,4)) over() as y_dist_freq
                                
#                             from {table_name}
#                         ) as bse
                        
#                     ;""")
        
# patom = [[norm_x, norm_y], [pattern_centroid_x, pattern_centroid_y], ind, f_ind, patom_time]
# ## sort table names (in case they aren't)
# table_names_nonref = sorted(table_names_nonref)

In [9]:
# con2d.commit()
# con2d.close()

In [10]:
### rough work ###
#     compare_list = []
# # Initialize an empty dictionary
#         result_dict = {}
#         # Iterate over the list of lists
#         for key, value in match_list:
#             # Add the value to the appropriate key in the dictionary
#             if key not in result_dict:
#                 result_dict[key] = []
#             result_dict[key].append(value)
#         compare_list.append(result_dict)

## function to find the next empty list in a set of nested lists
# def find_next_empty_list_iterative(nested_list):
#     stack = [(nested_list, [])]  # (current_list, current_path)

#     while stack:
#         current, path = stack.pop()
#         for index, element in enumerate(current):
#             current_path = path + [index]
#             if element == []:
#                 return current_path
#             elif isinstance(element, list):
#                 stack.append((element, current_path))
#     return None  # No empty list found