In [42]:
## add ignore warnings for now, will remove and debug once full algorithm is complete
import warnings
warnings.filterwarnings("ignore")

## import packages/libraries
from time import perf_counter, clock_gettime_ns, CLOCK_REALTIME
import numpy as np
import pandas as pd
from multiprocessing import Pool, cpu_count
from itertools import product
import sys
import sqlite3

## append filepath to allow files to be called from within project folder
sys.path.append('/home/gerard/Desktop/capstone_project/patoms')
sys.path.append('/home/gerard/Desktop/capstone_project')

## call locally created functions
from snapshot_2d_pattern_v6 import patoms2d
from snapshot_3d_pattern_v6 import patoms3d
from pattern_2d_compare_v4 import pattern_compare_2d
from pattern_3d_compare_v4 import pattern_compare_3d
from get_2d_table_names_v0 import table_names_2d
from updating_ref_table_v0 import update_ref_table
from patom_to_table_v0 import patom_to_table_func

In [4]:
## create in memory 2d database
con2d = sqlite3.connect(":memory:")
cur2d = con2d.cursor()
cur2d.execute('create table pat_2d_000000(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time);')
con2d.commit()

In [35]:
## read 2d ref database into memory and convert to ref patoms for comparison
working_ref_patoms = []
with open('/home/gerard/Desktop/capstone_project/ref2d_v3.db', 'rb') as f:
    con2dref = sqlite3.connect(":memory:")
    con2dref.deserialize(f.read())
    cur2dref = con2dref.cursor()
    ref_names = [name for (name,) in cur2dref.execute("select name from sqlite_master where type='table';").fetchall()]
    for i in ref_names:
        table = cur2dref.execute(f"select * from {i};").fetchall()
        # patom = [norm_x, norm_y, pattern_centroid_x, pattern_centroid_y, patom_ind, frame_ind, patom_time]
        table_array = np.array(table)

        working_ref_patoms.append(table_array)
ref_patoms_array = np.vstack(working_ref_patoms[:5000])

In [None]:
val = 0
while val == 0:
    rand_array = np.random.random((1, 720, 1280))
    y_len = rand_array.shape[1]
    x_len = rand_array.shape[2]
    # ingest data frame by frame
    for frame in range(rand_array.shape[0]):
        #################################################################################
        ####################### FIRST TASK: FIND PATTERNS IN FRAME ######################
        #################################################################################
        frame_patoms = patoms2d(x_len, y_len, rand_array[frame,:,:], frame)
        # patom = [[norm_x, norm_y], [pattern_centroid_x, pattern_centroid_y], patom_ind, frame_ind, patom_time]
        # patom[i][[0][0]: list of x_pos, patom[i][[0][1]: list of y_pos, patom[i][[1][0]: x_cent, patom[i][[1][1]: y_cent, patom[i][[2]: patom_ind, patom[i][[3]: frame_ind, patom[i][[4]: patom_time,
        num_patoms = len(frame_patoms)
        val += 1
        # ###########################################################################
        # ########## SECOND TASK: COMPARE NEW PATOMS AGAINST REF PATOMS #############
        # ###########################################################################
        atime = perf_counter()
        comp_indexes = list(product(range(num_patoms), range(len(working_ref_patoms[:100])))) ## this is the MAJOR limiting factor at the moment
        with Pool(processes=cpu_count()) as pool:
            items = [(frame_patoms[i[0]][0], frame_patoms[i[0]][1], frame_patoms[i[0]][2], frame_patoms[i[0]][3],\
                    working_ref_patoms[i[1]][0], working_ref_patoms[i[1]][1], working_ref_patoms[i[1]][2], working_ref_patoms[i[1]][3], i) for i in comp_indexes]
            ## function outputs ind value of the patom_indexes list, the centroid and distance similarity measures
            ########################################################################################################
            ########################################################################################################
            ###### ATTEMPT TO CONVERT ALL REF TABLES INTO SINGLE NUMPY ARRAY TO COMPAR AGAINST NEW PATTERNS ########
            ########################################################################################################
            ########################################################################################################
            comp_results = pool.starmap(pattern_compare_2d, items)
            comp_results = [x for x in comp_results if x is not None]
            print("Time to compare 2D patterns with multiprocessing (secs):", (perf_counter()-atime))
            # res output: [(i, j)]
            # get the remaining new patoms that were not flagged as being similar to previous patterns
            remaining_comp_indexes = list(set([x[0] for x in comp_indexes if x not in comp_results]))
            ## loop through the output of the comparison function
            if comp_results:
                for ix, i in enumerate(comp_results):
                    print('similar')
                    # get max table number from 2d database
                    mttime = perf_counter()
                    next_table_num = int([table for (table,) in cur2d.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()].pop(-1)[-6:]) + 1 
                    print('time to get max table:',perf_counter()-mttime)
                    next_table_num = str(next_table_num).zfill(6)
                    # check if table exists, if so, add patom, if not create table and add patom
                    ref_table_num = ref_names[i[1]][-6:] # check this is correct
                    new_patom = frame_patoms[i[0]] # check this is correct
                    ptttime = perf_counter()
                    patom_to_table = patom_to_table_func(new_patom)
                    print('time to convert patom to table:',perf_counter()-ptttime)
                    # add patom to relevant pattern table get name of ref table to get data table name
                    if f"pat_2d_{ref_table_num}" in [names for (names,) in cur2d.execute("select name from sqlite_master where type='table';").fetchall()]:
                        cur2d.executemany(f"INSERT INTO pat_2d_{ref_table_num}(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time) VALUES (?,?,?,?,?,?,?)", patom_to_table)
                        con2d.commit()
                        # patom = [norm_x, norm_y, pattern_centroid_x, pattern_centroid_y, patom_ind, frame_ind, patom_time]
                    else:
                        cur2d.execute(f"CREATE TABLE pat_2d_{ref_table_num}(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time);")
                        cur2d.executemany(f"INSERT INTO pat_2d_{ref_table_num}(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time) VALUES (?,?,?,?,?,?,?)", patom_to_table)
                        con2d.commit()
            else:
                for i in remaining_comp_indexes:
                    mttime = perf_counter()
                    next_table_num = int([table for (table,) in cur2d.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()].pop(-1)[-6:]) + 1 
                    print('time to get max table:',perf_counter()-mttime)
                    next_table_num = str(next_table_num).zfill(6)
                    new_patom = frame_patoms[i] # check this is correct
                    ptttime = perf_counter()
                    patom_to_table = patom_to_table_func(new_patom)
                    print('time to convert patom to table:',perf_counter()-ptttime)
                    # if patom is not matched then create new pattern & ref tables, get max table name and then create next new one
                    cur2d.execute(f"CREATE TABLE pat_2d_{next_table_num}(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time);")
                    cur2d.executemany(f"INSERT INTO pat_2d_{next_table_num}(x_pos_dist, y_pos_dist, x_cent, y_cent, patom_ind, frame_ind, patom_time) VALUES (?,?,?,?,?,?,?)", patom_to_table)
                    con2d.commit()


Time to get 2D patterns with multiprocessing (secs): 0.49216057000001


In [39]:
print(list(set(zip(ref_patoms_array[:,6], ref_patoms_array[:,7])))[0])

(71.0, 29.0)


In [79]:
# this does appear to work
def pseudo_cross_join(new_patom, ref_arrays):
    # Extract the category column
    categories1 = new_patom[:, 4]
    categories2 = ref_arrays[:, 4]
    
    # Perform a Cartesian join
    m, n = len(new_patom), len(ref_arrays)

    # Expand dimensions to compare categories using broadcasting
    category_match = categories1[:, None] == categories2[None, :]

    # Get indices where categories match
    indices1, indices2 = np.where(category_match)

    # Select matching rows
    matched_array1 = new_patom[indices1]
    matched_array2 = ref_arrays[indices2]

    # Concatenate horizontally to form the final joined result
    result = np.hstack((matched_array1, matched_array2))

    return result
st = perf_counter()
cart = pseudo_cross_join(frame_patoms[0], ref_patoms_array)
print('time in seconds:', (perf_counter()-st)/60)
print(cart.shape)


time in seconds: 0.014321356566658021
(4319574, 18)


In [80]:
print(len(set(zip(cart[:,15], cart[:,16]))))

5000


In [92]:
cart_df = pd.DataFrame(cart, columns=['px','py','pxc','pyc','pq','pqlen','ppind','pfind','ptime','rx','ry','rxc','ryc','rq','rqlen','rpind','rfind','rtime'])
cart_df['x_diff'] = abs(cart_df.iloc[:,0] - cart_df.iloc[:,9])
cart_df['y_diff'] = abs(cart_df.iloc[:,1] - cart_df.iloc[:,10])
cart_df['quad_len'] = cart_df.iloc[:,5] * cart_df.iloc[:,14]
cart_df['xc_d'] = abs(cart_df.iloc[:,2] - cart_df.iloc[:,11])
cart_df['yc_d'] = abs(cart_df.iloc[:,3] - cart_df.iloc[:,12])
cart_grouped = cart_df.groupby(['pq','ppind','pfind','rpind','rfind','xc_d','yc_d']).agg({'x_diff': 'sum', 'y_diff': 'sum', 'quad_len': 'min'})
cart_grouped['x_d'] = cart_grouped['x_diff'] / cart_grouped['quad_len']
cart_grouped['y_d'] = cart_grouped['y_diff'] / cart_grouped['quad_len']
cart_grouped = cart_grouped.reset_index()[['pq','ppind','pfind','rpind','rfind','xc_d','yc_d','x_d','y_d']].sort_values(by=['ppind','pfind','rpind','rfind','pq'])
print(cart_grouped.columns)
cond = [(cart_grouped['xc_d'] <= 0.2) & (cart_grouped['yc_d'] <= 0.2) & (cart_grouped['x_d'] <= 0.2) & (cart_grouped['y_d'] <= 0.2)]
choice = [1]
cart_grouped['similar'] = np.select(cond, choice)
cart_grouped.head()

Index(['pq', 'ppind', 'pfind', 'rpind', 'rfind', 'xc_d', 'yc_d', 'x_d', 'y_d'], dtype='object')


Unnamed: 0,pq,ppind,pfind,rpind,rfind,xc_d,yc_d,x_d,y_d,similar
0,13.0,0.0,0.0,0.0,0.0,0.100555,0.064047,0.365055,0.353862,0
4970,14.0,0.0,0.0,0.0,0.0,0.100555,0.064047,0.409414,0.385431,0
9943,23.0,0.0,0.0,0.0,0.0,0.100555,0.064047,0.316523,0.290306,0
14921,24.0,0.0,0.0,0.0,0.0,0.100555,0.064047,0.367278,0.324962,0
1,13.0,0.0,0.0,0.0,1.0,0.091584,0.044948,0.320902,0.349969,0
