In [50]:
# setup

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import pickle
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter

# #UNCOMMENT FOR TRAINING
# with open('allSlotsTrain.pkl', 'rb') as f:
#     allSlots = pickle.load(f)
# with open('allTabsTrain.pkl', 'rb') as f:
#     allTabs = pickle.load(f)

## UNCOMMENT FOR TESTING
with open('allSlotsTest.pkl', 'rb') as f:
    allSlots = pickle.load(f)
with open('allTabsTest.pkl', 'rb') as f:
    allTabs = pickle.load(f)



In [51]:
# extract edge points from pixel map
def extract_edge_pts(edgeMap):

    edgeMap_conv = (edgeMap > 0).astype('uint8') * 255
    contours, _ = cv2.findContours(edgeMap_conv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    
    if len(contours) == 0:
        print("no contour found")
        return np.array([]).reshape(0, 2)
    
    contour = max(contours, key=cv2.contourArea) # LARGEST contour should be peice
    
    if len(contour.shape) == 3:
        # Standard case: (N, 1, 2) -> (N, 2)
        edgeRawPts = contour[:, 0, :]
    elif len(contour.shape) == 2:
        # Already (N, 2)
        edgeRawPts = contour
    else:
        # Unexpected - try to reshape
        edgeRawPts = contour.reshape(-1, 2)
    
    return edgeRawPts


In [52]:
# smooth edges (used for curvature calc)
def smoothCurve(curve):
    numSmoothPts = len(curve) // 3
    x = curve[:, 0]
    y = curve[:, 1]
    
    dx = np.diff(x)
    dy = np.diff(y)
    dist = np.sqrt(dx**2 + dy**2)
    
    numSmoothPts = max(numSmoothPts, 10)
    s = np.concatenate(([0], np.cumsum(dist)))
    s_uniform = np.linspace(0, s[-1], numSmoothPts)
    
    from scipy.interpolate import interp1d
    fx = interp1d(s, x, kind='linear')
    fy = interp1d(s, y, kind='linear')
    
    x_uniform = fx(s_uniform)
    y_uniform = fy(s_uniform)
    
    
    window = min(7, len(x_uniform))
    if window % 2 == 0:
        window -= 1
    
    if window < 3:
        return np.column_stack([x_uniform, y_uniform])
    
    poly = min(3, window - 1)
    
    x_sg = savgol_filter(x_uniform, window, poly, mode='interp')
    y_sg = savgol_filter(y_uniform, window, poly, mode='interp')
    
    smoothedEdgePts = np.column_stack([x_sg, y_sg])
    
    return smoothedEdgePts


In [53]:
# find tab midline curve

def find_tab_midline(edgeMap):

    # first, go from left to right and find midcurve of the tab
    mapHeight, mapWidth = edgeMap.shape
    midpoints = []
    num_cols = edgeMap.shape[1]

    for col_index in range(num_cols):
        current_column = edgeMap[:, col_index]
        if np.any(current_column != 0):
            leftBound = col_index                           # leftmost non-zero column
            rows_nonzero = np.where(current_column != 0)[0] # all row indices where non-zero; 
            topRow    = rows_nonzero[0]                     # first non-zero row
            bottomRow = rows_nonzero[-1]                    # last non-zero row
            found = 1
            midPointRow = topRow + (bottomRow-topRow)//2    # biased up, towards zero if even number
            break

    # start at midline row from previous column and traverse up and down to ensure the non-zero values found are INSIDE the tab
    midpoints = [[leftBound,midPointRow.item()]]
    gap = [] # start row, stop row, column

    for col in range (leftBound + 1, mapWidth):
        upperEdgeRow = None
        lowerEdgeRow = None
        gapRows = 0
        current_column = edgeMap[:, col]

        for row in range (midPointRow, -1, -1): # start at midpoint row from previous column, work up
            if edgeMap[row,col] == 0:
                gapRows += 1
            elif edgeMap [row,col] != 0:
                upperEdgeRow = row
                break # upper edge is found
        
        for row in range (midPointRow +1, mapHeight): # start at midpoint row from previous column, work down
            if edgeMap[row,col] == 0:
                gapRows += 1
            elif edgeMap [row,col] != 0:
                lowerEdgeRow = row
                break # lower edge is found

        gap.append ([gapRows,col])
        if (upperEdgeRow == None) or (lowerEdgeRow == None):
            midPointRow = None
            rightBound = col-1 # rightmost column with top and bottom edge
            break
    
        midPointRow = upperEdgeRow + (lowerEdgeRow-upperEdgeRow)//2
        midpoints.append([col,midPointRow])
    
    return (gap, midpoints, leftBound,rightBound)


In [54]:
# find neck and head width and midpoint locations
# assume going from left to right, the gap increases to the tab width, then decreases to the neck width, then increases again
# normalize by making distance related features relative to corners

def find_edge_features (edgeRawPts,gap,midpoints):
    neckCol = None
    cornerCol = (edgeRawPts[0,0]+ edgeRawPts[-1,0])//2 # find the column that contains the corners of the edges - average if not the same column
    upperBound = min(row[1] for row in edgeRawPts)
    lowerBound = max(row[1] for row in edgeRawPts)
    refLength = lowerBound - upperBound


    for i in range(len(gap) - 1, 2, -1):  # start from right, check 3 pts to left
        current_gap = gap[i][0]
        
        # find neck width
        if all(current_gap < gap[i-j][0] for j in range(1, 4)):
            min_gap_value = current_gap
            min_columns = [gap[i][1]]  # Start with current column
            
            # Check columns to the right for same gap value
            for j in range(i + 1, len(gap)):
                if gap[j][0] == min_gap_value:
                    min_columns.append(gap[j][1])
                else:
                    break
            
            # Check columns to the left for same gap value - if more than one column with sanme neck value
            for j in range(i - 1, -1, -1):
                if gap[j][0] == min_gap_value:
                    min_columns.insert(0, gap[j][1])  # Insert at beginning to maintain order
                else:
                    break
            neckCol = min_columns[len(min_columns) // 2]          
            neckRow = next(row for col, row in midpoints if col == neckCol) # Get the midpoint row

            break

    if neckCol is None:
        print("no local minimum found")
        return (0, 0, 0, 0, 0, 0, cornerCol, refLength)

    # now keep working to the left to find local max (tab width)
    tabWidthCol = None
    start_idx = None
    for idx, (gap_val, col) in enumerate(gap):
        if col == neckCol:
            start_idx = idx
            break

    if start_idx is not None and start_idx >= 3:  # check 3 pts to the left
        for i in range(start_idx - 1, 2, -1):  # Continue left from minimum
            current_gap = gap[i][0]
            
            # Check if current point is greater than the next 3 points to the left
            if all(current_gap > gap[i-j][0] for j in range(1, 4)):
                # Found a local maximum - collect all columns with this same gap value
                max_gap_value = current_gap
                max_columns = [gap[i][1]]  # Start with current column
                
                # Check columns to the right for same gap value
                for j in range(i + 1, len(gap)):
                    if gap[j][0] == max_gap_value:
                        max_columns.append(gap[j][1])
                    else:
                        break
                
                # Check columns to the left for same gap value
                for j in range(i - 1, -1, -1):
                    if gap[j][0] == max_gap_value:
                        max_columns.insert(0, gap[j][1])
                    else:
                        break
                
                # Pick the middle column
                tabWidthCol = max_columns[len(max_columns) // 2]
                
                # Get the midpoint row for this column
                tabWidthRow = next(row for col, row in midpoints if col == tabWidthCol)
                
                tabWidth = max_gap_value/refLength
                neckWidth = min_gap_value / refLength
                tab = [max_gap_value, tabWidthCol, tabWidthRow]
                break

    neckColDist = (cornerCol-neckCol)/refLength  # dist from corner column to neck midpoint col
    neckHeightFrac = neckRow/refLength

    tabColDist = (cornerCol-tabWidthCol)/refLength
    tabHeightFrac = tabWidthRow/refLength

    if tabWidthCol is None:
        print("No local maximum found")

    return (tabWidth,neckWidth, neckColDist, neckHeightFrac,tabColDist,tabHeightFrac, cornerCol,refLength)



In [55]:
# curvature histogram  

def calc_curvature_histogram (smoothedEdgePts):
    x = smoothedEdgePts[:, 1]
    y = smoothedEdgePts[:, 0]

    # Compute curvature
    dx = np.gradient(x)
    dy = np.gradient(y)
    d2x = np.gradient(dx)
    d2y = np.gradient(dy)
    curvature = (dx * d2y - dy * d2x) / (dx**2 + dy**2)**1.5
    curvature = np.nan_to_num(curvature) 

    # Smooth curvature using 5-point moving average
    window = 5
    kernel = np.ones(window) / window
    curv_smooth = np.convolve(curvature, kernel, mode='same')

    # fixed bins
    curv_min = -0.1
    curv_max = 0.1  
    num_bins = 12
    fixed_bins = np.linspace(curv_min, curv_max, num_bins + 1)
    hist_values, _ = np.histogram(curv_smooth, bins=fixed_bins, density=False)
    hist_values = hist_values / np.sum(hist_values)  # normalize
    print (hist_values)
    return (hist_values)



In [None]:
# distnce histogram, normalized to reference length between corners

def calculate_dist_hist(edgeRawPts, refLength, norm_range, n_bins=13, resample_points=100):
   
    from scipy.interpolate import interp1d
    
    edgeRawPts = np.array(edgeRawPts)
    
    # Resample to fixed number of points
    col_coords = edgeRawPts[:, 0]
    row_coords = edgeRawPts[:, 1]
    
    # Create parameter along the curve
    distances_along_curve = np.sqrt(np.diff(col_coords)**2 + np.diff(row_coords)**2)
    cumulative_distance = np.concatenate([[0], np.cumsum(distances_along_curve)])
    t_original = cumulative_distance / cumulative_distance[-1]
    
    # Interpolate
    f_col = interp1d(t_original, col_coords, kind='linear')
    f_row = interp1d(t_original, row_coords, kind='linear')
    
    t_new = np.linspace(0, 1, resample_points)
    col_new = f_col(t_new)
    row_new = f_row(t_new)
    
    resampled_pts = np.column_stack([col_new, row_new])
    
    # Calculate vertical reference line
    first_col = resampled_pts[0, 0]
    last_col = resampled_pts[-1, 0]
    vertical_line_col = (first_col + last_col) / 2.0
    
    # Calculate distances (positive = left, negative = right)
    distances = vertical_line_col - resampled_pts[:, 0]
    
    # NORMALIZE by refLength
    distances_normalized = distances / refLength
    
    # Create histogram with normalized range
    histogram, bin_edges = np.histogram(distances_normalized, bins=n_bins, range=norm_range)
    
    # Calculate bin size
    bin_size = (norm_range[1] - norm_range[0]) / n_bins
    
    # Calculate bin centers
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    
    # Create bin labels
    bin_labels = []
    for i in range(n_bins):
        label = f"{bin_edges[i]:.3f} to {bin_edges[i+1]:.3f}"
        bin_labels.append(label)
    
    # Normalize to proportions
    histogram_normalized = histogram / (np.sum(histogram) + 1e-10)
    
    return histogram_normalized


In [57]:
# calculate feature vector for each edge
def analyze_edge (edgeMap,edgeRawPts):
    edgeRawPts = extract_edge_pts(edgeMap)
    smoothedEdgePts = smoothCurve (edgeRawPts)
    gap, midpoints, leftBound, rightBound = find_tab_midline(edgeMap)
    tabWidth,neckWidth, neckColDist, neckHeightFrac,tabColDist,tabHeightFrac,cornerCol,refLength = find_edge_features (edgeRawPts,gap,midpoints)
    curvature_histogram = calc_curvature_histogram(smoothedEdgePts)
    dist_hist = calculate_dist_hist(edgeRawPts, refLength,norm_range=(-0.2,0.4), n_bins=12,resample_points=100)

    # assemble feature vector
    feature_vector = []
    feature_vector.append(tabWidth)
    feature_vector.append(neckWidth)
    feature_vector.append(neckColDist)
    feature_vector.append(neckHeightFrac)
    feature_vector.append(tabColDist)
    feature_vector.append(tabHeightFrac)
    feature_vector.extend(curvature_histogram)
    feature_vector.extend(dist_hist)
    return feature_vector

slot_feature_vectors = []
for i in range (len(allSlots)):
    edgeMap = allSlots[i][0]
    edgeRawPts = extract_edge_pts(edgeMap)
    current_fv= analyze_edge (edgeMap,edgeRawPts)
    slot_feature_vectors.append((allSlots[i][1],allSlots[i][2],current_fv)) # [image name, edge direction, feature vector]

tab_feature_vectors = []
for i in range (len(allTabs)):
    edgeMap = allTabs[i][0]
    edgeRawPts = extract_edge_pts(edgeMap)
    current_fv= analyze_edge (edgeMap,edgeRawPts)
    tab_feature_vectors.append((allTabs[i][1],allTabs[i][2],current_fv))


# save training examples

# # UNCOMMENT FOR TRAINING
# with open('slotFVsTrain.pkl', 'wb') as f:
#     pickle.dump(slot_feature_vectors, f)

# with open('tabFVsTrain.pkl', 'wb') as f:
#     pickle.dump(tab_feature_vectors, f)

# UNCOMMENT FOR TESTING
with open('slotFVsTest.pkl', 'wb') as f:
    pickle.dump(slot_feature_vectors, f)

with open('tabFVsTest.pkl', 'wb') as f:
    pickle.dump(tab_feature_vectors, f)

[0.         0.00701754 0.0245614  0.07719298 0.12280702 0.24210526
 0.30526316 0.10526316 0.08070175 0.02807018 0.00701754 0.        ]
[0.00353357 0.00353357 0.03180212 0.05300353 0.12014134 0.25441696
 0.30035336 0.12720848 0.06360424 0.03180212 0.00353357 0.00706714]
[0.00735294 0.00735294 0.02205882 0.06617647 0.10661765 0.28676471
 0.29779412 0.11397059 0.05882353 0.01470588 0.01838235 0.        ]
[0.01433692 0.01075269 0.01075269 0.06451613 0.10035842 0.29390681
 0.28315412 0.09318996 0.08960573 0.00716846 0.01433692 0.01792115]
[0.         0.         0.00682594 0.10580205 0.12286689 0.25938567
 0.27303754 0.12286689 0.10238908 0.00682594 0.         0.        ]
[0.0174216  0.01393728 0.03484321 0.03135889 0.1358885  0.28222997
 0.26480836 0.1358885  0.03135889 0.03832753 0.00696864 0.00696864]
[0.00357143 0.         0.025      0.03571429 0.12142857 0.28571429
 0.31428571 0.12857143 0.05714286 0.02142857 0.00357143 0.00357143]
[0.01107011 0.01107011 0.02214022 0.03690037 0.12546125