In [22]:
from preprocessors.bilateral_filter import bilateral_filter
from preprocessors.gaussian_blur import gaussian_blur
from preprocessors.noise_reduction import denoise_image
from preprocessors.average_blur import layered_average_blur

import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat
import json

import re
import ast

In [9]:
import os
# get the cwd of this notebook
cwd = os.getcwd()
pwd = os.path.abspath(os.path.join(cwd, os.pardir))
pwd

'/Users/aarjavjain/Desktop/Education/Y4/PRJ/final-implementation/backend/preprocessing'

In [5]:
def load_data(path):
    if not path.endswith('.mat'):
        raise ValueError('File must be a .mat file')
    
    mat_contents = loadmat(path)

    # Access the TVDdata struct
    tvd_data = mat_contents['TVDdata'][0, 0] # Assuming TVDdata is a 1x1 struct

    # Extract the fields from TVDdata
    fnum = tvd_data['Fnum'][0, 0] # Numerical value
    im = tvd_data['Im'] # 3D array
    return fnum, im

In [6]:
data_dir_path = "YOUR DATA DIRECTORY PATH HERE"
file_name = "YOUR MAT FILE NAME HERE"
file_path = data_dir_path + file_name
fnum, im = load_data(file_path)

In [7]:
frames = np.linspace(0, fnum-1, 10, dtype=int)

In [8]:
def sort_points_clockwise(points):
    # Calculate the centroid of the points
    centroid = np.mean(points, axis=0)
    
    # Calculate the angles of each point with respect to the centroid
    def angle_with_centroid(point):
        delta = point - centroid
        return np.arctan2(delta[1], delta[0])
    
    # Sort the points by the angles
    sorted_points = sorted(points, key=angle_with_centroid)
    
    return np.array(sorted_points)

In [9]:
def read_roi(file_name, filepath, frame_num):
    df = pd.read_csv(filepath)
    file_name = file_name.split('.')[0]
    row = df.loc[df['Filename'] == file_name.split('.')[0]]
    pts = np.asarray((json.loads(row[f'Frame{frame_num}'].values[0])))

    pts = sort_points_clockwise(pts)

    return pts

In [10]:
def read_extracted(file_name, file_path, frame_num, kernel, method):
    extracted_df = pd.read_csv(file_path)
    extracted_df['Filename'] = extracted_df['Filename'].apply(lambda x: x.split('.')[0])
    extracted_df = extracted_df[extracted_df['Filename'] == file_name.split('.')[0]]
    extracted_df = extracted_df[(extracted_df['Kernel'] == kernel)]
    extracted_df = extracted_df[(extracted_df['Method'] == method)]
    
    array_str = extracted_df[f'Frame{frame_num}'].values[0]
    # Adjust the regex to handle spaces before negative numbers as well
    array_str_commas = re.sub(r'(\d)\s+(-?\d)', r'\1, \2', array_str)

    # Replace newline characters with ', ' to correctly format the list
    array_str_final = re.sub(r'\]\s*\[', '], [', array_str_commas)
    
    try:
        list_from_str = ast.literal_eval(array_str_final)
    except ValueError as e:
        print(array_str)
        print(array_str_final)
        print(f"Error converting string to list: {e}")
    
    extracted_roi = np.array(list_from_str)

    extracted_roi = sort_points_clockwise(extracted_roi)
    
    return extracted_roi

In [57]:
file_path = 'gaussian_data.csv'
kernel = 3
method = 'avg_min_intensity'
technique = 'gaussian'
frame_num = 2
extracted_df = pd.read_csv(file_path)
extracted_df['Filename'] = extracted_df['Filename'].apply(lambda x: x.split('.')[0])
extracted_df = extracted_df[extracted_df['Filename'] == file_name.split('.')[0]]
extracted_df = extracted_df[(extracted_df['Kernel'] == kernel)]
extracted_df = extracted_df[(extracted_df['Method'] == method)]

array_str = extracted_df[f'Frame{frame_num}'].values[0]
# Adjust the regex to handle spaces before negative numbers as well
array_str_commas = re.sub(r'(\d)\s+(-?\d)', r'\1, \2', array_str)

# Replace newline characters with ', ' to correctly format the list
array_str_final = re.sub(r'\]\s*\[', '], [', array_str_commas)

try:
    list_from_str = ast.literal_eval(array_str_final)
except ValueError as e:
    print(array_str)
    print(array_str_final)
    print(f"Error converting string to list: {e}")

extracted_roi = np.array(list_from_str)

extracted_roi = sort_points_clockwise(extracted_roi)

In [91]:
roi = read_roi(file_name, '../roi_data.csv', 3)
gaussian_q1 = read_extracted(file_name, 'gaussian_data.csv', 3, 31, 'q1')

In [92]:
gaussian_q1

array([[  0,  56],
       [529,  37],
       [529,  64],
       [  0, 152]])

In [60]:
from shapely.geometry import Polygon

def calculate_IoU(roi, extracted):
    poly1 = Polygon(roi)
    poly2 = Polygon(extracted)

    # Calculate intersection and union areas
    intersection_area = poly1.intersection(poly2).area
    union_area = poly1.union(poly2).area

    # Calculate IoU
    iou = intersection_area / union_area

    return iou, intersection_area, union_area

In [26]:
calculate_IoU(roi, gaussian_q1)

(0.6414667752913611, 68714.53803670402, 107120.96196329598)

In [61]:
rois = []
for frame in range(1, 11):
    roi = read_roi(file_name, '../roi_data.csv', frame)
    rois.append(roi)
rois = sorted(rois, key=lambda x: Polygon(x).area)
rois = np.array(rois)
roi_median = rois[4]

In [62]:
# Calculate IoU for all frames (1 to 10) and techniques and methods for a given file and save the results in a dataframe

def calculate_IoU_all_frames(file_name, roi_file_path, extracted_file_path):
    iou_df = pd.DataFrame(columns=['Filename','Frame', 'Technique', 'Method', 'IoU', 'Intersection Area', 'Union Area'])
    for frame in range(1, 11):
        roi = read_roi(file_name, roi_file_path, frame)
        technique = 'gaussian'
        for method in ['q1', 'avg_min_intensity']:
            extracted = read_extracted(file_name, extracted_file_path, frame, method)
            if len(extracted) == 0:
                # Set IoU and areas to 0 if no fascicle is extracted
                # iou_df.loc[len(iou_df)] = [file_name, frame, technique, method, 0, 0, 0]
                continue
            else:
                iou, intersection_area, union_area = calculate_IoU(roi, extracted)
                iou_df.loc[len(iou_df)] = [file_name, frame, technique, kernel, method, iou, intersection_area, union_area]
    return iou_df

In [93]:
# Calculate IoU for all frames (1 to 10) and techniques and methods for a given file and save the results in a dataframe

def calculate_IoU_all_frames(file_name, roi_file_path, extracted_file_path):
    iou_df = pd.DataFrame(columns=['Filename','Frame', 'Technique', 'Kernel', 'Method', 'IoU', 'Intersection Area', 'Union Area'])
    rois = []
    for frame in range(1, 11):
        roi = read_roi(file_name, roi_file_path, frame)
        rois.append(roi)
    rois = sorted(rois, key=lambda x: Polygon(x).area)
    rois = np.array(rois)
    roi_median = rois[len(rois)//2]
    # for technique in ['denoise', 'bilateral', 'gaussian', 'layered', 'binary']:
    technique = 'gaussian'
    for kernel in range(3, 32, 2):
        for method in ['q1', 'avg_min_intensity']:
            extracts = []
            for frame in range(1, 11):
                extracted = read_extracted(file_name, extracted_file_path, frame, kernel, method)
                if len(extracted) > 0:
                    extracts.append(extracted)

            if len(extracts) > 0:
                extracts.sort(key=lambda x: Polygon(x).area)
                extracts = np.array(extracts)
                if len(extracts) == 0:
                    continue
                # Take the q3 value of extracts (0, q1, median, q3, max)
                extracted_q3 = np.percentile(extracts, 75, axis=0)
                roi_median = Polygon(roi_median).buffer(0)
                extracted_q3 = Polygon(extracted_q3).buffer(0)
                # print(extracted_q3)
                iou, intersection_area, union_area = calculate_IoU(roi_median, extracted_q3)
                iou_df.loc[len(iou_df)] = [file_name, frame, technique, kernel, method, iou, intersection_area, union_area]
            # extracted_median = extracts[len(extracts)//2]
            # iou, intersection_area, union_area = calculate_IoU(roi_median, extracted_median)
            # iou_df.loc[len(iou_df)] = [file_name, frame, technique, method, iou, intersection_area, union_area]
                # iou, intersection_area, union_area = calculate_IoU(roi, extracted)
                # iou_df.loc[len(iou_df)] = [file_name, frame, technique, method, iou, intersection_area, union_area]
    return iou_df

In [64]:
# file_names is all files in the data_dir that end with .mat
import os
file_names = [f for f in os.listdir(data_dir_path) if f.endswith('.mat')]

In [65]:
file_names

['WW182S125GN10.mat',
 'WW141S050G0.mat',
 'WW172S125GN0.mat',
 'WW172S150G0.mat',
 'WW182S175G0.mat',
 'WW172S050G0.mat',
 'WW141S125GN10.mat',
 'WW182S075G0.mat',
 'WW141S150G0.mat',
 'WW172S175G0.mat',
 'WW141S125GP10.mat',
 'WW182S150G0.mat',
 'WW141S075G0.mat',
 'WW141S175G0.mat',
 'WW182S125GP10.mat',
 'WW172S075G0.mat',
 'WW182S050G0.mat',
 'WW141S125G0.mat',
 'WW182S100G0.mat',
 'WW172S125G0.mat',
 'WW141S100G0.mat',
 'WW182S125G0.mat',
 'WW172S100G0.mat',
 'WW172S125GP10.mat']

In [34]:
file_name = "WW182S075G0.mat"

In [68]:
# roi = read_roi(file_name, '../roi_data.csv', 3)
extracted = read_extracted(file_name, 'gaussian_data.csv', 3, 21, 'q1')

In [72]:
iou_df = calculate_IoU_all_frames(file_name, '../roi_data.csv', 'gaussian_data.csv')

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [73]:
# Append dataframe to existing csv file
def append_to_csv(df, file_path):
    with open(file_path, 'a') as f:
        df.to_csv(f, header=f.tell()==0, index=False)


In [94]:
for file_name in file_names:
    append_to_csv(calculate_IoU_all_frames(file_name, '../roi_data.csv', 'gaussian_data.csv'), 'gaussian_iou_results_ext.csv')

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

## Evaluate IoU's

In [95]:
# Read iou_results.csv
iou_results = pd.read_csv('gaussian_iou_results_ext.csv')

In [96]:
# Number of duplicate rows in iou_results dataframe 
duplicate_rows = iou_results[iou_results.duplicated()]
duplicate_rows

Unnamed: 0,Filename,Frame,Technique,Kernel,Method,IoU,Intersection Area,Union Area


In [97]:
# Remove duplicate rows from iou_results dataframe
iou_results = iou_results.drop_duplicates()
iou_results.to_csv('gaussian_iou_results.csv', index=False)

In [103]:
# For each technique and method, calculate the average IoU across all frames and save the results in new dataframe
def calculate_average_iou(iou_results):
    avg_iou_df = pd.DataFrame(columns=['Technique', 'Method', 'Average IoU'])
    for kernel in range(3, 32, 2):
        for method in ['q1', 'avg_min_intensity']:
            avg_iou = iou_results[(iou_results['Kernel'] == kernel) & (iou_results['Method'] == method)]['IoU'].mean()
            avg_iou_df.loc[len(avg_iou_df)] = [kernel, method, avg_iou]
    return avg_iou_df

In [99]:
# For each technique, count how many times it appears in the iou_results dataframe
def count_kernel_appearances(iou_results):
    kernel_count_df = pd.DataFrame(columns=['Kernel', 'Count'])
    for kernel in range(3, 32, 2):
        count = iou_results[iou_results['Kernel'] == kernel].shape[0]
        kernel_count_df.loc[len(kernel_count_df)] = [kernel, count]
    return kernel_count_df

In [100]:
count_kernel_appearances(iou_results)

Unnamed: 0,Kernel,Count
0,3,48
1,5,48
2,7,48
3,9,48
4,11,48
5,13,48
6,15,48
7,17,48
8,19,48
9,21,48


In [101]:
# For each filename, technique and method, calculate the average IoU across all frames and save the results in new dataframe
def calculate_average_iou_per_file(iou_results):
    avg_iou_df = pd.DataFrame(columns=['Filename', 'Technique', 'Method', 'Average IoU'])
    for file_name in file_names:
        for technique in ['denoise', 'bilateral', 'gaussian', 'layered']:
            for method in ['q1', 'avg_min_intensity']:
                avg_iou = iou_results[(iou_results['Filename'] == file_name) & (iou_results['Technique'] == technique) & (iou_results['Method'] == method)]['IoU'].mean()
                avg_iou_df.loc[len(avg_iou_df)] = [file_name, technique, method, avg_iou]
    return avg_iou_df

In [104]:
avg_iou = calculate_average_iou(iou_results)
avg_iou.sort_values(by='Average IoU', ascending=False)

Unnamed: 0,Technique,Method,Average IoU
1,3,avg_min_intensity,0.689935
4,7,q1,0.668107
12,15,q1,0.652393
3,5,avg_min_intensity,0.638283
16,19,q1,0.63533
8,11,q1,0.63228
22,25,q1,0.632126
26,29,q1,0.631821
6,9,q1,0.628905
29,31,avg_min_intensity,0.628835


In [106]:
# Aggregate the average IoU results for each Kernel (combining q1 and avg_min_intensity) and save the results in a new dataframe (without index column)
def aggregate_average_iou(iou_results):
    avg_iou_df = pd.DataFrame(columns=['Kernel', 'Average IoU'])
    for kernel in range(3, 32, 2):
        avg_iou_q1 = iou_results[(iou_results['Kernel'] == kernel) & (iou_results['Method'] == 'q1')]['IoU'].mean()
        avg_iou_avg_min_intensity = iou_results[(iou_results['Kernel'] == kernel) & (iou_results['Method'] == 'avg_min_intensity')]['IoU'].mean()
        avg_iou_combined = (avg_iou_q1 + avg_iou_avg_min_intensity) / 2
        avg_iou_df.loc[len(avg_iou_df)] = [kernel, avg_iou_combined]
    return avg_iou_df

aggregate_average_iou(iou_results).sort_values(by='Average IoU', ascending=False)


Unnamed: 0,Kernel,Average IoU
0,3.0,0.655567
1,5.0,0.631618
11,25.0,0.629462
13,29.0,0.620526
2,7.0,0.614958
4,11.0,0.611554
12,27.0,0.610188
14,31.0,0.607115
10,23.0,0.606829
6,15.0,0.603592


In [20]:
avg_iou = calculate_average_iou_per_file(iou_results)

In [21]:
# Get the average iou where filename starts with WW172
avg_iou[avg_iou['Filename'].str.startswith('WW172')].sort_values(by='Average IoU', ascending=False)

Unnamed: 0,Filename,Technique,Method,Average IoU
79,WW172S175G0.mat,layered,avg_min_intensity,0.793298
180,WW172S100G0.mat,gaussian,q1,0.787336
23,WW172S125GN0.mat,layered,avg_min_intensity,0.784457
31,WW172S150G0.mat,layered,avg_min_intensity,0.760117
126,WW172S075G0.mat,layered,q1,0.754373
...,...,...,...,...
22,WW172S125GN0.mat,layered,q1,0.469614
187,WW172S125GP10.mat,bilateral,avg_min_intensity,0.425615
46,WW172S050G0.mat,layered,q1,0.351319
27,WW172S150G0.mat,bilateral,avg_min_intensity,0.338278


In [22]:
# Get the average iou where filename starts with WW182
avg_iou[avg_iou['Filename'].str.startswith('WW182')].sort_values(by='Average IoU', ascending=False)

Unnamed: 0,Filename,Technique,Method,Average IoU
95,WW182S150G0.mat,layered,avg_min_intensity,0.937464
151,WW182S100G0.mat,layered,avg_min_intensity,0.930928
172,WW182S125G0.mat,gaussian,q1,0.925762
88,WW182S150G0.mat,denoise,q1,0.924735
146,WW182S100G0.mat,bilateral,q1,0.918060
...,...,...,...,...
91,WW182S150G0.mat,bilateral,avg_min_intensity,0.441501
171,WW182S125G0.mat,bilateral,avg_min_intensity,0.438204
38,WW182S175G0.mat,layered,q1,
118,WW182S125GP10.mat,layered,q1,


In [23]:
# Get the average iou where filename starts with WW141
avg_iou[avg_iou['Filename'].str.startswith('WW141')].sort_values(by='Average IoU', ascending=False)

Unnamed: 0,Filename,Technique,Method,Average IoU
103,WW141S075G0.mat,layered,avg_min_intensity,0.972499
71,WW141S150G0.mat,layered,avg_min_intensity,0.914215
55,WW141S125GN10.mat,layered,avg_min_intensity,0.891069
85,WW141S125GP10.mat,gaussian,avg_min_intensity,0.876747
139,WW141S125G0.mat,bilateral,avg_min_intensity,0.823109
...,...,...,...,...
86,WW141S125GP10.mat,layered,q1,0.100212
70,WW141S150G0.mat,layered,q1,0.090416
14,WW141S050G0.mat,layered,q1,0.068358
110,WW141S175G0.mat,layered,q1,0.028536
