### The purpose of the manual classification code is to provide the CNN model with more images to work with. The CNN's accuracy should improve when it is fed images that it has not already processed.

In [1]:
import imageio as iio
import glob
import pandas as pd
import numpy as np
from numpy.polynomial.polynomial import polyfit
import matplotlib.pyplot as plt
import statistics as stats
import os

Image processing functions:

In [2]:
def _dot_midpoints(img, dot_width=5):
    """Locate the midpoint of every dot in each pixel row of an image.

    A pixel counts as part of a dot when its first channel is not 255.
    Within each row the dot pixels are taken left to right in consecutive
    groups of ``dot_width``; the median column of each complete group is
    recorded as that dot's x-coordinate. Leftover pixels that do not fill a
    whole group are ignored.

    Parameters
    ----------
    img : array of shape (rows, cols) or (rows, cols, channels)
        Pixel data. Only the first channel is inspected, so RGB and RGBA
        images are both accepted.
    dot_width : int, optional
        Number of pixels each dot is assumed to span horizontally.

    Returns
    -------
    list of (x, y) tuples
        ``x`` is the column of the dot midpoint; ``y`` is counted upward
        from the bottom image row (bottom row = 0).
    """
    first_channel = img if img.ndim == 2 else img[:, :, 0]
    n_rows = first_channel.shape[0]

    points = []
    for row in range(n_rows):
        # column positions of the non-white pixels in this row, ascending
        dot_cols = np.flatnonzero(first_channel[row] != 255)

        # one midpoint per complete group of dot_width pixels
        for k in range(len(dot_cols) // dot_width):
            group = dot_cols[k * dot_width:(k + 1) * dot_width]
            # flip y so that it is read upward from the bottom of the image
            points.append((stats.median(group), n_rows - 1 - row))

    return points


def img_prc(file_path, dot_width=5):
    """Classify a single image from the slope of its dot midpoints.

    The image is read with imageio, the midpoint of every dot is located
    row by row, the midpoints sharing an x-coordinate are collapsed to their
    median y, and the resulting points are handed to ``classify``.

    Parameters
    ----------
    file_path : str
        Path of the image file to read.
    dot_width : int, optional
        Number of pixels each dot is assumed to span horizontally
        (default 5).

    Returns
    -------
    Whatever ``classify`` returns for the extracted points.

    Notes
    -----
    If the image contains no dots, an empty frame is passed to ``classify``
    unchanged.
    """
    img = iio.imread(file_path)

    # gather the y-values of all midpoints that share an x-coordinate
    ys_by_x = {}
    for x, y in _dot_midpoints(img, dot_width):
        ys_by_x.setdefault(x, []).append(y)

    # one point per x: the median of its y-values, ordered by x
    xs = sorted(ys_by_x)
    midpoints = pd.DataFrame({'x': xs,
                              'y': [stats.median(ys_by_x[x]) for x in xs]})

    # pass in the midpoints for slope processing
    return classify(midpoints)

Pre- and post-classification functions:

In [3]:
def img_bulk_prc(s):
    """Classify every image path in ``s``.

    Returns a DataFrame with a 'files' column holding the given paths and a
    'classification' column holding the result of ``img_prc`` for each path.
    """
    results = pd.DataFrame({'files': s})
    labels = results['files'].apply(img_prc)
    results['classification'] = labels
    return results

def classify(df):
    """Output the classification of processed image data from its fitted slope.

    A degree-1 polynomial is fitted to ``df['x']`` / ``df['y']``. A negative
    slope yields 'echolocation'; any other slope yields 'abnormal'.
    """
    # polyfit returns coefficients lowest order first: [intercept, slope]
    coefficients = polyfit(x=df['x'], y=df['y'], deg=1)
    gradient = coefficients[1]

    return 'echolocation' if gradient < 0 else 'abnormal'
    
def start_classification(lst):
    """Wrap the list of image paths in a Series and classify them in bulk."""
    return img_bulk_prc(pd.Series(lst))

Example run of current code:

In [4]:
# Run the notebook from the root of the local Bat_Echolocation_2019 checkout.
# Set the BAT_ECHOLOCATION_ROOT environment variable to point at your own
# checkout; the original author's location is kept as the fallback.
PROJECT_ROOT = os.environ.get(
    'BAT_ECHOLOCATION_ROOT',
    'C:\\Users\\theot\\Desktop\\Uni Stuff\\CSC490\\Bat_Echolocation_2019',
)
os.chdir(PROJECT_ROOT)

In [5]:
# Classify the five sample images 56110632.20#_0.png ... 56110632.20#_4.png.
PULSE_DIR = '../Bat_Echolocation_2019/data/pulses'
lst = ['{}/56110632.20#_{}.png'.format(PULSE_DIR, i) for i in range(5)]
df = start_classification(lst)
df

Unnamed: 0,files,classification
0,../Bat_Echolocation_2019/data/pulses/56110632....,echolocation
1,../Bat_Echolocation_2019/data/pulses/56110632....,echolocation
2,../Bat_Echolocation_2019/data/pulses/56110632....,echolocation
3,../Bat_Echolocation_2019/data/pulses/56110632....,echolocation
4,../Bat_Echolocation_2019/data/pulses/56110632....,echolocation
