In [52]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial.polynomial import polyfit
import os
import pandas as pd
import glob
from util.bat import *
import csv
import gc
from scipy import interpolate
from scipy.signal import savgol_filter
import time
import json
import multiprocessing

# Pandas display options.
# The fully qualified "display." names are used because abbreviated names are
# resolved by pattern matching and raise OptionError as soon as they match
# more than one registered option.
pd.set_option('display.notebook_repr_html', True)
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 20)

def rough_classify(datadir, outdir):
    """Plot every cleaned pulse found under ``datadir`` and sort it by slope.

    Files are discovered recursively with the pattern ``datadir/**/*#``.
    Each file is read with ``extract_anabat``; elements 0 and 1 of its result
    are handed to ``clean_graph`` as the x and y series and element 3 is
    treated as a metadata dict.

    For every pulse a straight line is fitted. A negative slope sends the
    pulse to ``outdir/echolocation/``, anything else to ``outdir/abnormal/``.
    Two files are written per pulse, both named ``<file stem>_<pulse index>``:
    a ``.png`` scatter plot and an extension-less file holding the metadata
    as JSON.

    Parameters
    ----------
    datadir : str
        Root directory that is searched for input files.
    outdir : str
        Root directory for the output; the two class sub-directories are
        created when missing.

    Returns
    -------
    None
        The mean time spent per file on plotting and writing (milliseconds)
        is printed, or a short notice when no input file was found.
    """
    filenames = glob.glob(datadir + '/**/*#', recursive=True)
    if not filenames:
        # Without this guard the average below divides by zero.
        print("No files matching '*#' found under " + str(datadir))
        return

    for subdir in ('echolocation', 'abnormal'):
        os.makedirs(os.path.join(outdir, subdir), exist_ok=True)

    fig, axes = plt.subplots()
    total_ms = 0
    try:
        for filename in filenames:
            raw = list(extract_anabat(filename))
            graph = clean_graph(filename=filename, graph=[raw[0], raw[1]])
            metadata = raw[3]

            # json.dumps cannot serialise bytes; only decode when needed so an
            # already-decoded value does not raise AttributeError.
            if isinstance(metadata.get("date"), bytes):
                metadata["date"] = metadata["date"].decode()
            if "timestamp" in metadata:
                metadata["timestamp"] = str(metadata["timestamp"])

            # os.path handles the platform's separators; splitting on "\\"
            # only worked for Windows-style paths.
            stem = os.path.splitext(os.path.basename(filename))[0]

            # Timing starts here, so it covers plotting/writing only.
            started_ms = int(round(time.time() * 1000))

            for i, pulse in enumerate(graph):
                if not pulse:
                    # polyfit raises on an empty series; nothing to draw.
                    continue

                x = [point[0] for point in pulse]
                y = [point[1] for point in pulse]

                # numpy.polynomial's polyfit orders coefficients from low to
                # high degree, so index 1 is the slope of the fitted line.
                if polyfit(x, y, 1)[1] < 0:
                    classification = '/echolocation/'
                else:
                    classification = '/abnormal/'

                pulse_name = outdir + classification + stem + '_' + str(i)

                # cla() below restores the default axes, so switch them off
                # again before every plot.
                axes.axis('off')
                axes.scatter(x, y)
                fig.savefig(pulse_name + ".png", transparent=True, dpi=50)

                with open(pulse_name, "w") as f:
                    f.write(json.dumps(metadata))

                axes.cla()

            gc.collect()
            total_ms += int(round(time.time() * 1000)) - started_ms
    finally:
        # Release the figure even when a file fails half-way through.
        plt.close(fig)

    print(total_ms / len(filenames))

# Written by Bety Rostandy
def get_batcall_zc_csv(datadir,outdir):
    """
    Export the zero-crossing data of every file under ``datadir`` to CSV.

    Files are discovered recursively with the pattern ``datadir/**/*#`` and
    read with ``extract_anabat``. Element 0 of its result is written as the
    ``Time`` column, element 1 as the ``Frequency`` column; element 3 (the
    metadata) is printed. Element 2 is not used.

    One file ``outdir/<input file name>.csv`` is written per input. The name
    is taken with ``os.path.basename`` instead of the last 12 characters of
    the path, so names of any length work; 12-character names give the same
    output name as before.

    Parameters
    ----------
    datadir : str
        Root directory that is searched for input files.
    outdir : str
        Existing directory that receives the CSV files.
    """
    filenames = glob.glob(datadir + '/**/*#', recursive=True)
    columns = ["Time", "Frequency"]

    # Handle one recording at a time instead of keeping all of them in memory.
    for filename in filenames:
        extracted = list(extract_anabat(filename))
        t = extracted[0]
        freq = extracted[1]
        metadata = extracted[3]

        print(metadata)
        batcall_df = pd.DataFrame({'Time': t, 'Frequency': freq}, columns=columns)
        batcall_df.to_csv(outdir + '/' + os.path.basename(filename) + ".csv", index=False)
        
def get_batcall_zc_png(datadir, outdir):
    """
    Render every cleaned pulse of the files in ``datadir`` as a PNG.

    Each ``datadir/*.csv`` file is read with ``extract_anabat``; elements 0
    and 1 of its result are passed to ``clean_graph`` as the x and y series.
    Every pulse is drawn as a scatter plot, with x multiplied by 1000, and
    saved as ``outdir/<file stem>_<pulse index>.png``.

    NOTE(review): the other callers in this notebook feed ``extract_anabat``
    with files matched by ``*#``. Confirm that it also accepts the ``*.csv``
    files matched here.

    Parameters
    ----------
    datadir : str
        Directory that holds the input files (not searched recursively).
    outdir : str
        Existing directory that receives the PNG files.
    """
    filenames = glob.glob(datadir + '/*.csv')
    fig, axes = plt.subplots()

    try:
        for filename in filenames:
            raw = list(extract_anabat(filename))
            # clean_graph reads graph[0] as the x series and graph[1] as the
            # y series. Indexing into every element of `raw` (as before) does
            # not produce that shape.
            graph = clean_graph(graph=[raw[0], raw[1]])

            # os.path handles the platform's separators; splitting on "\\"
            # only worked for Windows-style paths.
            stem = os.path.splitext(os.path.basename(filename))[0]

            for i, pulse in enumerate(graph):
                x = [point[0] * 1000 for point in pulse]
                y = [point[1] for point in pulse]
                # cla() below restores the default axes, so switch them off
                # again before every plot.
                axes.axis('off')
                axes.scatter(x, y)
                fig.savefig(outdir + '/' + stem + '_' + str(i) + ".png", transparent=True, dpi=50)
                axes.cla()
            gc.collect()
    finally:
        # Release the figure even when a file fails half-way through.
        plt.close(fig)
    
def get_batcall_csv_png(datadir,outdir):
    """
    Render every cleaned pulse of the CSV files in ``datadir`` as a PNG.

    Each ``datadir/*.csv`` file is handed to ``clean_graph`` by file name.
    Every pulse it returns is drawn as a scatter plot and saved as
    ``outdir/<file stem>_<pulse index>.png``.

    Parameters
    ----------
    datadir : str
        Directory that holds the CSV files (not searched recursively).
    outdir : str
        Existing directory that receives the PNG files.
    """
    filenames = glob.glob(datadir + '/*.csv')
    fig, axes = plt.subplots()

    try:
        for filename in filenames:
            graph = clean_graph(filename=filename)

            # os.path handles the platform's separators; splitting on "\\"
            # only worked for Windows-style paths.
            stem = os.path.splitext(os.path.basename(filename))[0]

            for i, pulse in enumerate(graph):
                x = [point[0] for point in pulse]
                y = [point[1] for point in pulse]
                # cla() below restores the default axes, so switch them off
                # again before every plot.
                axes.axis('off')
                axes.scatter(x, y)
                fig.savefig(outdir + '/' + stem + '_' + str(i) + ".png", transparent=True, dpi=50)
                axes.cla()
            gc.collect()
    finally:
        # Release the figure even when a file fails half-way through.
        plt.close(fig)

def clean_graph(filename ='P7132033_37', graph=None, dy_cutoff = 1800, dx_cutoff = .2, pulse_size = 40):
    """Split a zero-crossing series into pulses and drop noisy samples.

    Processing steps:

    1. Group consecutive samples into pulses: a gap in x larger than
       ``dx_cutoff`` ends the current pulse. Pulses with fewer than
       ``pulse_size`` samples are discarded.
    2. Compute the absolute difference in y between consecutive samples.
    3. Replace isolated outliers by the mean of their nearest steady
       neighbours ("smooth holes").
    4. Keep only runs of at least ``pulse_size`` samples whose differences
       stay within ``dy_cutoff``. The first sample of a pulse and the sample
       that ends a run are not part of any run.
    5. Smooth each run with a Savitzky-Golay filter (window 17, order 3) and
       keep the samples that lie closer than ``dy_cutoff / 2`` to the
       smoothed curve. The returned samples are the kept inputs, not the
       filtered values.

    Parameters
    ----------
    filename : str
        CSV file that is read when ``graph`` is None. The first row is
        skipped as a header; column 0 is read as x and column 1 as y.
    graph : sequence, optional
        ``[x_values, y_values]``. When given, ``filename`` is not opened.
    dy_cutoff : number
        Largest difference in y between neighbouring samples that still
        counts as continuous.
    dx_cutoff : number
        Largest gap in x between neighbouring samples of one pulse.
    pulse_size : int
        Minimum number of samples in a pulse and in a run. Values below 17
        can make the filter step fail, because its window is 17 samples.

    Returns
    -------
    list
        One list of ``[x, y]`` pairs per cleaned run; empty when nothing
        qualifies.
    """
    if graph is None:
        # Read the series from the CSV file; previously the rows were loaded
        # but never used, and indexing the missing `graph` raised TypeError.
        with open(filename, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # header row
            rows = [row for row in reader if row]
        zc_x = [float(row[0]) for row in rows]
        zc_y = [float(row[1]) for row in rows]
    else:
        zc_x = graph[0]
        zc_y = graph[1]

    # Identify pulses
    pulses = []
    current = []
    prev_x = 0
    for x, y in zip(zc_x, zc_y):
        if x - prev_x <= dx_cutoff:
            current.append([x, y])
        else:
            if len(current) >= pulse_size:
                pulses.append(current)
            current = [[x, y]]
        prev_x = x
    # Flush the pulse still being collected when the data ends; without this
    # the last pulse of every recording was dropped.
    if len(current) >= pulse_size:
        pulses.append(current)

    # Get 1st derivative
    # prev_y is carried over between pulses, so entry 0 of each list is the
    # difference to the previous pulse's last sample (or to 0 for the very
    # first pulse).
    pulses_dy = []
    prev_y = 0
    for pulse in pulses:
        dy = []
        for _, y in pulse:
            dy.append(abs(y - prev_y))
            prev_y = y
        pulses_dy.append(dy)

    # Smooth holes
    # NOTE(review): the differences are not recomputed after this step, so a
    # sample repaired here is still treated as a break by the run extraction
    # below - confirm whether that is intended.
    for dy, pulse in zip(pulses_dy, pulses):
        for i in range(1, len(dy) - 2):
            if dy[i] <= dy_cutoff:
                continue

            if dy[i - 1] < dy_cutoff:
                left = i - 1
            elif i >= 2 and dy[i - 2] < dy_cutoff:
                # The i >= 2 guard keeps index i - 2 from wrapping around to
                # the end of the pulse.
                left = i - 2
            else:
                continue

            if dy[i + 1] < dy_cutoff:
                right = i + 1
            elif dy[i + 2] < dy_cutoff:
                right = i + 2
            else:
                continue

            pulse[i][1] = (pulse[left][1] + pulse[right][1]) / 2

    # Clean pulses
    runs = []
    for dy, pulse in zip(pulses_dy, pulses):
        i = 1
        while i < len(pulse):
            j = i

            # Count neighboring points
            while j < len(pulse) - 1 and dy[j] <= dy_cutoff:
                j += 1

            # If there are enough neighbors, it's good
            if j - i >= pulse_size:
                runs.append(pulse[i:j])

            i = j + 1

    # Clean pulses more
    cleaner_graph = []
    for run in runs:
        # Smoothed y values from a Savitzky-Golay filter
        smooth_y = savgol_filter([point[1] for point in run], 17, 3)

        # Both points of a pair share the same x, so their distance is just
        # the difference in y.
        cleaner_graph.append(
            [point for point, sy in zip(run, smooth_y) if abs(point[1] - sy) < dy_cutoff / 2]
        )

    return cleaner_graph

In [53]:
# Classify every pulse found under ../../data/raw; rough_classify writes the
# per-pulse PNG and metadata files beneath ../../data/pulses.
rough_classify("../../data/raw", "../../data/pulses")

0 4 0.0 1896.0
1 4 1896.0 3792.0
2 4 3792.0 5688.0
3 4 5688.0 7584.0


In [51]:
# NOTE(review): this installs the third-party "multiprocess" package, while the
# imports cell imports the standard-library "multiprocessing" module - confirm
# the install is still needed.
!pip install multiprocess

Collecting multiprocess
  Downloading https://files.pythonhosted.org/packages/ca/44/1c24627fe1462b6860012d4f9f7d6da18789f9feca8263b55fbf9371f875/multiprocess-0.70.7-cp36-cp36m-win_amd64.whl (106kB)
Collecting dill>=0.2.9 (from multiprocess)
  Downloading https://files.pythonhosted.org/packages/fe/42/bfe2e0857bc284cbe6a011d93f2a9ad58a22cb894461b199ae72cfef0f29/dill-0.2.9.tar.gz (150kB)
Building wheels for collected packages: dill
  Building wheel for dill (setup.py): started
  Building wheel for dill (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Hadi Soufi\AppData\Local\pip\Cache\wheels\5b\d7\0f\e58eae695403de585269f4e4a94e0cd6ca60ec0c202936fa4a
Successfully built dill
Installing collected packages: dill, multiprocess
Successfully installed dill-0.2.9 multiprocess-0.70.7
