In [7]:
import numpy as np
import pandas as pd
import os
import datetime

In [18]:
# Thresholds read as module-level globals by `remove_blank_frames`.
# Every comparison made against them there is strict (`>` or `<`), so the boundary value itself never counts as active.
pixel_lowest_activity = 10 # a pixel is active only when its value is > this; pixels at or below this level are considered "off"
frame_lowest_count = 1 # a frame is kept only when it has MORE than this many active pixels; frames with this many or fewer are considered blank
frame_highest_count = 2000 # a frame is kept only when it has FEWER than this many active pixels; the reason for the upper bound is undocumented -- TODO confirm

In `find_blank_mat_frames`, you seem to have set `frame_highest_count` to 63 (where you called it `MAXIMUM_PIXELS`). I've no idea what you're trying to achieve with this.

In [8]:
def splitall(path):
    """Break a file path into a list of its individual components.

    The path is peeled from the right with `os.path.split` until nothing
    more can be removed. An absolute path keeps its root as the first
    element; a trailing separator yields an empty string as the last element.
    """
    components = []  # collected right-to-left, reversed before returning
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Splitting no longer shortens the head: the root of an absolute path
            components.append(head)
            break
        if tail == path:
            # Nothing left in front of the tail: first component of a relative path
            components.append(tail)
            break
        components.append(tail)
        path = head
    components.reverse()
    return components

In [9]:
def mat_date_reader(d):
    """Parse a timestamp string from a mat csv file into a timezone-aware datetime.

    Two layouts are accepted: ISO-style date and time with fractional
    seconds and a UTC offset, or the same without the fractional part.
    A ValueError is raised if the string matches neither layout.
    """
    with_fraction = '%Y-%m-%dT%H:%M:%S.%f%z'
    without_fraction = '%Y-%m-%dT%H:%M:%S%z'
    parse = datetime.datetime.strptime
    try:
        parsed = parse(d, with_fraction)
    except ValueError:
        # No fractional seconds present; retry with the whole-second layout
        parsed = parse(d, without_fraction)
    return parsed

In [20]:
def remove_blank_frames(infile, outfile):
    """Trim a mat CSV file to the span between its first and last active frame.

    A pixel is active when its value is strictly greater than
    `pixel_lowest_activity`. A frame is active when its number of active
    pixels is strictly between `frame_lowest_count` and `frame_highest_count`.
    All three thresholds are read from module-level globals.

    Parameters
    ----------
    infile : path of the CSV to read. It must contain the columns 'Frame',
        'Timestamp', 'Range Min (mmHg)' and 'Range Max (mmHg)'; every other
        column is treated as a pixel reading.
    outfile : path the trimmed CSV is written to.

    Every row from the first active frame to the last active frame
    (inclusive) is written, including any inactive frames in between. If no
    frame is active, a header-only CSV is written so a batch run can continue.

    Prints the first and last frame index of the input followed by the first
    and last active frame index (None, None when no frame is active).
    """
    mframe = pd.read_csv(infile,
                         converters={'Timestamp': mat_date_reader},
                         index_col='Frame')
    # Only pixel columns may contribute to the activity count
    pixels = mframe.drop(['Timestamp', 'Range Min (mmHg)', 'Range Max (mmHg)'], axis='columns')
    frame_activities = (pixels > pixel_lowest_activity).sum(axis='columns')
    active_frames = frame_activities[(frame_activities > frame_lowest_count) & (frame_activities < frame_highest_count)]

    if active_frames.empty:
        # An empty selection has no meaningful min/max to slice with, so
        # write just the header instead of slicing with invalid bounds.
        print(mframe.index.min(), mframe.index.max(), None, None)
        mframe.iloc[0:0].to_csv(outfile)
        return

    mn = active_frames.index.min()
    mx = active_frames.index.max()
    print(mframe.index.min(), mframe.index.max(), mn, mx)
    # Label-based slice: both end points are included
    mframe.loc[mn:mx].to_csv(outfile)

In [19]:
# Walk data/mat_data and write a trimmed copy of every CSV file into the
# matching sub-directory of data/trimmed_mat_data.
signatures_f = {}  # not used in this cell; kept in case other cells read it
for root, dirs, files in os.walk(os.path.join('data', 'mat_data')):
    for file in files:
        if file.endswith('.csv'):
            # [2:] drops the leading 'data' and 'mat_data' components, leaving
            # the path relative to the input root; it must track the depth of
            # the directory passed to os.walk above.
            subject_path = splitall(root)[2:]
            out_dir = os.path.join('data', 'trimmed_mat_data', *subject_path)
            # exist_ok=True tolerates an existing directory (re-runs, several
            # CSVs per folder) but still raises if the path is a regular file.
            os.makedirs(out_dir, exist_ok=True)
            out_file = os.path.join(out_dir, file)
            # print(root, file, out_file) # uncomment this to see what files are being trimmed
            remove_blank_frames(os.path.join(root, file), out_file)

data/mat_data/Blaine/Falls PM/BLFPM06 b6.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM06/b6.csv
1 179 1 179
data/mat_data/Blaine/Falls PM/BLFPM14 b14.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM14/b14.csv
1 144 1 144
data/mat_data/Blaine/Falls PM/BLFPM27 b27.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM27/b27.csv
1 153 1 153
data/mat_data/Blaine/Falls PM/BLFPM13 b13.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM13/b13.csv
1 284 1 284
data/mat_data/Blaine/Falls PM/BLFPM02 b2.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM02/b2.csv
1 254 1 254
data/mat_data/Blaine/Falls PM/BLFPM19 b19.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM19/b19.csv
1 161 1 161
data/mat_data/Blaine/Falls PM/BLFPM05 b5.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM05/b5.csv
1 334 1 334
data/mat_data/Blaine/Falls PM/BLFPM33 b33.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM33/b33.csv
1 167 1 167
data/mat_data/Blaine/Falls PM/BLFPM32 b32.csv data/trimmed_mat_data/Blaine/Falls PM/BLFPM32/b32.csv
1 143 1 14