In [1]:
import numpy as np
import pandas as pd
import os
import datetime

In [2]:
# Thresholds read by remove_blank_frames. Every comparison there is strict,
# so a value exactly equal to a threshold does NOT pass it.
pixel_lowest_activity = 10 # a pixel counts as active only when its value is > this; values at or below it are "off"
frame_lowest_count = 1 # a frame is kept only when it has > this many active pixels; otherwise it is treated as blank
frame_highest_count = 2000 # a frame is kept only when it has < this many active pixels; NOTE(review): purpose of this upper bound is unclear - confirm

In `find_blank_mat_frames`, you seem to have set the equivalent of `frame_highest_count` to 63 (there you called it `MAXIMUM_PIXELS`). I have no idea what you are trying to achieve with that upper limit.

In [5]:
def splitall(path):
    """Break a file path into a list of its individual components.

    The path is peeled apart from the right with ``os.path.split`` until
    nothing more can be removed. An absolute path keeps its root as the
    first element; a relative path starts with its first directory or file
    name.
    """
    components = []  # collected right-to-left, reversed before returning
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Nothing left to strip from an absolute path: head is the root.
            components.append(head)
            break
        if tail == path:
            # Nothing left to strip from a relative path: tail is the first part.
            components.append(tail)
            break
        components.append(tail)
        path = head
    components.reverse()
    return components

In [6]:
def mat_date_reader(d):
    """Parse a timestamp string from a mat csv file into a datetime.

    The string must look like ``YYYY-MM-DDTHH:MM:SS[.ffffff]<utc offset>``.
    The form with fractional seconds is tried first; if that does not match,
    the form without fractional seconds is tried. A ``ValueError`` from the
    second attempt propagates to the caller.
    """
    with_fraction = '%Y-%m-%dT%H:%M:%S.%f%z'
    without_fraction = '%Y-%m-%dT%H:%M:%S%z'
    parse = datetime.datetime.strptime
    try:
        stamp = parse(d, with_fraction)
    except ValueError:
        # No fractional part present: fall back to whole seconds.
        stamp = parse(d, without_fraction)
    return stamp

In [7]:
def remove_blank_frames(infile, outfile):
    """Trim leading and trailing inactive frames from a mat csv file.

    Reads ``infile`` (indexed by its 'Frame' column, with 'Timestamp' parsed
    by ``mat_date_reader``), finds the first and last frame that counts as
    active, and writes only that inclusive range of rows to ``outfile``.

    Every column other than 'Timestamp', 'Range Min (mmHg)' and
    'Range Max (mmHg)' is treated as a pixel value. A pixel is active when
    its value is strictly greater than ``pixel_lowest_activity``. A frame is
    active when its number of active pixels is strictly between
    ``frame_lowest_count`` and ``frame_highest_count``. These three
    thresholds are read from the notebook's global namespace.

    Inactive frames that lie between the first and last active frame are
    kept; only the two ends are trimmed.

    If no frame is active, a header-only csv is written (there is no range
    to keep) and ``None None`` is printed in place of the active bounds.

    Prints: first frame index, last frame index, first active frame index,
    last active frame index.
    """
    mframe = pd.read_csv(infile,
                         converters={'Timestamp': mat_date_reader},
                         index_col='Frame')
    # Keep only the pixel columns for the activity count.
    mframe_trimmed = mframe.drop(['Timestamp', 'Range Min (mmHg)', 'Range Max (mmHg)'], axis='columns')
    # Number of active pixels in each frame.
    frame_activities = (mframe_trimmed > pixel_lowest_activity).sum(axis='columns')
    active_frames = frame_activities[(frame_activities > frame_lowest_count) & (frame_activities < frame_highest_count)]

    if active_frames.empty:
        # min()/max() of an empty index are NaN, which cannot be used as
        # slice bounds; handle the "nothing active" case explicitly.
        print(mframe.index.min(), mframe.index.max(), None, None)
        mframe.iloc[0:0].to_csv(outfile)
        return

    mn = active_frames.index.min()
    mx = active_frames.index.max()
    print(mframe.index.min(), mframe.index.max(), mn, mx)
    # Label-based slice: both mn and mx are included.
    mframe.loc[mn:mx].to_csv(outfile)

In [8]:
# NOTE(review): signatures_f is not used in this cell; kept in case a later
# cell relies on it - confirm.
signatures_f = {}
# Walk data/mat_data and write a trimmed copy of every csv file into the
# same relative location under data/trimmed_mat_data.
for root, dirs, files in os.walk(os.path.join('data', 'mat_data')):
    for file in files:
        if file.endswith('.csv'):
            # Drop the leading 'data' and 'mat_data' components so that only
            # the sub-directory structure below the input root is mirrored.
            subject_path = splitall(root)[2:]
            out_dir = os.path.join('data', 'trimmed_mat_data', *subject_path)
            # exist_ok=True tolerates an existing directory but still raises
            # if out_dir exists as a regular file, instead of hiding that
            # until the csv write fails.
            os.makedirs(out_dir, exist_ok=True)
            out_file = os.path.join(out_dir, file)
            # print(root, file, out_file) # uncomment this to see what files are being trimmed
            remove_blank_frames(os.path.join(root, file), out_file)

1 581 48 463
1 377 44 270
1 324 52 238
1 324 52 238
1 408 76 307
1 355 60 241
1 473 42 352
1 393 40 292
1 343 58 250
1 499 66 369
1 345 48 262
1 283 50 198
1 392 51 273
1 439 37 265
1 306 1 208
1 355 46 267
1 355 46 267
1 304 50 145
1 340 44 118
1 340 44 118
1 288 37 129
1 288 37 129
1 385 38 271
1 385 168 277
1 664 31 664
1 627 51 627
1 416 34 416
1 568 1 451
1 409 1 284
1 299 1 167
1 634 282 494
1 349 156 319
1 379 161 257
1 580 169 580
1 380 60 380
1 252 53 252
1 401 62 401
1 379 88 379
1 266 87 266
1 814 74 758
1 1049 403 932
1 560 51 461
1 427 47 400
1 342 63 342
1 330 42 330
1 361 44 361
1 366 1 366
1 278 52 278
1 291 48 291
1 369 39 369
1 274 1 274
1 293 51 293
1 322 69 189
1 283 63 283
1 277 80 277
1 808 61 808
1 684 39 592
1 359 71 359
1 313 37 210
1 828 43 714
1 519 1 419
1 683 1 543
1 593 1 460
1 958 1 829
1 1379 1 1273
1 1114 1 1114
1 907 45 782
1 1293 51 1232
1 1633 77 1633
1 2107 188 2107
1 539 1 462
1 580 1 498
1 519 41 409
1 434 41 318
1 428 38 332
1 328 51 328
1 491 65