### Script 3 – Event cleaning - Decisions

<b>Functions</b>: Define events using the criterion of >= 3 contiguous identical classifications; shorter runs are treated as “noise” and thrown out of the data.

<b>Input</b>: smoothed file (600 TRs)
<br>
<br>
<u>Output Files</u>:
<br>
New meditation period with events (<600 TRs; step2_event_clean_short.csv file) 
<br>
Full meditation period with excluded data points, can see spaces between events of same type (600 TRs, step2_event_clean_all.txt)
<br> 
New meditation period with single zero between adjacent events of same type. For plotting purposes (<600 TRs; step2_event_clean_short_zero.csv file)
<br>
Index of excluded points (step2_event_exclude.txt file 000010000)
<br>
Index of included points (step2_event_include.txt file 111101111) – inverse of excluded

Identify the % of data points excluded (in notebook): computed for each subject inside the loop, and the average across subjects is also printed.

In [2]:
import os

import numpy as np
import pandas as pd
import scipy.io as sio

In [3]:
# set root directory
# (base folder of the study files; the cells below build every
# "<root_dir>/<analysis>/results/step2/<subject>/" path from it)
root_dir = "/Path/To/EMBODY_Study/Files"

In [5]:
# define subjects
# (list of integer subject ids; each id is used as the subject folder name
# and is processed by the `for subj in subjects` loop below)
subjects = [124]

# define analyses
# (name of the analysis folder that sits directly under root_dir)
analysis = "phase1_demo"

### Define Functions

In [6]:
def imports_subj_meditation(subj):
    """Move into a subject's step2 results folder and name the file to read.

    subj: subject id (int), used as the last folder of the path.

    Side effect: changes the process working directory to
    "<root_dir>/<analysis>/results/step2/<subj>/".

    Returns the file name "step2_3cat.txt" (a bare name, relative to the
    directory that was just entered).
    """
    # build the subject folder from the notebook-level settings
    subject_dir = "%s/%s/results/step2/%d/" % (root_dir, analysis, subj)

    # make it the working directory so the bare file name below resolves there
    os.chdir(subject_dir)

    return "step2_3cat.txt"

In [7]:
def read_txt_file(filename):
    """Read meditation file, returns list of digits (ints).

    filename: input is string (e.g., "step2_3cat.txt")

    Only the first line of the file is used. Every character on that line
    that is a digit becomes one integer in the result; separators such as
    commas, spaces and the newline are skipped. Because the line is read
    character by character, a multi-digit number is returned as its
    individual digits.

    An empty file returns an empty list.
    """

    # 'r' (read-only): the previous 'rw' mode is rejected by Python 3's open()
    with open(filename, 'r') as f:
        # readline() returns "" for an empty file instead of raising
        first_line = f.readline()

    # filter characters by whether or not they are a digit. if digit, convert to integer.
    return [int(x) for x in first_line if x.isdigit()]

In [8]:
def write_txt_file(filename, to_write):
    """Write a sequence of strings to a file as one comma-separated line.

    filename: path of the file to create (an existing file is overwritten)
    to_write: iterable of strings; items are joined with "," and no
              trailing newline is added
    """
    with open(filename, 'w') as out_file:
        line = ",".join(to_write)
        out_file.write(line)

In [12]:
def events(subj):
    """Clean a subject's smoothed classifications into events.

    input is subject id (int)

    An event is a run of at least 3 contiguous identical classifications.
    Every TR that belongs to such a run is kept; every other TR is
    treated as noise and excluded.

    output is three lists of strings:
        surviving_events: the classifications of the kept TRs, in order
                          (shorter than the input when TRs were excluded)
        included_events:  one entry per input TR, "1" if kept else "0"
        excluded_events:  one entry per input TR, "1" if excluded else "0"
                          (inverse of included_events)

    Side effects: changes the working directory to the subject's step2
    folder, reads "step2_smooth.txt" from it and writes
    "percent_events_cleaned.csv" (percentage of TRs excluded, 2 decimals)
    into it.

    The run-based scan below treats the first and last TRs exactly like
    interior ones. The earlier hand-written start/end special cases
    dropped TRs of valid runs that touched indices 1-2 or 597-598, could
    leave the last TR unassigned, and only worked for exactly 600 TRs.
    """
    # minimum number of contiguous identical classifications forming an event
    min_event_length = 3

    # navigate to subject's directory
    # (the relative file names used below resolve against it)
    os.chdir("%s/%s/results/step2/%d/" % (root_dir, analysis, subj))

    # smoothed classifications file
    meditation = read_txt_file("step2_smooth.txt")

    total_events = len(meditation)

    # start with everything excluded; qualifying runs flip their TRs to included
    surviving_events = []
    included_events = [0] * total_events
    excluded_events = [1] * total_events

    run_start = 0
    while run_start < total_events:

        # extend the run while the classification stays the same
        run_end = run_start + 1
        while run_end < total_events and meditation[run_end] == meditation[run_start]:
            run_end += 1

        if run_end - run_start >= min_event_length:
            surviving_events.extend(meditation[run_start:run_end])
            for idx in range(run_start, run_end):
                included_events[idx] = 1
                excluded_events[idx] = 0

        # the next run begins where this one stopped
        run_start = run_end

    ### write file with percentage of events cleaned

    # take the sum of the total excluded events (all 1s in excluded_events)
    sum_excluded_events = sum(excluded_events)
    # calculate percentage of events excluded after event cleaning
    # (an empty input counts as 0% excluded rather than dividing by zero)
    if total_events:
        percent_excluded = (float(sum_excluded_events) / total_events) * 100
    else:
        percent_excluded = 0.0
    # format percentage & write to csv
    percent_events_cleaned = pd.Series("{:.2f}".format(percent_excluded))
    percent_events_cleaned.to_csv("percent_events_cleaned.csv")

    # callers expect lists of strings (they are joined with "," when written)
    surviving_events = [str(x) for x in surviving_events]
    included_events = [str(x) for x in included_events]
    excluded_events = [str(x) for x in excluded_events]

    return (surviving_events, included_events, excluded_events)

In [14]:
# percentage of excluded data points per subject, in the order of `subjects`
percent_excluded_all_subjs_e3 = []

for subj in subjects:
    # run the event cleaning once per subject and reuse the three lists;
    # every call re-reads the smoothed file and rewrites the percentage csv
    surviving_events, included_events, excluded_events = events(subj)

    # absolute output folder for this subject
    subj_dir = "%s/%s/results/step2/%s" % (root_dir, analysis, subj)

    print("Writing 'step2_opt_event_clean.csv/txt'")
    events_df = pd.DataFrame(surviving_events)
    events_df.to_csv("%s/step2_event_clean.csv" % subj_dir)
    write_txt_file("%s/step2_event_clean.txt" % subj_dir, surviving_events)

    print("Writing 'step2_opt_include.csv/txt'")
    include_df = pd.DataFrame(included_events)
    include_df.to_csv("%s/step2_include.csv" % subj_dir)
    write_txt_file("%s/step2_include.txt" % subj_dir, included_events)

    print("Writing 'step2_opt_exclude.csv/txt")
    exclude_df = pd.DataFrame(excluded_events)
    exclude_df.to_csv("%s/step2_exclude.csv" % subj_dir)
    write_txt_file("%s/step2_exclude.txt" % subj_dir, excluded_events)


    # calculate number of events excluded from clean file & append to list for all subjects
    excluded_events_int = [int(x) for x in excluded_events]
    included_events_int = [int(x) for x in included_events]
    # divide by the actual number of data points instead of assuming 600
    n_points = len(excluded_events_int)
    if n_points:
        percent_excluded = (float(sum(excluded_events_int)) / n_points) * 100
    else:
        percent_excluded = 0.0
    # the unrounded value is stored so the across-subject mean is not
    # computed from rounded numbers
    percent_excluded_all_subjs_e3.append(percent_excluded)
    percent_excluded = round(percent_excluded, 2)
    print("")
    print(str(percent_excluded) + "%% of data points excluded from %d's clean (events3) dataset." % subj)
    print("")

# average percentage excluded across all subjects
mean_excluded_e3 = round(np.mean(np.array(percent_excluded_all_subjs_e3)), 2)
print(str(mean_excluded_e3) + "%% average data points excluded from %s" % analysis)

print("")


Writing 'step2_opt_event_clean.csv/txt'
Writing 'step2_opt_include.csv/txt'
Writing 'step2_opt_exclude.csv/txt

22.67% of data points excluded from 124's clean (events3) dataset.

22.67% average data points excluded from phase1_demo

