In [4]:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import csv
import ctypes
import json
import os
import re
import sys
import numpy as np
import pandas as pd
import shutil
import time
import zipfile
from os import listdir as ls
from os.path import basename as bname
from os.path import dirname as dname
from os.path import expanduser as xpu
from os.path import join
from pandas import DataFrame as df
from cimaq_utils import flatten
from cimaq_utils import loadimages

def NEW_prettyevents_utf8(maindir="~/extracted_eprime0_utf8",
                    outdir="~/cimaq_newsheets_utf8"):
# TrialNum ImageID Trial_part  onsetSec  durationSec
    maindir = xpu(maindir)
    
# Clear previous attempts
    p1 = pd.read_csv(join(maindir, "participants.tsv"),
                     sep='\t').set_index("sub-ID")[["sheetpaths"]]
    [shutil.rmtree(join(outdir, row[0])) for row in p1.iterrows()]

# Headers for each of the 3 '.txt' files (open-source '.edat2' (E-prime) equivalent)
    prefixes = ["Onset-Event-Encoding_CIMAQ_", "Output-Responses-Encoding_CIMAQ_",
                "Output_Retrieval_CIMAQ_"]
    EncOnsetCols = ["TrialNum", "Condition", "TrialNum_perCondi",
                  "ImageID", "Trial_part", "onsetSec", "durationSec"]

    allsheets = NEW_get_allsheetsdf()

    # Initiate empty list for each outputed variable for backup
    full_on_eve_enc, retsheets, fixnstimtimes, timings = [], [], [], []
    fixsheets, encsheets, s_ids, yallofems = [], [], [], []
    
    # Create output directories structure
    allsheets["enc_outpaths"] = [join(outdir, row[0], "events") for row in allsheetsdf.iterrows()]
    allsheets["ret_outpaths"] = [join(outdir, row[0], "behavioral") for row in allsheetsdf.iterrows()]
    [(os.makedirs(join(outdir, row[0], "events"), exist_ok=True),
     os.makedirs(join(outdir, row[0], "behavioral"), exist_ok=True))
     for row in allsheetsdf.iterrows()]
    def sheetspersub(allsheets, prefixes):
        return dict((pre[1], [[(row[0], row[1]['sheetpath'])
                                for row in allsheets.iterrows()
                                if row[1]['prefix'] == prefixes[pre[0]]]
                               for row in p1.iterrows()])
                     for pre in enumerate(prefixes))
    encsheetsA = [(row[0], row[1]['sheetpath'])
                  for row in allsheets.iterrows()
                  if row[1]['prefix'] == prefixes[0]]
        
    encsheetsB = [row[1]['sheetpath'] for row in allsheets.iterrows() if row[1]['prefix'] == prefixes[1]]
        
    retsheets = [row[1]['sheetpath'] for row in allsheets.iterrows()if row[1]['prefix'] == prefixes[2]]

    # Rows are individual sheets, 3 per participant
    for row in allsheetsdf.iterrows():
        if row[1]['prefix'] == prefixes[0]:

            newsheet = pd.read_fwf(row[1]['sheetpath'], encoding=row[1]['encoding'],
                header=None, sep='\t', names=EncOnsetCols).iloc[6:]

            # Identify stimulus & fixation cross as 2 possible conditions of a same trial
            # instead of separate (double) trials
            stimids = newsheet[["ImageID",
                               "TrialNum_perCondi"]].drop_duplicates(\
                               subset=["ImageID", "TrialNum_perCondi"]).reset_index(drop=True)
            s_ids.append((row[0]+"_"+row[1]['prefix']+"_stimids", stimids))
            newsheet.to_excel(join(maindir, "temp_"+row[0]+"_"+row[1]['prefix']+'.xlsx'))
            newsheet = pd.read_excel(join(\
                           maindir, "temp_"+row[0]+"_"+row[1]['prefix']+'.xlsx')).drop(\
                               ['TrialNum_perCondi', 'Condition'], axis=1)

            tempsheet = newsheet[['TrialNum', 'Trial_part', 'onsetSec', 'durationSec']]

            # Extract and concatenate relevant info
            fixsheet = df([row[1] for row in tempsheet.iterrows()
                                  if row[1]['Trial_part'] == 'Fixation'])
            timing = tempsheet.loc[[row[0] for row in tempsheet.iterrows()
                                     if row[0] not in fixsheet.index]]
            fixsheet = fixsheet.rename(columns={"onsetSec": "fixOnsetSec",
                                               "durationSec": "fixDurSec"})
            fixsheet = fixsheet.transpose().iloc[-2:].transpose(\
                           ).reset_index(drop=True)
#             fixsheets.append(((row[0]+"_"+row[1]['prefix']+"_fixsheet_", fixsheet)))

            timing = timing.rename(columns={"onsetSec": "stimOnsetSec",
                                            "durationSec": "stimDurSec"})
            timing = timing.transpose().iloc[-2:].transpose().reset_index(drop=True)
#             timings.append((row[0]+"_"+row[1]['prefix']+"_timing_", timing))
            fixnstimtime = pd.concat([timing, fixsheet], axis=1, sort=False)
#             fixnstimtimes.append((row[0]+"_"+row[1]['prefix']+"_fixnstimtime_", fixnstimtime))
            allofem = pd.concat([fixnstimtime, stimids], axis=1, sort=False)
            yallnametuple = (row[0]+"_"+row[1]['prefix'], allofem)
            yallofems.append(yallnametuple)
            row[1]['oldname'] = yallnametuple[0]

        if row[1]['prefix'] == prefixes[1]:
            encsheet = pd.read_csv(row[1]['sheetpath'],
                                   encoding=row[1]['encoding'],
                                   header=0,
                                   sep='\t').iloc[3:].fillna(False).rename(\
                           columns={"TrialNumber": "TrialNum",
                                    "Category": "Condition"}).set_index("TrialNum")
            encsheet = encsheet.drop(["TrialCode"], axis=1)
            encsheet["Condition"] = encsheet["Condition"].astype(\
                                        'str').replace({'CTL': '0', 'Enc': '1'})
            encnametuple = (row[0]+"_"+row[1]['prefix'],
                              encsheet.reset_index(drop=True))
            encsheets.append(encnametuple)
            row[1]['oldname'] = encnametuple[0]

        # Immediately removing last row since it was an Eprime error (St-Laurent, 2019)
        if row[1]['prefix'] == prefixes[2]:
            retsheet = pd.read_csv(row[1]['sheetpath'],
                                   encoding=row[1]['encoding'],
                                   header=0, sep='\t').iloc[:, :-1]
            retnametuple = (row[1]['prefix']+"_"+row[0],
                              retsheet.reset_index(drop=True))
            retsheets.append(retnametuple)
            row[1]['oldname'] = retnametuple[0]
        encoding = tuple(zip(sorted(yallofems), sorted(encsheets)))
        fullencsheets = [(item[0][0], pd.concat([item[0][1], item[1][1]], axis=1))
                        for item in encoding]
            
    return sorted(yallofems), sorted(encsheets), sorted(retsheets)

def main():
    yallofems, encsheets, retsheets = NEW_prettyevents_utf8()

if __name__ == '__main__':
    main()


KeyError: "None of [Index(['sheetpaths'], dtype='object')] are in the [columns]"

In [None]:
yallofems, encsheets, retsheets = NEW_prettyevents_utf8()