# Attention summary

In this notebook, we will process the attention datasets into summary datasets.

In [1]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import os

  return f(*args, **kwds)
  return f(*args, **kwds)


First we read in the data.

In [2]:
datadir = "/Users/jokedurnez/Box/CAFE Consortium/Heather Info for CAFE Physio Pilot"
# subject = 'WI_AMP_009'
# subcsv = os.path.join(datadir,
#                       'Preliminary Physio Wristband Data for Mollie',
#                       'Datavyu_Attention_csv',
#                       'Attention CSV files',
#                       '%s_Attention.csv'%subject)
subject = 'WI_AMP_005'
subcsv = os.path.join(datadir,
                      'Preliminary Physio Wristband Data for Mollie',
                      'Datavyu_Attention_csv',
                      '%s_Attention_SY.csv'%subject)
outdir = os.path.join(datadir,'preprocessed',subject)
if not os.path.exists(outdir):
    os.mkdir(outdir)
data = pd.read_csv(subcsv)

datacode = data[['TV.ordinal','TV.onset',
                 'TV.offset','TV.code01']].dropna()
data = data[['Attention.ordinal','Attention.onset',
             'Attention.offset','Attention.code01']]

data['ID'] = subject 

In [3]:
data.head()

Unnamed: 0,Attention.ordinal,Attention.onset,Attention.offset,Attention.code01,ID
0,0,25262,28866,TO,WI_AMP_005
1,1,28867,31144,O-Experimenter,WI_AMP_005
2,2,31145,34000,TO,WI_AMP_005
3,3,34001,36244,P,WI_AMP_005
4,4,36245,38386,O-Shoes,WI_AMP_005


In [4]:
datacode

Unnamed: 0,TV.ordinal,TV.onset,TV.offset,TV.code01
0,0.0,208896.0,929389.0,Baseline + NoTV
1,1.0,929390.0,1397535.0,ChildTV
2,2.0,1397536.0,1846642.0,AdultTV


The next step is to split the cells of the attention data where the condition changes.  We first define the cuts, then for each cut find the row where the condition changes, and then split up the condition.

In [5]:
data['transition'] = False

# split cells where condition changes
cuts = list(datacode['TV.onset'])+[(list(datacode['TV.offset'])[len(datacode)-1])]
for cut in cuts:
    chcell = np.where((cut > data['Attention.onset']) & (cut < data['Attention.offset']))[0]
    splitrow = data.iloc[chcell]
    cellA = {
        'Attention.ordinal': list(data.loc[chcell,'Attention.ordinal'])[0],
        'Attention.onset': list(data.loc[chcell,'Attention.onset'])[0],
        'Attention.offset': int(cut),
        'Attention.code01': list(splitrow['Attention.code01'])[0],
        'ID': subject,
        'transition': True
    }
    cellB = {
        'Attention.ordinal': list(data.loc[chcell,'Attention.ordinal'])[0]+0.5,
        'Attention.onset': int(cut),
        'Attention.offset': list(data.loc[chcell,'Attention.offset'])[0],
        'Attention.code01': list(splitrow['Attention.code01'])[0],
        'ID': subject,
        'transition': True        
    }
    data = data.drop(chcell)
    data = data.append(cellA,ignore_index=True).append(cellB,ignore_index=True)

data = data.sort_values(by='Attention.ordinal').reset_index(drop=True)

See for example below how we split up the row where the cut occurred...

In [6]:
data.iloc[17:22]

Unnamed: 0,Attention.ordinal,Attention.onset,Attention.offset,Attention.code01,ID,transition
17,17.0,169083,175202,O-Shoes,WI_AMP_005,False
18,18.0,175203,208896,TO,WI_AMP_005,True
19,18.5,208896,261732,TO,WI_AMP_005,True
20,19.0,261733,264486,TO,WI_AMP_005,False
21,20.0,264487,267274,O-Table,WI_AMP_005,False


Next we annotate all cells with the correct conditions...

In [7]:
for idx,row in datacode.iterrows():
    # annotate cells
    condtimes = (data['Attention.onset'] >= row['TV.onset']) & \
        (data['Attention.offset'] <= row['TV.offset'])
    data.loc[condtimes,'condition'] = row['TV.code01']

In [8]:
# add durations
data['duration'] = data['Attention.offset'] - data['Attention.onset']

See below how it successfully split up the cut in condition occurred...

In [9]:
data.iloc[17:22]

Unnamed: 0,Attention.ordinal,Attention.onset,Attention.offset,Attention.code01,ID,transition,condition,duration
17,17.0,169083,175202,O-Shoes,WI_AMP_005,False,,6119
18,18.0,175203,208896,TO,WI_AMP_005,True,,33693
19,18.5,208896,261732,TO,WI_AMP_005,True,Baseline + NoTV,52836
20,19.0,261733,264486,TO,WI_AMP_005,False,Baseline + NoTV,2753
21,20.0,264487,267274,O-Table,WI_AMP_005,False,Baseline + NoTV,2787


In [10]:
data['Attention.code.merged'] = data['Attention.code01'].apply(lambda y: y.split("-")[0])

### Group all data

In [11]:
grouped = data[['ID','duration','condition','Attention.code.merged']] \
    .groupby(['condition','Attention.code.merged','ID']) \
    .aggregate(['mean','count','median','sum'])
grouped.columns = ['mean','count','median','sum']

Add percentages of totaltime (in condition).

In [12]:
totaltimes = grouped[['sum']].groupby('condition').aggregate('sum')
totaltimes

Unnamed: 0_level_0,sum
condition,Unnamed: 1_level_1
AdultTV,449069
Baseline + NoTV,711805
ChildTV,466407


In [13]:
def add_percentage(row):
    time = row['sum']
    totaltime = totaltimes.loc[row.name[0],'sum']
    return time/totaltime

grouped['percentage'] = grouped.apply(add_percentage,axis=1)

In [14]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,count,median,sum,percentage
condition,Attention.code.merged,ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AdultTV,O,WI_AMP_005,1892.0,3,611,5676,0.012639
AdultTV,P,WI_AMP_005,1129.5,4,900,4518,0.010061
AdultTV,TO,WI_AMP_005,8879.375,16,4912,142070,0.316366
AdultTV,TV,WI_AMP_005,19787.0,15,7139,296805,0.660934
Baseline + NoTV,I,WI_AMP_005,2005.0,4,2073,8020,0.011267
Baseline + NoTV,M,WI_AMP_005,4215.0,2,4215,8430,0.011843
Baseline + NoTV,O,WI_AMP_005,8080.652174,23,4079,185855,0.261104
Baseline + NoTV,P,WI_AMP_005,1563.0,12,1631,18756,0.02635
Baseline + NoTV,TO,WI_AMP_005,37749.538462,13,18257,490744,0.689436
ChildTV,O,WI_AMP_005,645.0,1,645,645,0.001383


In [15]:
grouped.to_csv(os.path.join(outdir,"ATTENTION_%s_summary.csv"%(subject)))

### Group data trimmed (without transition rows)

In [18]:
grouped = data[data.transition==False][['ID','duration','condition','Attention.code.merged']] \
    .groupby(['condition','Attention.code.merged','ID']) \
    .aggregate(['mean','count','median','sum'])
grouped.columns = ['mean','count','median','sum']

In [19]:
totaltimes = grouped[['sum']].groupby('condition').aggregate('sum')
totaltimes
def add_percentage(row):
    time = row['sum']
    totaltime = totaltimes.loc[row.name[0],'sum']
    return time/totaltime

grouped['percentage'] = grouped.apply(add_percentage,axis=1)

In [20]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,count,median,sum,percentage
condition,Attention.code.merged,ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AdultTV,O,WI_AMP_005,2583.0,2,2583,5166,0.012019
AdultTV,P,WI_AMP_005,1129.5,4,900,4518,0.010511
AdultTV,TO,WI_AMP_005,8879.375,16,4912,142070,0.330529
AdultTV,TV,WI_AMP_005,19862.285714,14,6986,278072,0.646941
Baseline + NoTV,I,WI_AMP_005,2005.0,4,2073,8020,0.012171
Baseline + NoTV,M,WI_AMP_005,4215.0,2,4215,8430,0.012793
Baseline + NoTV,O,WI_AMP_005,8080.652174,23,4079,185855,0.282039
Baseline + NoTV,P,WI_AMP_005,1563.0,12,1631,18756,0.028463
Baseline + NoTV,TO,WI_AMP_005,36492.333333,12,16217,437908,0.664535
ChildTV,O,WI_AMP_005,645.0,1,645,645,0.001386


In [17]:
grouped.to_csv(os.path.join(outdir,"ATTENTION_%s_summary_trimmed.csv"%(subject)))