In [None]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [1]:
import pandas as pd
import numpy as np
from utilities import *

In [2]:
needed_sha = ['SHA'+str(i).rjust(2,'0') for i in range(1,11)]
needed_lga = ['LGA'+str(i).rjust(2,'0') for i in range(1,15)]

## Helper Functions

In [3]:
def get_latency(reward_timestamps):
    if len(reward_timestamps) == 0:
        return np.nan
    return reward_timestamps[0]


def get_mode(lst):
    if len(lst) == 0:
        return np.nan
    return mode(lst)


def get_bursts(reward_timestamps, interval=90):
    allBursts = []
    i = 0
    while i < len(reward_timestamps):
        oneBurst = []
        limit = reward_timestamps[i] + interval
        j = i
        while j < len(reward_timestamps) and reward_timestamps[j] <= limit:
            oneBurst.append(reward_timestamps[j])
            limit = reward_timestamps[j] + interval
            j += 1
        allBursts.append(oneBurst)
        i = j
    allBursts = [i for i in allBursts if len(i) > 1]
    return allBursts


def get_burst_max_reward(bursts):
    if len(bursts) == 0:
        return np.nan
    return max([len(i) for i in bursts])


def get_burst_mean_reward(bursts):
    rewards_fall_in_bursts = len(list(itertools.chain.from_iterable(bursts)))
    total_bursts = len(bursts)

    if total_bursts == 0:
        return np.nan

    return round(rewards_fall_in_bursts / total_bursts, 2)


def get_burst_pct_reward(reward_timestamps, bursts):
    rewards_fall_in_bursts = len(list(itertools.chain.from_iterable(bursts)))
    total_rewards = len(reward_timestamps)
    
    if total_rewards == 0:
        return np.nan
    
    return round(rewards_fall_in_bursts/total_rewards,2) * 100


def get_burst_to(bursts, timeout_timestamps):
    i,j = 0,0
    res = []
    while i < len(bursts):
        burst = bursts[i]
        end = burst[-1] + 20
        count = 0
        while j < len(timeout_timestamps) and timeout_timestamps[j] <= end:
            count += 1
            j += 1
        res.append(count)
        i += 1
    avg_timeouts = np.mean(res)
    return round(avg_timeouts,2)


def get_to_mode(reward_timestamps,timeout_timestamps):
    i,j = 0,0
    to_intervals = []
    while i < len(reward_timestamps) and j < len(timeout_timestamps):
        start = reward_timestamps[i]
        end = start + 20
        while j < len(timeout_timestamps) and timeout_timestamps[j] <= end:
            to_intervals.append(timeout_timestamps[j]-start)
            j += 1
        i += 1
    return get_mode(to_intervals)


def get_iii_mode(inter_infusion_interval):
    i = 0
    j = 1
    hist = dict()
    while i < len(inter_infusion_interval):
        limit = j*90
        _bin = []
        while i < len(inter_infusion_interval) and inter_infusion_interval[i] <= limit:
            _bin.append(inter_infusion_interval[i])
            i += 1
        hist[limit] = len(_bin)
        j += 1
    return max(hist, key=hist.get) if hist else np.nan


def partition_timestamps(trial_id, drug, timestamps):
    trial_type = trial_id[:3]
    drug = drug.lower()
    
    if trial_type == 'SHA':
        first, last = [0,600],[3600,7200]
        partitioned = [[],[]]
        i = 0
        while i < len(timestamps):
            val = timestamps[i]
            if val <= first[1]:
                partitioned[0].append(val)
            elif val > last[0] and val <= last[1]:
                partitioned[1].append(val)
            else:
                pass
            i+=1
    else:
        first = [0, 600]
        middle = [3600, 18000] 
        
        if drug == 'oxycodone':
            last = [39600, 43200]
        else:
            last = [18000, 21600]
        
        partitioned = [[],[],[]]
        i = 0
        while i < len(timestamps):
            val = timestamps[i]
            if val <= first[1]:
                partitioned[0].append(val)
            elif val > middle[0] and val <= middle[1]:
                partitioned[1].append(val)
            elif val > last[0] and val <= last[1]:
                partitioned[2].append(val)
            else:
                pass
            i+=1
    
    return partitioned


def unstack_data(df):
    rows = []
    rfids = [i for i in df.rfid.unique()]

    for rfid in rfids:
        temp = df[df.rfid==rfid]
        temp = temp.reset_index(drop=True)
        temp.sort_values(['trial_id'],inplace=True)
        temp.set_index('trial_id',inplace=True)
        temp_row = temp.iloc[:,1:].unstack().to_frame().sort_index(level=1).T
        temp_row.insert(loc=0, column='rfid', value=temp.rfid.unique())
        temp_row = temp_row.reset_index(drop=True)
        rows.append(temp_row)
        
    return rows

## Note table for filtering

In [4]:
note = pd.read_csv('/Users/yunyihuang/George Lab Dropbox/George_Lab/Experiments/DataStream/DataStream_Database_01-07-2023/Raw/trial_note.csv')
note.drop('#',axis=1,inplace=True)
note.head()

Unnamed: 0,rfid,subject,cohort,sex,drug,experiment_group,trial_id,start_date,code,to_do,note
0,933000320047006,M987,9,Male,Cocaine,Drug,LGA01,2019-10-08,Disconnect,Keep,
1,933000320186832,M1072,10,Male,Cocaine,Drug,LGA02,2020-01-14,Died,Discard,
2,933000320188172,M1565,15,Male,Cocaine,Drug,LGA01,2021-01-13,Tangled,Keep,
3,933000320188113,M1553,15,Male,Cocaine,Drug,LGA06,2021-01-21,Tangled,Discard,
4,933000320187895,M1572,15,Male,Cocaine,Drug,LGA11,2021-01-28,Sick/wound,Keep,


In [5]:
note['drug'] = note['drug'].str.lower()
note['to_do'] = note['to_do'].apply(lambda x: x.lower().strip())
note_filter = note[note['to_do']=='discard'][['rfid','trial_id','drug','to_do']]
note_filter.reset_index(drop=True,inplace=True)
note_filter.replace('SHOCK','SHOCK_V3',inplace=True)
note_filter.replace('PRE-SHOCK','PRESHOCK',inplace=True)
note_filter.replace('TREAMENT02','TREATMENT02',inplace=True)
note_filter.replace('TREAMENT03','TREATMENT03',inplace=True)
note_filter.replace('PRE-SHOCK','PRESHOCK',inplace=True)

## LGA

In [6]:
lga = pd.read_csv('/Users/yunyihuang/George Lab Dropbox/George_Lab/Experiments/DataStream/DataStream_Database_01-07-2023/Raw/trial_lga.csv')

lga['active_timestamps'] = lga['active_timestamps'].apply(deserialize_data)
lga['inactive_timestamps'] = lga['inactive_timestamps'].apply(deserialize_data)
lga['reward_timestamps'] = lga['reward_timestamps'].apply(deserialize_data)
lga['timeout_timestamps'] = lga['timeout_timestamps'].apply(deserialize_data)
lga_keep = pd.merge(lga, note_filter, how='left', left_on=['rfid','trial_id','drug'], right_on=['rfid','trial_id','drug'])
lga_keep = lga_keep[~(lga_keep['to_do']=='discard')]
lga_keep.drop(columns=['to_do'],inplace=True)

lga_keep = lga_keep[lga_keep['trial_id'].isin(needed_lga)]
cols_to_drop = ['box','start_time','end_time','start_date','end_date','active_timestamps','inactive_timestamps',
               'active_lever_presses','inactive_lever_presses']
lga_keep.drop(cols_to_drop,axis=1,inplace=True)
lga_keep.reset_index(drop=True,inplace=True)
lga_keep.head()

Unnamed: 0,rfid,subject,room,cohort,trial_id,drug,reward_presses,timeout_presses,reward_timestamps,timeout_timestamps
0,933000320499961,F1828,MTF134C,18,LGA03,cocaine,99,8.0,"[27.0, 214.0, 264.0, 585.0, 844.0, 2426.0, 251...","[220.0, 3632.0, 16915.0, 18333.0, 18853.0, 191..."
1,933000320188275,F1111,BSB273B,11,LGA04,oxycodone,195,30.0,"[194.0, 231.0, 287.0, 311.0, 441.0, 509.0, 732...","[231.0, 311.0, 732.0, 7821.0, 11996.0, 12155.0..."
2,933000320499812,F1418,MTF134C,14,LGA01,oxycodone,4,1.0,"[24241.0, 34987.0, 35966.0, 42620.0]",[24249.0]
3,933000120138654,F321,,3,LGA03,cocaine,1,,[20384.62],[]
4,933000120138654,F321,,3,LGA06,cocaine,3,,"[17298.46, 17328.62, 19537.96]",[]


In [7]:
# whole seesion
lga_keep['bursts'] = lga_keep['reward_timestamps'].apply(get_bursts)
lga_keep['inter_infusion_interval'] = lga_keep['reward_timestamps'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
lga_keep['latency'] = lga_keep['reward_timestamps'].apply(get_latency)
lga_keep['burst_number'] = lga_keep['bursts'].apply(len)
lga_keep['burst_max_reward'] = lga_keep['bursts'].apply(get_burst_max_reward)
lga_keep['burst_mean_reward'] = lga_keep['bursts'].apply(get_burst_mean_reward)
lga_keep['burst_percent_reward'] = lga_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps'],x['bursts']), axis=1)
lga_keep['burst_to'] = lga_keep.apply(lambda x: get_burst_to(x['bursts'],x['timeout_timestamps']), axis=1)
lga_keep['to_mode'] = lga_keep.apply(lambda x: get_to_mode(x['reward_timestamps'],x['timeout_timestamps']), axis=1)
lga_keep['iii_mode'] = lga_keep['inter_infusion_interval'].apply(get_iii_mode)
lga_keep.head()

Unnamed: 0,rfid,subject,room,cohort,trial_id,drug,reward_presses,timeout_presses,reward_timestamps,timeout_timestamps,bursts,inter_infusion_interval,latency,burst_number,burst_max_reward,burst_mean_reward,burst_percent_reward,burst_to,to_mode,iii_mode
0,933000320499961,F1828,MTF134C,18,LGA03,cocaine,99,8.0,"[27.0, 214.0, 264.0, 585.0, 844.0, 2426.0, 251...","[220.0, 3632.0, 16915.0, 18333.0, 18853.0, 191...","[[214.0, 264.0], [2426.0, 2513.0], [13268.0, 1...","[187.0, 50.0, 321.0, 259.0, 1582.0, 87.0, 287....",27.0,9,3.0,2.11,19.0,0.89,1.0,1620.0
1,933000320188275,F1111,BSB273B,11,LGA04,oxycodone,195,30.0,"[194.0, 231.0, 287.0, 311.0, 441.0, 509.0, 732...","[231.0, 311.0, 732.0, 7821.0, 11996.0, 12155.0...","[[194.0, 231.0, 287.0, 311.0], [441.0, 509.0],...","[37.0, 56.0, 24.0, 130.0, 68.0, 223.0, 100.0, ...",194.0,43,11.0,3.79,84.0,0.7,0.0,2160.0
2,933000320499812,F1418,MTF134C,14,LGA01,oxycodone,4,1.0,"[24241.0, 34987.0, 35966.0, 42620.0]",[24249.0],[],"[10746.0, 979.0, 6654.0]",24241.0,0,,,0.0,,8.0,10800.0
3,933000120138654,F321,,3,LGA03,cocaine,1,,[20384.62],[],[],[],20384.62,0,,,0.0,,,
4,933000120138654,F321,,3,LGA06,cocaine,3,,"[17298.46, 17328.62, 19537.96]",[],"[[17298.46, 17328.62]]","[30.159999999999854, 2209.34]",17298.46,1,2.0,2.0,67.0,0.0,,90.0


In [8]:
# first 10
lga_keep['reward_timestamps_first'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['reward_timestamps'])[0],axis=1)
lga_keep['timeout_timestamps_first'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['timeout_timestamps'])[0],axis=1)
lga_keep['bursts_first'] = lga_keep['reward_timestamps_first'].apply(get_bursts)
lga_keep['inter_infusion_interval_first'] = lga_keep['reward_timestamps_first'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
lga_keep['burst_number_first'] = lga_keep['bursts_first'].apply(len)
lga_keep['burst_max_reward_first'] = lga_keep['bursts_first'].apply(get_burst_max_reward)
lga_keep['burst_mean_reward_first'] = lga_keep['bursts_first'].apply(get_burst_mean_reward)
lga_keep['burst_percent_reward_first'] = lga_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps_first'],x['bursts_first']), axis=1)
lga_keep['burst_to_first'] = lga_keep.apply(lambda x: get_burst_to(x['bursts_first'],x['timeout_timestamps_first']), axis=1)
lga_keep['to_mode_first'] = lga_keep.apply(lambda x: get_to_mode(x['reward_timestamps_first'],x['timeout_timestamps_first']), axis=1)
lga_keep['iii_mode_first'] = lga_keep['inter_infusion_interval_first'].apply(get_iii_mode)
lga_keep.head()

Unnamed: 0,rfid,subject,room,cohort,trial_id,drug,reward_presses,timeout_presses,reward_timestamps,timeout_timestamps,...,timeout_timestamps_first,bursts_first,inter_infusion_interval_first,burst_number_first,burst_max_reward_first,burst_mean_reward_first,burst_percent_reward_first,burst_to_first,to_mode_first,iii_mode_first
0,933000320499961,F1828,MTF134C,18,LGA03,cocaine,99,8.0,"[27.0, 214.0, 264.0, 585.0, 844.0, 2426.0, 251...","[220.0, 3632.0, 16915.0, 18333.0, 18853.0, 191...",...,[220.0],"[[214.0, 264.0]]","[187.0, 50.0, 321.0]",1,2.0,2.0,50.0,1.0,6.0,270.0
1,933000320188275,F1111,BSB273B,11,LGA04,oxycodone,195,30.0,"[194.0, 231.0, 287.0, 311.0, 441.0, 509.0, 732...","[231.0, 311.0, 732.0, 7821.0, 11996.0, 12155.0...",...,"[231.0, 311.0]","[[194.0, 231.0, 287.0, 311.0], [441.0, 509.0]]","[37.0, 56.0, 24.0, 130.0, 68.0]",2,4.0,3.0,100.0,1.0,0.0,90.0
2,933000320499812,F1418,MTF134C,14,LGA01,oxycodone,4,1.0,"[24241.0, 34987.0, 35966.0, 42620.0]",[24249.0],...,[],[],[],0,,,,,,
3,933000120138654,F321,,3,LGA03,cocaine,1,,[20384.62],[],...,[],[],[],0,,,,,,
4,933000120138654,F321,,3,LGA06,cocaine,3,,"[17298.46, 17328.62, 19537.96]",[],...,[],[],[],0,,,,,,


In [9]:
# middle 60-300
lga_keep['reward_timestamps_middle'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['reward_timestamps'])[1],axis=1)
lga_keep['timeout_timestamps_middle'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['timeout_timestamps'])[1],axis=1)
lga_keep['bursts_middle'] = lga_keep['reward_timestamps_middle'].apply(get_bursts)
lga_keep['inter_infusion_interval_middle'] = lga_keep['reward_timestamps_middle'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
lga_keep['burst_number_middle'] = lga_keep['bursts_middle'].apply(len)
lga_keep['burst_max_reward_middle'] = lga_keep['bursts_middle'].apply(get_burst_max_reward)
lga_keep['burst_mean_reward_middle'] = lga_keep['bursts_middle'].apply(get_burst_mean_reward)
lga_keep['burst_percent_reward_middle'] = lga_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps_middle'],x['bursts_middle']), axis=1)
lga_keep['burst_to_middle'] = lga_keep.apply(lambda x: get_burst_to(x['bursts_middle'],x['timeout_timestamps_middle']), axis=1)
lga_keep['to_mode_middle'] = lga_keep.apply(lambda x: get_to_mode(x['reward_timestamps_middle'],x['timeout_timestamps_middle']), axis=1)
lga_keep['iii_mode_middle'] = lga_keep['inter_infusion_interval_middle'].apply(get_iii_mode)
lga_keep.head()

Unnamed: 0,rfid,subject,room,cohort,trial_id,drug,reward_presses,timeout_presses,reward_timestamps,timeout_timestamps,...,timeout_timestamps_middle,bursts_middle,inter_infusion_interval_middle,burst_number_middle,burst_max_reward_middle,burst_mean_reward_middle,burst_percent_reward_middle,burst_to_middle,to_mode_middle,iii_mode_middle
0,933000320499961,F1828,MTF134C,18,LGA03,cocaine,99,8.0,"[27.0, 214.0, 264.0, 585.0, 844.0, 2426.0, 251...","[220.0, 3632.0, 16915.0, 18333.0, 18853.0, 191...",...,"[3632.0, 16915.0]","[[13268.0, 13342.0], [15585.0, 15659.0, 15731....","[248.0, 367.0, 213.0, 220.0, 181.0, 193.0, 263...",3,3.0,2.33,10.0,0.67,1.0,450.0
1,933000320188275,F1111,BSB273B,11,LGA04,oxycodone,195,30.0,"[194.0, 231.0, 287.0, 311.0, 441.0, 509.0, 732...","[231.0, 311.0, 732.0, 7821.0, 11996.0, 12155.0...",...,"[7821.0, 11996.0, 12155.0, 13645.0, 14932.0, 1...","[[10532.0, 10577.0], [10766.0, 10808.0, 10842....","[409.0, 1385.0, 1168.0, 255.0, 262.0, 577.0, 2...",13,6.0,2.77,69.0,0.46,0.0,1440.0
2,933000320499812,F1418,MTF134C,14,LGA01,oxycodone,4,1.0,"[24241.0, 34987.0, 35966.0, 42620.0]",[24249.0],...,[],[],[],0,,,,,,
3,933000120138654,F321,,3,LGA03,cocaine,1,,[20384.62],[],...,[],[],[],0,,,,,,
4,933000120138654,F321,,3,LGA06,cocaine,3,,"[17298.46, 17328.62, 19537.96]",[],...,[],"[[17298.46, 17328.62]]",[30.159999999999854],1,2.0,2.0,100.0,0.0,,90.0


In [10]:
# last 60
lga_keep['reward_timestamps_last'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['reward_timestamps'])[2],axis=1)
lga_keep['timeout_timestamps_last'] = lga_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['timeout_timestamps'])[2],axis=1)
lga_keep['bursts_last'] = lga_keep['reward_timestamps_last'].apply(get_bursts)
lga_keep['inter_infusion_interval_last'] = lga_keep['reward_timestamps_last'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
lga_keep['burst_number_last'] = lga_keep['bursts_last'].apply(len)
lga_keep['burst_max_reward_last'] = lga_keep['bursts_last'].apply(get_burst_max_reward)
lga_keep['burst_mean_reward_last'] = lga_keep['bursts_last'].apply(get_burst_mean_reward)
lga_keep['burst_percent_reward_last'] = lga_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps_last'],x['bursts_last']), axis=1)
lga_keep['burst_to_last'] = lga_keep.apply(lambda x: get_burst_to(x['bursts_last'],x['timeout_timestamps_last']), axis=1)
lga_keep['to_mode_last'] = lga_keep.apply(lambda x: get_to_mode(x['reward_timestamps_last'],x['timeout_timestamps_last']), axis=1)
lga_keep['iii_mode_last'] = lga_keep['inter_infusion_interval_last'].apply(get_iii_mode)
lga_keep.head()

Unnamed: 0,rfid,subject,room,cohort,trial_id,drug,reward_presses,timeout_presses,reward_timestamps,timeout_timestamps,...,timeout_timestamps_last,bursts_last,inter_infusion_interval_last,burst_number_last,burst_max_reward_last,burst_mean_reward_last,burst_percent_reward_last,burst_to_last,to_mode_last,iii_mode_last
0,933000320499961,F1828,MTF134C,18,LGA03,cocaine,99,8.0,"[27.0, 214.0, 264.0, 585.0, 844.0, 2426.0, 251...","[220.0, 3632.0, 16915.0, 18333.0, 18853.0, 191...",...,"[18333.0, 18853.0, 19167.0, 19167.0, 19167.0]","[[18332.0, 18369.0], [18470.0, 18547.0], [1959...","[37.0, 101.0, 77.0, 178.0, 127.0, 314.0, 426.0...",4,2.0,2.0,50.0,1.25,1.0,450.0
1,933000320188275,F1111,BSB273B,11,LGA04,oxycodone,195,30.0,"[194.0, 231.0, 287.0, 311.0, 441.0, 509.0, 732...","[231.0, 311.0, 732.0, 7821.0, 11996.0, 12155.0...",...,"[41983.0, 42042.0, 42043.0]","[[40013.0, 40071.0, 40133.0], [40375.0, 40463....","[58.0, 62.0, 104.0, 138.0, 88.0, 46.0, 23.0, 6...",5,11.0,6.2,97.0,0.6,0.0,990.0
2,933000320499812,F1418,MTF134C,14,LGA01,oxycodone,4,1.0,"[24241.0, 34987.0, 35966.0, 42620.0]",[24249.0],...,[],[],[],0,,,0.0,,,
3,933000120138654,F321,,3,LGA03,cocaine,1,,[20384.62],[],...,[],[],[],0,,,0.0,,,
4,933000120138654,F321,,3,LGA06,cocaine,3,,"[17298.46, 17328.62, 19537.96]",[],...,[],[],[],0,,,0.0,,,


In [11]:
# unstack and output final df
info_to_merge = lga_keep[['rfid','subject','drug']].drop_duplicates(ignore_index=True)
lga_keep.drop(['subject','room','cohort','drug'],axis=1,inplace=True)

rows = unstack_data(lga_keep)
lga_burst_unstacked = pd.concat(rows,ignore_index=True)
lga_burst_unstacked.columns = ['rfid'] + [i[0]+'_'+i[1] for i in lga_burst_unstacked.columns[1:]]
print(lga_burst_unstacked.shape)
lga_burst_unstacked.head()

lga_burst_final = pd.merge(lga_burst_unstacked, info_to_merge, how='left', on='rfid')
lga_burst_final = lga_burst_final[['rfid','subject','drug'] + lga_burst_final.columns[1:-2].tolist()]
lga_burst_final

(1853, 659)


Unnamed: 0,rfid,subject,drug,burst_max_reward_LGA01,burst_max_reward_first_LGA01,burst_max_reward_last_LGA01,burst_max_reward_middle_LGA01,burst_mean_reward_LGA01,burst_mean_reward_first_LGA01,burst_mean_reward_last_LGA01,...,reward_timestamps_middle_LGA14,timeout_presses_LGA14,timeout_timestamps_LGA14,timeout_timestamps_first_LGA14,timeout_timestamps_last_LGA14,timeout_timestamps_middle_LGA14,to_mode_LGA14,to_mode_first_LGA14,to_mode_last_LGA14,to_mode_middle_LGA14
0,933000320499961,F1828,cocaine,5.0,2.0,2.0,5.0,2.3,2.0,2.0,...,[],0.0,[],[],[],[],,,,
1,933000320188275,F1111,oxycodone,2.0,,,2.0,2.0,,,...,"[4991.0, 5037.0, 7480.0, 7584.0, 7644.0, 8936....",17.0,"[4991.0, 5037.0, 7488.0, 7489.0, 7499.0, 11464...",[],"[40030.0, 40200.0]","[4991.0, 5037.0, 7488.0, 7489.0, 7499.0, 11464...",0.0,,0.0,0.0
2,933000320499812,F1418,oxycodone,,,,,,,,...,"[4301.0, 4830.0, 6324.0, 7199.0, 7243.0, 7954....",14.0,"[320.0, 1537.0, 10084.0, 13616.0, 17340.0, 197...",[320.0],"[39610.0, 41747.0]","[10084.0, 13616.0, 17340.0]",6.0,1.0,6.0,18.0
3,933000120138654,F321,cocaine,,,,,,,,...,"[3864.19, 3972.56, 4171.44, 4494.59, 4661.95, ...",25.0,"[28.4, 40.98, 66.84, 83.3, 99.34, 100.48, 130....","[28.4, 40.98, 66.84, 83.3, 99.34, 100.48, 130....","[19153.74, 19465.24, 20773.23]","[3972.75, 6372.88, 11919.89, 14952.45, 15408.47]",3.32,3.32,0.36,0.19
4,933000320500007,F1829,cocaine,4.0,2.0,3.0,3.0,2.21,2.0,2.2,...,"[3703.0, 3845.0, 3967.0, 4175.0, 4316.0, 4362....",50.0,"[1293.0, 2415.0, 4362.0, 4363.0, 4363.0, 4363....",[],"[19141.0, 20318.0, 21046.0]","[4362.0, 4363.0, 4363.0, 4363.0, 4364.0, 4459....",1.0,,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1848,933000320525373,F2008,cocaine,,,,,,,,...,,,,,,,,,,
1849,933000320047893,M661,oxycodone,2.0,,,2.0,2.0,,,...,,,,,,,,,,
1850,933000120138631,F320,cocaine,4.0,3.0,3.0,4.0,2.4,3.0,2.5,...,"[3683.58, 3888.82, 3988.54, 4144.07, 4179.71, ...",29.0,"[4.22, 43.83, 47.61, 47.97, 55.41, 56.13, 108....","[4.22, 43.83, 47.61, 47.97, 55.41, 56.13, 108....",[],"[3988.76, 5305.54, 6453.88, 7959.71, 7959.92, ...",3.57,3.57,,0.22
1851,933000120138723,F117,oxycodone,4.0,3.0,3.0,2.0,2.25,3.0,2.5,...,,,,,,,,,,


In [12]:
lga_burst_final.to_csv('/Users/yunyihuang/Desktop/burst_calculation_LGA01-14.csv')

## SHA

In [None]:
sha = pd.read_csv('/Users/yunyihuang/George Lab Dropbox/George_Lab/Experiments/DataStream/DataStream_Database_01-07-2023/Raw/trial_sha.csv')

sha['active_timestamps'] = sha['active_timestamps'].apply(deserialize_data)
sha['inactive_timestamps'] = sha['inactive_timestamps'].apply(deserialize_data)
sha['reward_timestamps'] = sha['reward_timestamps'].apply(deserialize_data)
sha['timeout_timestamps'] = sha['timeout_timestamps'].apply(deserialize_data)
sha_keep = pd.merge(sha, note_filter, how='left', left_on=['rfid','trial_id','drug'], right_on=['rfid','trial_id','drug'])
sha_keep = sha_keep[~(sha_keep['to_do']=='discard')]
sha_keep.drop(columns=['to_do'],inplace=True)

sha_keep = sha_keep[sha_keep['trial_id'].isin(needed_sha)]
cols_to_drop = ['box','start_time','end_time','start_date','end_date','active_timestamps','inactive_timestamps',
               'active_lever_presses','inactive_lever_presses']
sha_keep.drop(cols_to_drop,axis=1,inplace=True)
sha_keep.reset_index(drop=True,inplace=True)
sha_keep.head()

In [None]:
# whole seesion
sha_keep['bursts'] = sha_keep['reward_timestamps'].apply(get_bursts)
sha_keep['inter_infusion_interval'] = sha_keep['reward_timestamps'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
sha_keep['latency'] = sha_keep['reward_timestamps'].apply(get_latency)
sha_keep['burst_number'] = sha_keep['bursts'].apply(len)
sha_keep['burst_max_reward'] = sha_keep['bursts'].apply(get_burst_max_reward)
sha_keep['burst_mean_reward'] = sha_keep['bursts'].apply(get_burst_mean_reward)
sha_keep['burst_percent_reward'] = sha_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps'],x['bursts']), axis=1)
sha_keep['burst_to'] = sha_keep.apply(lambda x: get_burst_to(x['bursts'],x['timeout_timestamps']), axis=1)
sha_keep['to_mode'] = sha_keep.apply(lambda x: get_to_mode(x['reward_timestamps'],x['timeout_timestamps']), axis=1)
sha_keep['iii_mode'] = sha_keep['inter_infusion_interval'].apply(get_iii_mode)
sha_keep.head()

In [None]:
# first 10
sha_keep['reward_timestamps_first'] = sha_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['reward_timestamps'])[0],axis=1)
sha_keep['timeout_timestamps_first'] = sha_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['timeout_timestamps'])[0],axis=1)
sha_keep['bursts_first'] = sha_keep['reward_timestamps_first'].apply(get_bursts)
sha_keep['inter_infusion_interval_first'] = sha_keep['reward_timestamps_first'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
sha_keep['burst_number_first'] = sha_keep['bursts_first'].apply(len)
sha_keep['burst_max_reward_first'] = sha_keep['bursts_first'].apply(get_burst_max_reward)
sha_keep['burst_mean_reward_first'] = sha_keep['bursts_first'].apply(get_burst_mean_reward)
sha_keep['burst_percent_reward_first'] = sha_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps_first'],x['bursts_first']), axis=1)
sha_keep['burst_to_first'] = sha_keep.apply(lambda x: get_burst_to(x['bursts_first'],x['timeout_timestamps_first']), axis=1)
sha_keep['to_mode_first'] = sha_keep.apply(lambda x: get_to_mode(x['reward_timestamps_first'],x['timeout_timestamps_first']), axis=1)
sha_keep['iii_mode_first'] = sha_keep['inter_infusion_interval_first'].apply(get_iii_mode)
sha_keep.head()

In [None]:
# last 60
sha_keep['reward_timestamps_last'] = sha_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['reward_timestamps'])[1],axis=1)
sha_keep['timeout_timestamps_last'] = sha_keep.apply(lambda x: partition_timestamps(x['trial_id'],x['drug'],x['timeout_timestamps'])[1],axis=1)
sha_keep['bursts_last'] = sha_keep['reward_timestamps_last'].apply(get_bursts)
sha_keep['inter_infusion_interval_last'] = sha_keep['reward_timestamps_last'].apply(lambda lst: [j - i for i, j in zip(lst[:-1], lst[1:])])
sha_keep['burst_number_last'] = sha_keep['bursts_last'].apply(len)
sha_keep['burst_max_reward_last'] = sha_keep['bursts_last'].apply(get_burst_max_reward)
sha_keep['burst_mean_reward_last'] = sha_keep['bursts_last'].apply(get_burst_mean_reward)
sha_keep['burst_percent_reward_last'] = sha_keep.apply(lambda x: get_burst_pct_reward(x['reward_timestamps_last'],x['bursts_last']), axis=1)
sha_keep['burst_to_last'] = sha_keep.apply(lambda x: get_burst_to(x['bursts_last'],x['timeout_timestamps_last']), axis=1)
sha_keep['to_mode_last'] = sha_keep.apply(lambda x: get_to_mode(x['reward_timestamps_last'],x['timeout_timestamps_last']), axis=1)
sha_keep['iii_mode_last'] = sha_keep['inter_infusion_interval_last'].apply(get_iii_mode)
sha_keep.head()

In [None]:
# unstack and output final df
info_to_merge = sha_keep[['rfid','subject','drug']].drop_duplicates(ignore_index=True)
sha_keep.drop(['subject','room','cohort','drug'],axis=1,inplace=True)

rows = unstack_data(sha_keep)
sha_burst_unstacked = pd.concat(rows,ignore_index=True)
sha_burst_unstacked.columns = ['rfid'] + [i[0]+'_'+i[1] for i in sha_burst_unstacked.columns[1:]]
print(sha_burst_unstacked.shape)
sha_burst_unstacked.head()

sha_burst_final = pd.merge(sha_burst_unstacked, info_to_merge, how='left', on='rfid')
sha_burst_final = sha_burst_final[['rfid','subject','drug'] + sha_burst_final.columns[1:-2].tolist()]
sha_burst_final

In [None]:
sha_burst_final.to_csv('/Users/yunyihuang/Desktop/burst_calculation_SHA01-10.csv')