### **STEP ONE** OF DATA PROCESSING PIPELINE

##### This script imports data in which a Reliable Component Analysis was performed. Visual Stimulus was dual frequency tagged: F1 = 3 Hz(6), F2 = 3.75 Hs (7.5) (inverting stim). Each participant completed 2 sessions. For each session, a F1 and F2 bandpass filter was performed. So 1 single participant has 4 data files. This is for a frequency-based analysis of the LTP paradigm


In [1]:
# load packages
import numpy as np 
import scipy.io
from scipy.io   import  loadmat
import pandas as pd
import os
import matplotlib.pyplot as plt #import matplotlib as plt
from scipy.optimize import curve_fit 
import seaborn as sns #import mat73
import pickle as pkl
from datetime import datetime
from itertools import chain

### Set Dir Path(s): MainDir, SaveDataDir

In [2]:
# Main Directory of processed file from MatLab
#MainDir = 'D:\\AttnXV3_analysis\\RCA_F1\\RCA\\' # set dir - with USB Drive
MainDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\RCA\\' # set dir - on my computer
os.chdir(MainDir) # change old dir, to this dir
d = os.listdir(MainDir) # list files in dir
print(f'Files on hand: {d}')
##############################################
FileN_f1 = d[0] # choose one
FileN_f2  = d[1]                         
file_path1 = os.path.join(MainDir, FileN_f1) # join paths and prep 2 load
print('Current WD:',file_path1) # does path exist ... ?
print('Does File #1 Exist?',os.path.exists(file_path1)) # yes or no

file_path2 = os.path.join(MainDir, FileN_f2) # join paths and prep 2 load
print('Current WD:',file_path2) # does path exist ... ?
print('Does File #2 Exist?',os.path.exists(file_path1)) # yes or no

Files on hand: ['rcaResults_Sweep_contrast sweeps_F1_031324.mat', 'rcaResults_Sweep_contrast sweeps_F2_031324.mat', 'rcaResults_Sweep_contrast_sweeps_F1.mat', 'rcaResults_Sweep_contrast_sweeps_F2.mat']
Current WD: C:\plimon\LTP_analysis\RCA_F1\RCA\rcaResults_Sweep_contrast sweeps_F1_031324.mat
Does File #1 Exist? True
Current WD: C:\plimon\LTP_analysis\RCA_F1\RCA\rcaResults_Sweep_contrast sweeps_F2_031324.mat
Does File #2 Exist? True


In [3]:
# Save Data Dir ...
#SaveDataDir = 'D:\\AttnXV3_analysis\\RCA_F1\\AllSubjSweepRCA\\' # set dir where files (.pkl, .csv) will be saved
SaveDataDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\AllSubjSweepRCA\\' # set dir where files (.pkl, .csv) will be saved
FileOutName = 'AllRCAData_pnlApp' 
newPath = os.path.join(SaveDataDir, FileOutName)
if not os.path.exists(SaveDataDir):
    os.makedirs(SaveDataDir)
print(f'Path to save data:{SaveDataDir}')
######################################################
dnt = datetime.now() # add date and time bc im wreckless when saving ..
fdnt = dnt.strftime("%Y%m%d_%H%M%S") # set the above as a string ...
FileN = f'{FileOutName}_{fdnt}.pkl' 
#FileNToMatlab = f'{FileOutName}_{fdnt}.h5' 
#FileN = f'{FileOutName}_{fdnt}.csv' 
NewFileNPath = os.path.join(SaveDataDir,FileN)
print('Full New File Dir: ', NewFileNPath)

Path to save data:C:\plimon\LTP_analysis\RCA_F1\AllSubjSweepRCA\
Full New File Dir:  C:\plimon\LTP_analysis\RCA_F1\AllSubjSweepRCA\AllRCAData_pnlApp_20240313_105950.pkl


In [4]:
df_f1 = scipy.io.loadmat(file_path1)
df_f2 = scipy.io.loadmat(file_path2)

#### Load F1 and F2 RCA Data

In [5]:
rca_f1 = df_f1['rcaResult']['projectedData'][0,0]
f1 = [rca_f1[x,0] for x in range(rca_f1.shape[0])]

In [6]:
rca_f2 = df_f2['rcaResult']['projectedData'][0,0]
f2 = [rca_f2[x,0] for x in range(rca_f2.shape[0])]

In [7]:
# load subject names ...
SubNames = df_f1['rcaResult'][0,0][5]
FileName = [x[0][3:] for subjlist in SubNames for x in subjlist[0][2][0]]
#FileName = np.sort(FileName)
print(f'Total Data Files: {len(FileName)}')

Total Data Files: 66


In [8]:
# set all file names to the same hyphen format
FileName = [FileName.replace('_','-') for FileName in FileName]
print(FileName)

['2651-attnL-20231003-1500', '2651-attnR-20231006-0933', '2652-attnL-20231003-1635', '2652-attnR-20231011-1328', '2653-attnL-20231009-1015', '2653-attnR-20231013-0932', '2654-attnL-20231009-1131', '2654-attnR-20231016-1152', '2655-attnL-20231009-1303', '2655-attnR-20231016-0948', '2657-attnL-20231013-1508', '2657-attnR-20231020-1201', '2658-attnL-20231013-1639', '2658-attnR-20231020-1052', '2659-attnL-20231017-0940', '2659-attnR-20231018-1523', '2660-attnL-20231017-1102', '2661-attnL-20231018-1322', '2661-attnR-20231017-1358', '2663-attnR2-20231030-1659', '2663-attnR-20231019-1018', '2664-attnL-20231020-1452', '2664-attnR-20231019-1145', '2665-attnL-20231024-1329', '2665-attnR-20231019-1512', '2666-attnL-20231023-1612', '2666-attnR-20231019-1643', '2667-attnL-20231025-1522', '2667-attnR-20231023-0947', '2668-attnL-20231027-1251', '2668-attnR-20231023-1058', '2669-attnL-20231024-1432', '2669-attnR-20231023-1241', '2670-attnL-20231201-1521', '2670-attnR-20231024-0941', '2671-attnL-202310

In [9]:
#### Set some params we'll need
[NumCols, NumComps, b] = np.shape(f1[0]) # 24 x 4 x n-Trials
NumFiles = int(len(FileName))

In [10]:
# Find How Many Subject Names There are ...
string_ind = '-'
uniqueSubs = []
SessFileType = []
for n in range(NumFiles):
    x = FileName[n] # single file name string ie: 'nl-xxxx_attnX'
    if string_ind in x:
        y = x.split(string_ind)[0] # subj number
        z = x.split(string_ind)[1] # session condition name 
        # from each session name extract import info that will help us index and exclude later on ..
        uniqueSubs.append(y) # store all participant numbers (repeating - will be sorted later)
        SessFileType.append(z) # store the condtion label

In [11]:
# given double sessions.., how many unique participants do we have?
TotalFiles = np.array(uniqueSubs)
[NumSubs, SessCounts] = np.unique(TotalFiles, return_counts = True) # returns unique subject and how many sessions they did (should be 2)
print(f'Total Participants: {len(NumSubs)}')
[FileQuants, TotSess] = np.unique(SessCounts, return_counts = True) # returns counts of how many subs did 1 session and 2 sessions 
print(f'{TotSess[1]} Participants completed the study')
print(f'{TotSess[0]} Participants did not complete the study')

Total Participants: 38
28 Participants completed the study
10 Participants did not complete the study


In [12]:
TwoSess = SessCounts == 2 # index of who completed 2 sessions ..
GoodSubjs = NumSubs[TwoSess] # subs who completed 2 sessions 
SingleSessSubjs = NumSubs[~TwoSess] # subs who did not complete 2 sessions 
print(f'Subjects with 2 files: {len(NumSubs)}')
print(GoodSubjs)
print()
print(f'Single Subjs: {SingleSessSubjs}')

Subjects with 2 files: 38
['2651' '2652' '2653' '2654' '2655' '2657' '2658' '2659' '2661' '2663'
 '2664' '2665' '2666' '2667' '2668' '2669' '2670' '2671' '2672' '2674'
 '2677' '2695' '2696' '2697' '2728' '345202' '345215' '345216']

Single Subjs: ['2660' '2676' '2678' '2708' '2715' '2716' '2726' '2727' '2733' '2734']


#### Have to add this step because I ran someone for attnR **twice** ... yikes ...

In [13]:
# makes sure paticipants did complete attnL and attnR
FNameCrit = ['attnL', 'attnR'] # session names 
CleanSubjs = [] 

for i in range(len(GoodSubjs)):
    subj = GoodSubjs[i]
    f_list = [x for x in FileName if subj in x] # import all strongs were sub number is 
    list_check  = np.sort(f_list) # abc order strings -  attnL and THEN attnR
    counter = 0
    for n in range(len(FNameCrit)):
        if FNameCrit[n] in list_check[n]: # expt label should match file name in same position
            counter = counter + 1 # if so add 1 
            if (n == 1) and (counter == 2): # if both files strings are different, append
                CleanSubjs.append(GoodSubjs[i])
        else: # if not, add em to the singletons ... 
            print(f'{GoodSubjs[i]} did not match file name for {FNameCrit[n]}, moving subj to proper file ind array')
            str_nm = (np.array([GoodSubjs[i]], dtype=object))
            SingleSubs = np.concatenate((str_nm, SingleSessSubjs))

2663 did not match file name for attnL, moving subj to proper file ind array


#### How many participants **actually** completed both sessions:

In [14]:
print(f'{len(CleanSubjs)} Participants Completed AttnL and AttnR')
print(f'{len(SingleSubs)} Participants did not properly complete all sessions')
#print(CleanSubjs)

27 Participants Completed AttnL and AttnR
11 Participants did not properly complete all sessions


#### Store Data Files as AttnL and AttnR in order

In [15]:
FNameCrit = ['attnL', 'attnR']
FilePos = np.zeros((len(CleanSubjs), 2))

for name in range(len(CleanSubjs)):
    yIn = CleanSubjs[name] # import single subject who completed 2 sessions 
    all_files_avil = [x for x in FileName if yIn in x] # list
    all_files_avil = np.sort(all_files_avil) # might not be necessary but jic ...
    pos = [loc for loc, file in enumerate(FileName) if file in all_files_avil] # index position of files if files match attnL and attnR
    FilePos[name,:] = pos
#print(FilePos)

#### Create a big for loop to save all this data

In [16]:
AttnL =  {'F1': [], 'F2': []} 
AttnR =  {'F1': [], 'F2': []} 
#AttnX =  {'F1': [], 'F2': []} 

# run a different file we imported (2 F1 and F2 filtered data) to save all in the same file
for iter in range(NumFiles):
    for ind in range(len(CleanSubjs)):
        attnL_FilePos = int(FilePos[ind,0])
        attnR_FilePos = int(FilePos[ind,1])
        if iter == 0:
            data = f1
            AttnL['F1'].append(data[attnL_FilePos])  # Append value to list in 'F1' key
            AttnR['F1'].append(data[attnR_FilePos])  # Append value to list in 'F1' key
        elif iter == 1:
            data = f2
            AttnL['F2'].append(data[attnL_FilePos])  # Append value to list in 'F2' key
            AttnR['F2'].append(data[attnR_FilePos])  # Append value to list in 'F2' key

### Now Saving Subjects data who only completed 1 session 

In [17]:
SingleFiles_arr = []
for file in range(len(SingleSubs)):
    subIn = SingleSubs[file] # import one file at a time
    files_avil = [x for x in FileName if subIn in x]
    SingleFiles_arr.append(files_avil)

SinglefileNames = np.array(list(chain(*SingleFiles_arr))) #all single session names flattened
print(SinglefileNames)

['2663-attnR2-20231030-1659' '2663-attnR-20231019-1018'
 '2660-attnL-20231017-1102' '2676-attnR-20231027-1002'
 '2678-attnR-20231101-1446' '2708-attnL-20240119-1029'
 '2715-attnL-20240129-1138' '2716-attnL-20240129-1253'
 '2726-attnR-20240221-1133' '2727-attnL-20240221-1316'
 '2733-attnL-20240312-1701' '2734-attnL-20240312-0954']


In [18]:
string_catch_L = 'attnL'
string_catch_R = 'attnR'
single_sess_ind = np.zeros(len(SinglefileNames)) # size of single sessions available
SingleSessSubName = [] # save subject name 
single_sess_pos = [] # find and store the data index 

for sInFName in range(len(SinglefileNames)):
    # determin whether its attnr or attnL
    fIn = SinglefileNames[sInFName]
    #find the postion of file in the data to organize later
    pos = [posi for posi, file in enumerate(FileName) if file in fIn]
    single_sess_pos.append(pos)

    x = fIn.split(string_ind)[1]
    y = fIn.split(string_ind)[0]
    SingleSessSubName.append(y)
    # make array to findex what files are attnL and attnR
    if string_catch_L in x:
        single_sess_ind[sInFName] = 1 # attnL ind == 1
    elif string_catch_R in x:
        single_sess_ind[sInFName] = 0 # attnL ind == 0

single_sess_pos = np.array(single_sess_pos)
# print(single_sess_pos)
# print(single_sess_ind)
# print(SingleSessSubName)

#### Index and save singles sessions as a seperate dict to export in pkl file

In [19]:
single_sess_AttnL =  {'F1': [], 'F2': []} 
single_sess_AttnR =  {'F1': [], 'F2': []} 
l_subs = []
r_subs = []

for file_op in range(NumFiles):
    for oneSess in range(len(SinglefileNames)):
        sIn = SingleSessSubName[oneSess] # single sub names
        AttnXCond = single_sess_ind[oneSess] # condtion they did
        DataPos = single_sess_pos[oneSess] # position of data file is 
        DataPos = int(DataPos[0]) 

        if file_op == 0:
            dataIn = f1 # switch files 2 combine them
            if AttnXCond == 1:
                single_sess_AttnL['F1'].append(dataIn[DataPos]) # save data in this dict
                l_subs.append(sIn) # save subject name in this dict
            else:
                single_sess_AttnR['F1'].append(dataIn[DataPos])
                r_subs.append(sIn)

        elif file_op == 1:
            dataIn = f2 # switch files 2 combine them
            if AttnXCond == 1:
                single_sess_AttnL['F2'].append(dataIn[DataPos])
            else:
                single_sess_AttnR['F2'].append(dataIn[DataPos])


In [20]:
SingleSessDataOut = {}

SingleSessDataOut[0] = single_sess_AttnL['F1']
SingleSessDataOut[1] = single_sess_AttnL['F2']
SingleSessDataOut[2] = single_sess_AttnR['F1']
SingleSessDataOut[3] = single_sess_AttnR['F2']

SingleSessDataOut['AttnLSubNames'] = np.array(l_subs)
SingleSessDataOut['AttnRSubNames'] = np.array(r_subs)
#SingleSessDataOut['DataNotes'] = ['keys: 0&1 attnL[f1/f2] and 2&3 attR[f1/f2], single session data']
print(SingleSessDataOut.keys())

dict_keys([0, 1, 2, 3, 'AttnLSubNames', 'AttnRSubNames'])


In [21]:
SessDataOut = {}

SessDataOut[0] = AttnL['F1']
SessDataOut[1] = AttnL['F2']
SessDataOut[2] = AttnR['F1']
SessDataOut[3] = AttnR['F2']

SessDataOut['FullSessSubjNames'] = CleanSubjs
SessDataOut['DataNotes'] = ['keys: 0&1 attnL[f1/f2] and 2&3 attR[f1/f2]']
print(SessDataOut.keys())

dict_keys([0, 1, 2, 3, 'FullSessSubjNames', 'DataNotes'])


#### Save Data into .pkl file

In [22]:
dataOut = {}

dataOut[0] = SessDataOut
dataOut[1] = SingleSessDataOut

In [23]:
# dataOut = dict()

# dataOut['FullSessSubjNames'] = CleanSubjs
# #dataOut['OneSessSubjNames'] = CleanSubjs
# dataOut['DataNotes'] = ['keys: 0&1 attnL[f1/f2] and 2&3 attR[f1/f2]']
# dataOut[0] = AttnL['F1']
# dataOut[1] = AttnL['F2']

# dataOut[2] = AttnR['F1']
# dataOut[3] = AttnR['F2']

# print(dataOut.keys())

In [24]:
saveFile = 'y'

if saveFile == 'y':
 with open(NewFileNPath, 'wb') as file:
    pkl.dump(dataOut, file, protocol=pkl.HIGHEST_PROTOCOL)
    print('Sorted Data Saved! :))')
else:
    print('Did Not Save File! Change file name before switching to y!')

Sorted Data Saved! :))
