### **STEP ONE** OF DATA PROCESSING PIPELINE

##### This script imports data on which a Reliable Component Analysis (RCA) was performed. The visual stimulus was dual-frequency tagged: F1 = 1 Hz (1.5), F2 = 1.5 Hz (3) (inverting stim). Each participant completed 2 sessions. For each session, an F1 and an F2 bandpass filter were applied, so a single participant has 4 data files. This is for a frequency-based analysis of the induction stage of the LTP paradigm.

In [1]:
# load packages
import numpy as np 
import scipy.io
from scipy.io   import  loadmat
import pandas as pd
import os
import matplotlib.pyplot as plt #import matplotlib as plt
from scipy.optimize import curve_fit 
import seaborn as sns #import mat73
import pickle as pkl
from datetime import datetime
from itertools import chain

In [5]:
# Locate the two RCA result files exported from MATLAB (one per band-pass filter: F1 and F2)
#MainDir = 'D:\\AttnXV3_analysis\\RCA_F1\\RCA\\' # set dir - with USB Drive
MainDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\Induction_RCA\\' # set dir - on my computer
os.chdir(MainDir) # make the data folder the working directory
# os.listdir returns entries in arbitrary order; sorting makes the choice of
# d[0] / d[1] below reproducible (with the rcaResults_F1_* / rcaResults_F2_*
# naming used in this folder, F1 sorts before F2)
d = sorted(os.listdir(MainDir))
print(f'Aviable Files to choose from: {len(d)}')
print(f'Files on hand: {d}')
if len(d) < 2:
    # fail with a readable message instead of an IndexError on d[1]
    raise FileNotFoundError(f'Expected an F1 and an F2 results file in {MainDir}, found {len(d)} file(s)')
##############################################
FileN_f1 = d[0] # F1-filtered results file
FileN_f2 = d[1] # F2-filtered results file

file_path1 = os.path.join(MainDir, FileN_f1) # full path of the F1 file
print('Current WD:',file_path1)
print('Does File #1 Exist?',os.path.exists(file_path1)) # yes or no

file_path2 = os.path.join(MainDir, FileN_f2) # full path of the F2 file
print('Current WD:',file_path2)
# check the second path itself (previously re-checked file_path1)
print('Does File #2 Exist?',os.path.exists(file_path2)) # yes or no

Aviable Files to choose from: 2
Files on hand: ['rcaResults_F1_Induction.mat', 'rcaResults_F2_Induction.mat']
Current WD: C:\plimon\LTP_analysis\RCA_F1\Induction_RCA\rcaResults_F1_Induction.mat
Does File #1 Exist? True
Current WD: C:\plimon\LTP_analysis\RCA_F1\Induction_RCA\rcaResults_F2_Induction.mat
Does File #2 Exist? True


In [6]:
# Where the combined output (.pkl / .csv) of this notebook is written
#SaveDataDir = 'D:\\AttnXV3_analysis\\RCA_F1\\AllSubjSweepRCA\\' # alternative location on the USB drive
SaveDataDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\AllSubjInductRCA\\'
FileOutName = 'All_Induct_RCA'
newPath = os.path.join(SaveDataDir, FileOutName)

# Create the output folder the first time the notebook is run
if os.path.exists(SaveDataDir) is False:
    os.makedirs(SaveDataDir)
print(f'Path to save data:{SaveDataDir}')

######################################################
# Stamp the file name with date and time so repeated runs never overwrite each other
dnt = datetime.now()
fdnt = format(dnt, "%Y%m%d_%H%M%S") # timestamp as text, e.g. 20240320_125450
FileN = f'{FileOutName}_{fdnt}.pkl'
# Other output formats tried previously:
#FileNToMatlab = f'{FileOutName}_{fdnt}.h5'
#FileN = f'{FileOutName}_{fdnt}.csv'
NewFileNPath = os.path.join(SaveDataDir, FileN)
print('Full New File Dir: ', NewFileNPath)

Path to save data:C:\plimon\LTP_analysis\RCA_F1\AllSubjInductRCA\
Full New File Dir:  C:\plimon\LTP_analysis\RCA_F1\AllSubjInductRCA\All_Induct_RCA_20240320_125450.pkl


In [7]:
# Read both MATLAB result files (F1-filtered first, F2-filtered second)
df_f1, df_f2 = (scipy.io.loadmat(mat_path) for mat_path in (file_path1, file_path2))

#### Load F1 and F2 RCA Data

In [8]:
# 'projectedData' field of the rcaResult struct from the F1 file
rca_f1 = df_f1['rcaResult']['projectedData'][0,0]
# Unpack column 0 of every row into a plain list (one entry per row of rca_f1)
f1 = []
for row in range(len(rca_f1)):
    f1.append(rca_f1[row, 0])

In [9]:
# 'projectedData' field of the rcaResult struct from the F2 file
rca_f2 = df_f2['rcaResult']['projectedData'][0,0]
# Unpack column 0 of every row into a plain list (one entry per row of rca_f2)
f2 = []
for row in range(len(rca_f2)):
    f2.append(rca_f2[row, 0])

In [12]:
# Collect the subject/session names stored with the F1 results
# (field at position 5 of the rcaResult struct - presumably the RCA settings; verify against the .mat file)
SubNames = df_f1['rcaResult'][0,0][5]
FileName = []
for subjlist in SubNames:
    for x in subjlist[0][2][0]:
        FileName.append(x[0][3:]) # keep everything after the first three characters of the stored name
#FileName = np.sort(FileName)
print(f'Total Data Files: {len(FileName)}')

Total Data Files: 66


#### Make Subj Session Name Format Uniform and extract sub ID 

In [None]:
# Use a hyphen as the only separator in every file name
FileName = [fname.replace('_','-') for fname in FileName]
print(len(FileName))

#### Set some params we'll need
# Dimensions of one projected-data array (author's note: 24 x 4 x n-Trials) and the file count
NumCols, NumComps, b = np.shape(f1[0])
NumFiles = len(FileName)
print(NumCols,NumComps, b) # new data dims

# Split each name into subject ID (before the first hyphen) and session label (after it)
string_ind = '-'
uniqueSubs = []    # subject ID per file; repeats expected, de-duplicated later
SessFileType = []  # session/condition label per file
for fname in FileName:
    if string_ind not in fname:
        continue  # names without a separator are left out of both lists
    parts = fname.split(string_ind)
    uniqueSubs.append(parts[0])
    SessFileType.append(parts[1])

In [None]:
# Given double sessions, how many unique participants do we have?
TotalFiles = np.array(uniqueSubs)
# NumSubs: unique subject IDs; SessCounts: number of files found for each of them (should be 2)
[NumSubs, SessCounts] = np.unique(TotalFiles, return_counts = True)
print(f'Total Participants: {len(NumSubs)}')
# FileQuants: the distinct per-subject file counts; TotSess: how many subjects have each count
[FileQuants, TotSess] = np.unique(SessCounts, return_counts = True)
# Count directly from SessCounts rather than indexing TotSess by position:
# TotSess[1] does not exist when every subject has the same number of files,
# and positions 0/1 only mean "1 file"/"2 files" when those are the only counts present.
n_completed = int(np.sum(SessCounts == 2))
n_incomplete = int(np.sum(SessCounts != 2))
print(f'{n_completed} Participants completed the study')
print(f'{n_incomplete} Participants did not complete the study')

In [None]:
# Boolean mask over NumSubs: True where a subject has exactly two files
TwoSess = np.equal(SessCounts, 2)
GoodSubjs = NumSubs[TwoSess]                        # subjects with two files
SingleSessSubjs = NumSubs[np.logical_not(TwoSess)]  # everyone else
print(f'Subjects with 2 files: {len(GoodSubjs)}')
print(GoodSubjs)
print()
print(f'Single Subjs: {SingleSessSubjs}')

In [None]:
# Make sure every two-file participant really has one attnL and one attnR file
FNameCrit = ['attnL', 'attnR'] # session names, listed in the order they sort alphabetically
CleanSubjs = []        # subjects with both an attnL and an attnR file
mismatched_subjs = []  # two-file subjects whose files are not one attnL + one attnR

for subj in GoodSubjs:
    # Match on the subject-ID prefix (text before the first hyphen, the same rule used to
    # extract the IDs); a plain substring test could also pick up another subject's files
    # whose name merely contains this ID.
    subj_files = sorted(x for x in FileName if x.split('-')[0] == subj)
    # after sorting, position 0 should be the attnL file and position 1 the attnR file
    missing = [crit for pos, crit in enumerate(FNameCrit)
               if pos >= len(subj_files) or crit not in subj_files[pos]]
    if not missing:
        CleanSubjs.append(subj)
    else:
        for crit in missing:
            print(f'{subj} did not match file name for {crit}, moving subj to proper file ind array')
        mismatched_subjs.append(subj) # recorded once, however many labels failed

# Always define SingleSubs (previously it only existed if at least one subject failed,
# and each failure overwrote the previous one instead of accumulating).
SingleSubs = np.concatenate((np.array(mismatched_subjs, dtype=object), SingleSessSubjs))

In [None]:
# Summary of the session check: subjects kept for analysis vs. subjects set aside
print(f'{len(CleanSubjs)} Participants Completed AttnL and AttnR')
# SingleSubs is built by the verification cell above; this line needs that cell to have run
print(f'{len(SingleSubs)} Participants did not complete both sessions')

In [None]:
# For every clean subject, record where its attnL and attnR files sit in FileName.
# Column order follows FNameCrit: FilePos[:, 0] -> attnL index, FilePos[:, 1] -> attnR index.
FNameCrit = ['attnL', 'attnR']
FilePos = np.zeros((len(CleanSubjs), len(FNameCrit)))

for row, subj in enumerate(CleanSubjs):
    for col, crit in enumerate(FNameCrit):
        # Look each session up by label so the column really holds that session;
        # storing positions in FileName order would swap the columns whenever a
        # subject's attnR file is listed before its attnL file.
        # The subject is matched on the ID prefix (text before the first hyphen)
        # so another subject whose name merely contains this ID is not picked up.
        matches = [loc for loc, fname in enumerate(FileName)
                   if fname.split('-')[0] == subj and crit in fname]
        if len(matches) != 1:
            # zero or several candidates: stop rather than store an ambiguous position
            raise ValueError(f'Expected exactly one {crit} file for subject {subj}, found {len(matches)}')
        FilePos[row, col] = matches[0]

In [None]:
# Status message; adjacent f-strings are joined into a single line of output
print(
    f'Now iterating through {NumFiles} files to sort doub sess positions'
    f'({len(CleanSubjs)}), each subj has 2 file positions'
    f' : AttnL and AttnR'
)

In [None]:
# Gather each clean subject's projected data, grouped by session (AttnL / AttnR)
# and by which filtered file it came from ('F1' / 'F2').
AttnL =  {'F1': [], 'F2': []}
AttnR =  {'F1': [], 'F2': []}

# FilePos was derived from the subject list stored in the F1 file and is reused to
# index f2, so both lists must at least have the same number of entries.
if len(f1) != len(f2):
    raise ValueError(f'F1 and F2 data hold different numbers of files ({len(f1)} vs {len(f2)}); file positions would not line up')

# One pass per filtered data set (the previous range(3) ran a third pass that did nothing)
for freq_key, data in (('F1', f1), ('F2', f2)):
    for ind in range(len(CleanSubjs)): # one subject at a time
        attnL_FilePos = int(FilePos[ind,0]) # position of this subject's attnL file
        attnR_FilePos = int(FilePos[ind,1]) # position of this subject's attnR file

        AttnL[freq_key].append(data[attnL_FilePos])
        AttnR[freq_key].append(data[attnR_FilePos])

In [None]:
# Quick look: number of entries in the F1 and F2 data lists (the indexing above assumes they match)
(len(f1), len(f2))