#### STEP ONE OF DATA PROCESSING PIPELINE

##### This script imports data on which a Reliable Component Analysis (RCA) was performed. The visual stimulus was dual-frequency tagged: F1 = 3 Hz (6), F2 = 3.75 Hz (7.5) (inverting stim). Each participant completed 2 sessions. For each session, an F1 and an F2 bandpass filter was applied, so a single participant has 4 data files. This is for a frequency-based analysis of the LTP paradigm.


In [1]:
# load packages
import numpy as np 
import scipy.io
from scipy.io   import  loadmat
import pandas as pd
import os
import matplotlib.pyplot as plt #import matplotlib as plt
from scipy.optimize import curve_fit 
import seaborn as sns #import mat73
import pickle as pkl
from datetime import datetime

### Set Dir Path(s): MainDir, SaveDataDir

In [2]:
# Main directory holding the processed RCA result files exported from MATLAB
#MainDir = 'D:\\AttnXV3_analysis\\RCA_F1\\RCA\\' # set dir - with USB Drive
MainDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\RCA\\' # set dir - on my computer
os.chdir(MainDir) # make MainDir the working directory
# os.listdir returns entries in arbitrary order, so sort to make the d[0]/d[1]
# selection deterministic (the *_F1.mat name sorts before *_F2.mat).
d = sorted(os.listdir(MainDir))
print(f'Files on hand: {d}')
if len(d) < 2:
    raise FileNotFoundError(f'Expected an F1 and an F2 file in {MainDir}, found: {d}')
##############################################
FileN_f1 = d[0] # F1 file
FileN_f2 = d[1] # F2 file
file_path1 = os.path.join(MainDir, FileN_f1) # full path of the F1 file
print('Current WD:',file_path1)
print('Does File #1 Exist?',os.path.exists(file_path1))

file_path2 = os.path.join(MainDir, FileN_f2) # full path of the F2 file
print('Current WD:',file_path2)
# Check the F2 path itself (this previously re-checked file_path1).
print('Does File #2 Exist?',os.path.exists(file_path2))

Files on hand: ['rcaResults_Sweep_contrast_sweeps_F1.mat', 'rcaResults_Sweep_contrast_sweeps_F2.mat']
Current WD: C:\plimon\LTP_analysis\RCA_F1\RCA\rcaResults_Sweep_contrast_sweeps_F1.mat
Does File #1 Exist? True
Current WD: C:\plimon\LTP_analysis\RCA_F1\RCA\rcaResults_Sweep_contrast_sweeps_F2.mat
Does File #2 Exist? True


In [3]:
# Output location: directory where the sorted data files (.pkl, .csv) are written
#SaveDataDir = 'D:\\AttnXV3_analysis\\RCA_F1\\AllSubjSweepRCA\\'
SaveDataDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\AllSubjSweepRCA\\'
FileOutName = 'AllRCAData'
newPath = os.path.join(SaveDataDir, FileOutName)
# Create the output directory the first time the notebook is run
if not os.path.exists(SaveDataDir):
    os.makedirs(SaveDataDir)
print(f'Path to save data:{SaveDataDir}')
######################################################
# A timestamp suffix keeps every saved file name unique, so earlier outputs
# are not overwritten by accident.
dnt = datetime.now()
fdnt = f'{dnt:%Y%m%d_%H%M%S}' # timestamp as a string, e.g. 20240227_154844
FileN = f'{FileOutName}_{fdnt}.pkl'
#FileNToMatlab = f'{FileOutName}_{fdnt}.h5'
#FileN = f'{FileOutName}_{fdnt}.csv'
NewFileNPath = os.path.join(SaveDataDir, FileN)
print('Full New File Dir: ', NewFileNPath)

Path to save data:C:\plimon\LTP_analysis\RCA_F1\AllSubjSweepRCA\
Full New File Dir:  C:\plimon\LTP_analysis\RCA_F1\AllSubjSweepRCA\AllRCAData_20240227_154844.pkl


#### Load Data Files

In [4]:
# Read the F1 and F2 MATLAB result files (each loads as a dict of variables)
df_f1, df_f2 = [loadmat(mat_path) for mat_path in (file_path1, file_path2)]

In [5]:
# F1: take the 'projectedData' field of rcaResult, then collect the first-column
# entry of every row into a plain Python list (one entry per data file).
rca_f1 = df_f1['rcaResult']['projectedData'][0, 0]
f1 = [row[0] for row in rca_f1]

In [6]:
# F2: take the 'projectedData' field of rcaResult, then collect the first-column
# entry of every row into a plain Python list (one entry per data file).
rca_f2 = df_f2['rcaResult']['projectedData'][0, 0]
f2 = [row[0] for row in rca_f2]

In [7]:
# Subject/file names are read from field index 5 of the rcaResult struct
SubNames = df_f1['rcaResult'][0, 0][5]
FileName = [
    entry[0][3:]  # drop the first three characters of every stored name
    for subj_group in SubNames
    for entry in subj_group[0][2][0]
]
#FileName = np.sort(FileName)
print(f'Total Data Files: {len(FileName)}')

Total Data Files: 55


In [8]:
#### Parameters used further down
# Dimensions of the first data file (author's note: 24 x 4 x n-Trials)
NumCols, NumComps, b = np.shape(f1[0])
NumFiles = len(FileName)

In [9]:
# Split every file name on '_' into a subject id (first piece) and a session
# label (second piece); names without the separator are skipped.
string_ind = '_'
uniqueSubs = []
SessFileType = []
for fname in FileName[:NumFiles]:
    if string_ind not in fname:
        continue
    pieces = fname.split(string_ind)
    uniqueSubs.append(pieces[0])
    SessFileType.append(pieces[1])

In [10]:
# Each participant should contribute two session files; count how many unique
# participants there are and how many have exactly two files.
TotalFiles = np.array(uniqueSubs)
[NumSubs, SessCounts] = np.unique(TotalFiles, return_counts = True)
print(f'Total Participants: {len(NumSubs)}')
# Histogram of files-per-participant (kept for inspection / later cells)
[FileQuants, TotSess] = np.unique(SessCounts, return_counts = True)
# Count completers directly instead of indexing TotSess by position: TotSess[1]
# only means "two sessions" when exactly the counts {1, 2} occur, and raises
# IndexError when every participant has the same number of files.
n_complete = int(np.sum(SessCounts == 2))
n_incomplete = len(NumSubs) - n_complete
print(f'{n_complete} Participants completed the study')
print(f'{n_incomplete} Participants did not complete the study')

Total Participants: 29
26 Participants completed the study
3 Participants did not complete the study


In [11]:
# Boolean mask over NumSubs: True where a participant has exactly two files
TwoSess = np.equal(SessCounts, 2)
GoodSubjs = NumSubs[TwoSess]                    # participants with two files
SingleSubjs = NumSubs[np.logical_not(TwoSess)]  # everyone else

In [12]:
# Having two files is not enough: a participant may have been run twice on the
# same condition. Keep only participants whose two (sorted) files are one
# attnL followed by one attnR; move everyone else to the incomplete list.
FNameCrit = ['attnL', 'attnR']
CleanSubjs = []
# Start from the participants already known to be incomplete so SingleSubs is
# always defined (even with zero mismatches) and accumulates every mismatch
# instead of being overwritten.
SingleSubs = SingleSubjs
for subj in GoodSubjs:
    # Match on the exact subject id (text before the first '_') rather than a
    # substring, so one id cannot pick up another participant's files.
    subj_files = np.sort([x for x in FileName if x.split('_')[0] == subj])
    # Conditions whose expected slot in the sorted list is missing or wrong
    missing = [
        crit for slot, crit in enumerate(FNameCrit)
        if slot >= len(subj_files) or crit not in subj_files[slot]
    ]
    if not missing:
        CleanSubjs.append(subj)
        continue
    for crit in missing:
        print(f'{subj} did not match file name for {crit}, moving subj to proper file ind array')
    str_nm = np.array([subj], dtype=object)
    SingleSubs = np.concatenate((str_nm, SingleSubs))
2663 did not match file name for attnL, moving subj to proper file ind array


In [13]:
# Summary: participants with both conditions vs. participants without
print(
    f'{len(CleanSubjs)} Participants Completed AttnL and AttnR',
    f'{len(SingleSubs)} Participants did not properly complete all sessions',
    sep='\n',
)

25 Participants Completed AttnL and AttnR
4 Participants did not properly complete all sessions


#### Store Data Files as AttnL and AttnR in order

In [14]:
# Build FilePos: one row per clean participant, column 0 = index into FileName
# of the attnL file, column 1 = index of the attnR file.
FNameCrit = ['attnL', 'attnR']
FilePos = np.zeros((len(CleanSubjs), len(FNameCrit)), dtype=int)  # integer indices

for row, subj in enumerate(CleanSubjs):
    # (position, name) of every file of this participant; match the exact
    # subject id (text before the first '_'), not a substring.
    subj_files = [(loc, fname) for loc, fname in enumerate(FileName)
                  if fname.split('_')[0] == subj]
    for col, crit in enumerate(FNameCrit):
        # Look each condition up by name so the column order is always
        # (attnL, attnR), regardless of the order of entries in FileName.
        hits = [loc for loc, fname in subj_files if crit in fname]
        if len(hits) != 1:
            raise ValueError(
                f'Expected exactly one {crit} file for subject {subj}, found {len(hits)}'
            )
        FilePos[row, col] = hits[0]
#print(FilePos)

#### Create a big for loop to save all this data

In [15]:
# Collect every clean participant's data per condition (AttnL / AttnR) and per
# frequency filter ('F1' / 'F2'). Within each list, entries follow the order of
# CleanSubjs (i.e. the row order of FilePos).
AttnL =  {'F1': [], 'F2': []}
AttnR =  {'F1': [], 'F2': []}
AttnX =  {'F1': [], 'F2': []}  # not filled in this cell

freq_sources = {'F1': f1, 'F2': f2}
Sort_Data_Runs = len(freq_sources)

# Iterate over the frequency sources by name instead of a counter called
# `iter`, which shadowed the builtin iter() for the rest of the notebook.
for freq_key, data in freq_sources.items():
    for ind in range(len(CleanSubjs)):
        attnL_FilePos = int(FilePos[ind,0])  # column 0: attnL file index
        attnR_FilePos = int(FilePos[ind,1])  # column 1: attnR file index
        AttnL[freq_key].append(data[attnL_FilePos])
        AttnR[freq_key].append(data[attnR_FilePos])

#### Save Data into .pkl file

In [16]:
# Bundle everything to be saved: subject names, a note describing the integer
# keys, and the four data lists (0/1 = AttnL F1/F2, 2/3 = AttnR F1/F2).
dataOut = {
    'FullSessSubjNames': CleanSubjs,
    #'OneSessSubjNames': CleanSubjs,
    'DataNotes': ['0&1 attnL[f1/f2] and 2&3 attR[f1/f2]'],
    0: AttnL['F1'],
    1: AttnL['F2'],
    2: AttnR['F1'],
    3: AttnR['F2'],
}

print(dataOut.keys())

dict_keys(['FullSessSubjNames', 'DataNotes', 0, 1, 2, 3])


In [17]:
# Manual safety switch: set to 'y' to actually write the pickle file
saveFile = 'n'

if saveFile != 'y':
    print('Did Not Save File! Change file name before switching to y!')
else:
    # Serialize the bundled data to the timestamped output path
    with open(NewFileNPath, 'wb') as pkl_out:
        pkl.dump(dataOut, pkl_out, protocol=pkl.HIGHEST_PROTOCOL)
        print('Sorted Data Saved! :))')

Did Not Save File! Change file name before switching to y!
