## STEP ONE OF STATS ANALYSIS FROM PROCESSED DATA PIPELINE

#### This script imports the CRF data, sorts each participant's data into a dataframe, appends them all together, and then fits a 2-way ANOVA model to the combined dataframe.

In [1]:
# load packages
import numpy as np 
from scipy.io   import  loadmat
from scipy import stats
import pandas as pd
import os
import matplotlib.pyplot as plt #import matplotlib as plt
from scipy.optimize import curve_fit 
import seaborn as sns #import mat73
import pickle as pkl
from datetime import datetime
import statsmodels.api as sm 
from statsmodels.formula.api import ols

#### import and set actual anova params

In [2]:
# Main directory of the processed files exported from MATLAB
#MainDir = 'D:\\AttnXV3_analysis\\RCA_F1\\AvgCRFs\\' # set dir
MainDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\AvgCRFs\\' # set dir
os.chdir(MainDir) # make the data directory the working directory
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# positional pick below stable from run to run.
d = sorted(os.listdir(MainDir)) # list files in dir
print(f'Files on hand: {d}')
##############################################
# NOTE: a positional pick still shifts whenever files are added to or removed
# from MainDir - check the path printed below before continuing.
FileN = d[-2] # choose one
file_path1 = os.path.join(MainDir, FileN) # join paths and prep 2 load
print('Current WD:',file_path1) # prints the selected file path (not the cwd)
print('Does File #1 Exist?',os.path.exists(file_path1)) # yes or no

Files on hand: ['AllCondCRF_AllHarmonics_pnlApp_20240315_1641.mat', 'AllCondCRF_AllHarmonics_pnlApp_20240315_1641.pkl', 'AllCondCRF_AllSess_pnlApp_20240314_1235.mat', 'AllCondCRF_AllSess_pnlApp_20240314_1235.pkl', 'AllCondCRF_AllSess_pnlApp_20240314_1237.mat', 'AllCondCRF_AllSess_pnlApp_20240314_1237.pkl', 'Clean_LTPData_20240313_1457.pkl', 'Clean_LTPData_2468F_20240316_2252.pkl', 'Clean_LTPData_firingrate_20240314_1400.pkl']
Current WD: C:\plimon\LTP_analysis\RCA_F1\AvgCRFs\Clean_LTPData_2468F_20240316_2252.pkl
Does File #1 Exist? True


In [3]:
# Load the pickled dictionary. The context manager closes the file handle,
# which the bare open() call previously left open.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(file_path1, 'rb') as pkl_file:
    loadData = pkl.load(pkl_file)
print(loadData.keys())

dict_keys(['crf', 'resp_diffs', 'mod_crf', 'coSubNames', 'ConditionLabels', 'ContLevs', 'ContLabs'])


In [4]:
# Unpack the loaded dictionary into the working variables used below.
data, crf_responses, SubNames = (
    loadData[key] for key in ('resp_diffs', 'crf', 'coSubNames')
)
data_labels, contrast_levels, contrast_labs = (
    loadData[key] for key in ('ConditionLabels', 'ContLevs', 'ContLabs')
)
print(data_labels) # labels for nr plots

['AtnL(100%) Hi Contrast(100%)', 'AtnL(100%) Lo Contrast(1%)', 'AtnR(1%) Hi Contrast(100%)', 'AtnR(1%) Lo Contrast(1%)']


In [5]:
# Short '<attention side> <frequency tag>' labels, one per condition; these
# replace the labels loaded from the file.
data_labels = [f'{attn} {freq}' for attn in ('AttnL', 'AttnR') for freq in ('F1', 'F2')]

#### Stack all condition differences for 1 subject - make into a function

In [6]:
# Design dimensions used to build the index columns below:
#   NumBins  - number of contrasts
#   NumHarms - number of harmonics (the original note listed only 2F1, 4F1;
#              with 4 harmonics this is presumably 2F-8F - TODO confirm)
#   NumConds - number of conditions
NumBins, NumHarms, NumConds = 6, 4, 4

#### Uniform Vars:

In [7]:
## CONTRAST
# Contrast-bin indices 0..NumBins-1, cycled once per harmonic
c_levels = np.arange(0, NumBins)
ContrastArr = np.arange(NumBins * NumHarms) % NumBins # contrast ind col for df

In [8]:
## HARMONIC
# Harmonic index column for the df: each harmonic index is held for NumBins
# consecutive rows (kept as a float array).
HarmArr = np.repeat(np.arange(NumHarms, dtype=float), NumBins)

#### Get Dynamic Vars per cond and save into array

In [9]:
def MakeDataLogs(NumBins, NumHarms, ContrastArr,HarmArr, dIn,costrIn, snIn, conditionFile):
    """Build one long-format log array per subject for a single condition.

    The passed-in ``dIn``, ``costrIn`` and ``snIn`` are used directly (they
    are no longer replaced by the notebook-level ``data``, ``data_labels``
    and ``SubNames``).

    Parameters
    ----------
    NumBins, NumHarms : int
        Number of contrast bins and harmonics; every subject contributes
        ``NumBins * NumHarms`` rows.
    ContrastArr, HarmArr : array-like of length ``NumBins * NumHarms``
        Contrast-bin and harmonic index columns shared by all subjects.
    dIn : indexable by condition
        ``dIn[conditionFile]`` is indexed as ``[subject, :]``; each row must
        hold ``NumBins * NumHarms`` values.
    costrIn : sequence of str
        Condition labels of the form ``'<AttnL|AttnR> <F1|F2>'``.
    snIn : indexable by condition
        ``snIn[conditionFile]`` holds the subject IDs for that condition.
    conditionFile : int
        Index of the condition to process.

    Returns
    -------
    dict
        ``{subject position: ndarray of shape (NumBins * NumHarms, 6)}`` with
        columns [subject ID, attention (AttnL=1, AttnR=0),
        frequency tag (F1=1, F2=0), harmonic, contrast bin, value].

    Raises
    ------
    ValueError
        If the condition label cannot be parsed into a known attention side
        and frequency tag.
    """
    n_rows = NumBins * NumHarms
    cond_data = dIn[conditionFile]
    cond_label = costrIn[conditionFile]
    cond_subjects = snIn[conditionFile]

    # Decode the label up front so a bad label fails with a clear message
    # instead of an unbound-variable error inside the subject loop.
    attn_codes = {'AttnL': 1.0, 'AttnR': 0.0}
    freq_codes = {'F1': 1.0, 'F2': 0.0}
    label_parts = cond_label.split(' ')
    if (len(label_parts) < 2
            or label_parts[0] not in attn_codes
            or label_parts[1] not in freq_codes):
        raise ValueError(
            f"Cannot parse condition label {cond_label!r}; "
            "expected '<AttnL|AttnR> <F1|F2>'"
        )
    AttnXArr = np.full(n_rows, attn_codes[label_parts[0]])
    FreqFiltArr = np.full(n_rows, freq_codes[label_parts[1]])

    DataLogs = {}
    for su in range(len(cond_subjects)):
        SubjDataLog = np.zeros((n_rows, 6))
        SubjDataLog[:, 0] = cond_subjects[su]  # subject ID
        SubjDataLog[:, 1] = AttnXArr           # attention
        SubjDataLog[:, 2] = FreqFiltArr        # FreqFilt (freq tagged contrast (hi / low))
        SubjDataLog[:, 3] = HarmArr            # harmonic
        SubjDataLog[:, 4] = ContrastArr        # contrast ind
        SubjDataLog[:, 5] = cond_data[su, :]   # values
        DataLogs[su] = SubjDataLog
    return DataLogs

In [10]:
################################################################################################################################
def MakePrePostDataLog(conditionFile,NumBins,NumHarms,dIn=None,costrIn=None,snIn=None):
    """Build one long-format pre/post log array per subject for one condition.

    Parameters
    ----------
    conditionFile : int
        Index of the condition to process.
    NumBins, NumHarms : int
        Number of contrast bins and harmonics.
    dIn : indexable by condition, optional
        ``dIn[conditionFile]`` must be a 3-D array of shape
        ``(n_subjects, NumBins * NumHarms, 2)``; the last axis is read as
        (pre, post). Falls back to the notebook-level ``crf_responses``.
    costrIn : sequence of str, optional
        Condition labels of the form ``'<AttnL|AttnR> <F1|F2>'``. Falls back
        to the notebook-level ``data_labels``.
    snIn : indexable by condition, optional
        ``snIn[conditionFile]`` holds the subject IDs. Falls back to the
        notebook-level ``SubNames``.

    Returns
    -------
    dict
        ``{subject position: ndarray of shape (NumBins * NumHarms * 2, 7)}``
        with columns [subject ID, attention (AttnL=1, AttnR=0),
        frequency tag (F1=1, F2=0), harmonic, contrast bin,
        pre/post (Pre=0, Post=1), value].

    Raises
    ------
    ValueError
        If the condition label cannot be parsed or the data array does not
        have the expected ``(n_subjects, NumBins * NumHarms, 2)`` shape.
    """
    # Omitted arguments resolve to the notebook-level objects at call time;
    # explicitly passed arguments are now honoured instead of being overwritten.
    if dIn is None:
        dIn = crf_responses
    if costrIn is None:
        costrIn = data_labels
    if snIn is None:
        snIn = SubNames

    cond_data = dIn[conditionFile]
    cond_label = costrIn[conditionFile]
    cond_subjects = snIn[conditionFile]

    ns, nh, nc = cond_data.shape  # subjects, harmonic x bin cells, pre/post
    n_cells = NumBins * NumHarms
    if nc != 2 or nh != n_cells:
        raise ValueError(
            f"Expected data of shape (n_subjects, {n_cells}, 2), "
            f"got {tuple(cond_data.shape)}"
        )
    n_rows = n_cells * nc

    # Decode the label up front so a bad label fails with a clear message.
    attn_codes = {'AttnL': 1.0, 'AttnR': 0.0}
    freq_codes = {'F1': 1.0, 'F2': 0.0}
    label_parts = cond_label.split(' ')
    if (len(label_parts) < 2
            or label_parts[0] not in attn_codes
            or label_parts[1] not in freq_codes):
        raise ValueError(
            f"Cannot parse condition label {cond_label!r}; "
            "expected '<AttnL|AttnR> <F1|F2>'"
        )
    AttnXArr = np.full(n_rows, attn_codes[label_parts[0]])
    FreqFiltArr = np.full(n_rows, freq_codes[label_parts[1]])

    # Index columns follow the row-major flattening of (cell, pre/post):
    # pre/post alternates fastest, then contrast bin, then harmonic.
    coIn = np.tile([0, 1], n_cells)                                  # Pre=0, Post=1
    sweep_ind = np.tile(np.repeat(np.arange(NumBins), nc), NumHarms)  # contrast bin
    Harm_Ind = np.repeat(np.arange(NumHarms), NumBins * nc)           # harmonic

    # Flatten each subject's (cell, pre/post) block into a single row. The
    # shape is passed positionally because the `newshape` keyword is deprecated.
    rshp_data = np.reshape(cond_data, (ns, nh * nc))

    DataLogs = {}
    for sIn in range(ns):
        SubjDataLog = np.zeros((n_rows, 7))
        SubjDataLog[:, 0] = cond_subjects[sIn]  # subject ID
        SubjDataLog[:, 1] = AttnXArr            # attention
        SubjDataLog[:, 2] = FreqFiltArr         # FreqFilt (freq tagged contrast (hi / low))
        SubjDataLog[:, 3] = Harm_Ind            # harmonic
        SubjDataLog[:, 4] = sweep_ind           # contrast sweep num ind
        SubjDataLog[:, 5] = coIn                # pre/post ind [0,1]
        SubjDataLog[:, 6] = rshp_data[sIn, :]   # values, interleaved pre/post
        DataLogs[sIn] = SubjDataLog
    return DataLogs

In [11]:
# Per-condition logs of the response differences:
# {condition index: {subject position: log array}}.
# (The stray print() that emitted one blank line per condition was removed.)
DataLogsperCond = {}

for co in range(NumConds):
    DataLogsperCond[co] = MakeDataLogs(NumBins, NumHarms, ContrastArr, HarmArr,
                                       dIn=data, costrIn=data_labels, snIn=SubNames,
                                       conditionFile=co)







In [12]:
# Stack every subject's log array row-wise: one 2-D array per condition.
coLogs = {}
for co in range(NumConds):
    dfs_In = DataLogsperCond[co]
    coLogs[co] = np.vstack(list(dfs_In.values()))


In [13]:
# Stack the per-condition arrays into a single long-format array.
Log = np.vstack(list(coLogs.values()))

In [14]:
# Sanity check: (total rows across all conditions and subjects, 6 columns)
print(Log.shape)

(1728, 6)


In [15]:
# Name the columns of the stacked array, in the order MakeDataLogs fills them.
log = {
    col_name: Log[:, col_idx]
    for col_idx, col_name in enumerate(
        ('Subj', 'AttnX', 'Contrast_VEP', 'Harmonic', 'Contrast_Bins', 'Vals')
    )
}
ResponseDiffLog = pd.DataFrame(log)

In [16]:
# Display the response-difference dataframe (one row per subject x condition x harmonic x contrast bin)
ResponseDiffLog

Unnamed: 0,Subj,AttnX,Contrast_VEP,Harmonic,Contrast_Bins,Vals
0,2654.0,1.0,1.0,0.0,0.0,-0.160264
1,2654.0,1.0,1.0,0.0,1.0,0.005230
2,2654.0,1.0,1.0,0.0,2.0,0.162359
3,2654.0,1.0,1.0,0.0,3.0,-0.189057
4,2654.0,1.0,1.0,0.0,4.0,-0.335596
...,...,...,...,...,...,...
1723,2726.0,0.0,0.0,3.0,1.0,-0.165616
1724,2726.0,0.0,0.0,3.0,2.0,-0.096241
1725,2726.0,0.0,0.0,3.0,3.0,-0.017477
1726,2726.0,0.0,0.0,3.0,4.0,-0.105655


In [17]:
# Response-difference ANOVA: all factors entered as categorical main effects,
# plus the Contrast_VEP x AttnX interaction.
diff_formula = 'Vals ~ C(Subj)+C(AttnX)+C(Contrast_VEP)+C(Harmonic)+C(Contrast_Bins)+C(Contrast_VEP):C(AttnX)'
diff_model = ols(diff_formula, data=ResponseDiffLog)
one_way_interactions = diff_model.fit()
anova_table = sm.stats.anova_lm(one_way_interactions, typ=2) # typ=2 sums of squares

print(anova_table.to_string(float_format='{:,.12f}'.format))
# Observation: with the additional harmonics the residual sum of squares went down from ~117 to ~109.

                                   sum_sq                 df               F         PR(>F)
C(Subj)                   12.048177163043    30.000000000000  6.180742443051 0.000000000000
C(AttnX)                   0.018036231079     1.000000000000  0.277578833967 0.598361852140
C(Contrast_VEP)            0.893332431168     1.000000000000 13.748447416623 0.000215755975
C(Harmonic)                4.548999954613     3.000000000000 23.336473818760 0.000000000000
C(Contrast_Bins)           0.987153846096     5.000000000000  3.038473086088 0.009789620192
C(Contrast_VEP):C(AttnX)   0.287653043415     1.000000000000  4.427000077055 0.035522378667
Residual                 109.551168456202 1,686.000000000000             NaN            NaN


#### Make DataFrame for ANOVA given pre and post values

In [18]:
# Per-condition logs of the pre/post CRF responses:
# {condition index: {subject position: log array}}.
# Uses the NumBins / NumHarms constants instead of repeating the literals 6 and 4,
# so this stays in step with the response-difference logs built earlier.
crf_DataLogsperCond = {}

for co in range(NumConds):
    crf_DataLogsperCond[co] = MakePrePostDataLog(conditionFile = co,NumBins = NumBins,NumHarms = NumHarms,dIn=crf_responses,costrIn=data_labels,snIn = SubNames)


In [19]:
# Stack every subject's pre/post log array row-wise: one 2-D array per condition.
co_crfLogs = {}

for co in range(NumConds):
    dfsIn = crf_DataLogsperCond[co]
    co_crfLogs[co] = np.vstack(list(dfsIn.values()))

In [20]:
# Stack the per-condition pre/post arrays into a single long-format array.
crf_Log = np.vstack(list(co_crfLogs.values()))

In [21]:
# Sanity check: (total rows across all conditions and subjects, 7 columns)
print(crf_Log.shape)

(3456, 7)


In [22]:
# Name the columns of the stacked pre/post array, in the order
# MakePrePostDataLog fills them.
co_log = {
    col_name: crf_Log[:, col_idx]
    for col_idx, col_name in enumerate(
        ('Subj', 'AttnX', 'Contrast_VEP', 'Harmonic', 'Contrast_Bins', 'PrePost', 'Vals')
    )
}
All_crf_Responses_Log = pd.DataFrame(co_log)

In [23]:
# Display the pre/post dataframe (each subject x condition x harmonic x contrast bin has a Pre (0) and a Post (1) row)
All_crf_Responses_Log

Unnamed: 0,Subj,AttnX,Contrast_VEP,Harmonic,Contrast_Bins,PrePost,Vals
0,2654.0,1.0,1.0,0.0,0.0,0.0,0.811857
1,2654.0,1.0,1.0,0.0,0.0,1.0,0.651592
2,2654.0,1.0,1.0,0.0,1.0,0.0,2.149527
3,2654.0,1.0,1.0,0.0,1.0,1.0,2.154757
4,2654.0,1.0,1.0,0.0,2.0,0.0,3.336312
...,...,...,...,...,...,...,...
3451,2726.0,0.0,0.0,3.0,3.0,1.0,0.185763
3452,2726.0,0.0,0.0,3.0,4.0,0.0,0.652750
3453,2726.0,0.0,0.0,3.0,4.0,1.0,0.547095
3454,2726.0,0.0,0.0,3.0,5.0,0.0,0.770253


In [24]:
# Pre/post ANOVA: all factors entered as categorical main effects, plus the
# Contrast_VEP x Contrast_Bins interaction.
# NOTE: this reuses (overwrites) the `one_way_interactions` and `anova_table`
# names from the response-difference ANOVA.
prepost_formula = 'Vals ~ C(Subj)+C(AttnX)+C(Contrast_VEP)+C(Harmonic)+C(Contrast_Bins)+C(PrePost)+C(Contrast_VEP):C(Contrast_Bins)'
prepost_model = ols(prepost_formula, data=All_crf_Responses_Log)
one_way_interactions = prepost_model.fit()
anova_table = sm.stats.anova_lm(one_way_interactions, typ=2) # typ=2 sums of squares

print(anova_table.to_string(float_format='{:,.12f}'.format))

                                             sum_sq                 df                  F         PR(>F)
C(Subj)                          1,321.924905213626    30.000000000000    37.476073700687 0.000000000000
C(AttnX)                             4.286706532303     1.000000000000     3.645795520704 0.056294807180
C(Contrast_VEP)                    121.001215048784     1.000000000000   102.910167631076 0.000000000000
C(Harmonic)                      4,065.072783570516     3.000000000000 1,152.432839678798 0.000000000000
C(Contrast_Bins)                   899.401263821881     5.000000000000   152.986124627252 0.000000000000
C(PrePost)                           0.681165792584     1.000000000000     0.579323818121 0.446630130168
C(Contrast_VEP):C(Contrast_Bins)    32.520501034293     5.000000000000     5.531663812692 0.000044386930
Residual                         4,008.283647734964 3,409.000000000000                NaN            NaN


In [25]:
# Bundle both dataframes for export. LogsOut used to be left as an empty
# dict, so the pickle written from it contained no data.
DiffLog = ResponseDiffLog
PrePostLog = All_crf_Responses_Log

LogsOut = {
    'DiffLog': DiffLog,        # response differences
    'PrePostLog': PrePostLog,  # pre / post CRF responses
}

# Save out data (export)

In [26]:
SaveDataDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\StatsDataFrames\\' # set dir where files (.pkl, .csv) will be saved
os.makedirs(SaveDataDir, exist_ok=True) # create the output folder if it is missing
print('Path to Save File is:',SaveDataDir)

FileOutName = 'LTP_LogsOut_2468F_df' # make sure this file changes each time you save
######################################################
dnt = datetime.now() # timestamp the file names so earlier exports are not overwritten
fdnt = dnt.strftime("%Y%m%d_%H%M") # date and time as a string, e.g. 20240317_0656
# NOTE: FileN is reused here and now holds the OUTPUT pickle name, not the
# input file name chosen when the data was loaded.
FileN = f'{FileOutName}_{fdnt}.pkl'
cFileN = f'{FileOutName}_{fdnt}.csv'

NewFileNPath = os.path.join(SaveDataDir,FileN)
cNewFilePath = os.path.join(SaveDataDir,cFileN)

print('Full New File Dir: ', NewFileNPath)

############################################################################################
saveFile = 'y' # set to anything other than 'y' to skip saving

if saveFile == 'y':
    # Guard against silently exporting an empty pickle.
    if not LogsOut:
        print('WARNING: LogsOut is empty - the pickle will not contain any data frames!')

    # Only the response-difference dataframe is written to csv.
    ResponseDiffLog.to_csv(cNewFilePath)

    with open(NewFileNPath, 'wb') as file:
        pkl.dump(LogsOut, file, protocol=pkl.HIGHEST_PROTOCOL)
    # save as .mat file or .csv file to import into matlab

    # Report success only after the pickle file has been written and closed.
    print('Response Differences Data Frame Saved as pkl and csv! :))')
else:
    print('Did Not Save File! Change file name before switching to y!')

Path to Save File is: C:\plimon\LTP_analysis\RCA_F1\StatsDataFrames\
Full New File Dir:  C:\plimon\LTP_analysis\RCA_F1\StatsDataFrames\LTP_LogsOut_2468F_df_20240317_0656.pkl
Response Differences Data Frame Saved as pkl and csv! :))
