## STEP ONE OF STATS ANALYSIS FROM PROCESSED DATA PIPELINE

#### This script imports CRF data, sorts each participant's data into a dataframe, appends them all together, and then fits a 2-way ANOVA model to the combined dataframe.

In [1]:
# load packages
import numpy as np 
from scipy.io   import  loadmat
from scipy import stats
import pandas as pd
import os
import matplotlib.pyplot as plt #import matplotlib as plt
from scipy.optimize import curve_fit 
import seaborn as sns #import mat73
import pickle as pkl
from datetime import datetime
import statsmodels.api as sm 
from statsmodels.formula.api import ols

#### import and set actual anova params

In [2]:
# Main directory of the processed files exported from MATLAB
#MainDir = 'D:\\AttnXV3_analysis\\RCA_F1\\AvgCRFs\\' # set dir
MainDir = 'C:\\plimon\\LTP_analysis\\RCA_F1\\AvgCRFs\\' # set dir
os.chdir(MainDir) # change old dir, to this dir
# os.listdir returns entries in arbitrary order; sort so that "last file" is reproducible
d = sorted(os.listdir(MainDir)) # list files in dir
print(f'Files on hand: {d}')
##############################################
# The next cell unpickles the chosen file, so only .pkl files are candidates
pkl_files = [f for f in d if f.lower().endswith('.pkl')]
if not pkl_files:
    raise FileNotFoundError(f'No .pkl files found in {MainDir}')
FileN = pkl_files[-1] # choose one (last .pkl in sorted order)
file_path1 = os.path.join(MainDir, FileN) # join paths and prep 2 load
print('Current WD:',file_path1) # NOTE: this prints the selected file path, not the working directory
print('Does File #1 Exist?',os.path.exists(file_path1)) # yes or no

Files on hand: ['AllCondCRF_AllSess_pnlApp_20240314_1235.mat', 'AllCondCRF_AllSess_pnlApp_20240314_1235.pkl', 'AllCondCRF_AllSess_pnlApp_20240314_1237.mat', 'AllCondCRF_AllSess_pnlApp_20240314_1237.pkl', 'Clean_LTPData_20240313_1457.pkl', 'Clean_LTPData_firingrate_20240314_1400.pkl']
Current WD: C:\plimon\LTP_analysis\RCA_F1\AvgCRFs\Clean_LTPData_firingrate_20240314_1400.pkl
Does File #1 Exist? True


In [3]:
# NOTE: unpickling can execute arbitrary code; only load files produced by this pipeline.
# The context manager closes the file handle once the data is loaded.
with open(file_path1, 'rb') as pkl_file:
    loadData = pkl.load(pkl_file)
print(loadData.keys())

dict_keys(['crf', 'resp_diffs', 'mod_crf', 'coSubNames', 'ConditionLabels', 'ContLevs', 'ContLabs'])


In [4]:
# Unpack the fields of the loaded dictionary that the rest of the notebook uses
_fields = ('resp_diffs', 'coSubNames', 'ConditionLabels', 'ContLevs', 'ContLabs')
data, SubNames, data_labels, contrast_levels, contrast_labs = (
    loadData[field] for field in _fields
)
print(data_labels) # labels for nr plots

['attnL F1', 'attnL F2', 'attnR F1', 'attnR F2']


#### Stack all condition differences for 1 subject - make into a function

In [5]:
NumBins = 6 # number of contrast bins per harmonic
NumHarms = 2 # number of harmonics in each response vector: 2F1, 4F1
NumComp = 0 # first component from RCA (0-based index, presumably; not used in the cells shown here)
NumConds = 4 # number of conditions; one per entry of data_labels

#### Uniform Vars:

In [6]:
## CONTRAST
c_levels = np.arange(NumBins)
# Repeat the bin indices once per harmonic so the column always has
# NumBins*NumHarms entries (previously hard-coded for exactly two harmonics).
ContrastArr = np.tile(c_levels, NumHarms) # contrast ind col for df


In [7]:
## HARMONIC
# harmonic ind col for df: NumBins entries of 0., then NumBins entries of 1., ...
HarmArr = np.repeat(np.arange(NumHarms, dtype=float), NumBins)

#### Get Dynamic Vars per cond and save into array

In [8]:
def MakeDataLogs(NumBins, NumHarms, ContrastArr,HarmArr, dIn,costrIn, snIn, conditionFile):
    """Build one long-format (design + response) array per subject for one condition.

    Parameters
    ----------
    NumBins : int
        Number of contrast bins per harmonic.
    NumHarms : int
        Number of harmonics stacked in each subject's response vector.
    ContrastArr, HarmArr : array-like of length NumBins*NumHarms
        Contrast-bin index and harmonic index for every row.
    dIn : indexable by condition
        ``dIn[conditionFile]`` is indexed as ``[subject, :]`` and must yield
        NumBins*NumHarms response values per subject.
    costrIn : sequence of str
        Condition labels of the form ``'<attn> <freq>'``, e.g. ``'attnL F1'``.
    snIn : indexable by condition
        ``snIn[conditionFile]`` holds the numeric subject IDs for that condition.
    conditionFile : int
        Index of the condition to process.

    Returns
    -------
    dict
        Maps subject position (0..n-1) to a ``(NumBins*NumHarms, 6)`` float array
        with columns: subject ID, AttnX (attnL=1, attnR=0), FreqFilt (F1=1, F2=0),
        harmonic index, contrast index, response value.

    Raises
    ------
    ValueError
        If the condition label is not '<attnL|attnR> <F1|F2>'.
    """
    # Index the arguments that were passed in. The previous version ignored them
    # and silently read the notebook-level globals instead.
    dIn = dIn[conditionFile]
    # print(dIn.shape)
    costrIn = costrIn[conditionFile]
    snIn = snIn[conditionFile]

    attn_codes = {'attnL': 1.0, 'attnR': 0.0}  # attnX index, attnL == 1, attnR == 0
    freq_codes = {'F1': 1.0, 'F2': 0.0}        # freq filt index, F1 == 1, F2 == 0

    label_parts = costrIn.split(' ')
    if len(label_parts) < 2:
        raise ValueError(f"Condition label {costrIn!r} is not of the form '<attn> <freq>'")
    attninf, FreqFiltInf = label_parts[0], label_parts[1]
    if attninf not in attn_codes or FreqFiltInf not in freq_codes:
        # Fail loudly instead of hitting an unbound-variable error further down
        raise ValueError(f"Unrecognised condition label {costrIn!r}")

    n_rows = NumBins * NumHarms
    AttnXArr = np.full(n_rows, attn_codes[attninf])
    FreqFiltArr = np.full(n_rows, freq_codes[FreqFiltInf])

    print(f'{attninf},{AttnXArr}')
    print(f'{FreqFiltInf}{FreqFiltArr}')

    DataLogs = {}

    for su in range(len(snIn)):
        SubjDataLog = np.zeros((n_rows, 6))

        # fill array w data
        SubjDataLog[:,0] = snIn[su]    # SUBJ id
        SubjDataLog[:,1] = AttnXArr    # attention
        SubjDataLog[:,2] = FreqFiltArr # FreqFilt (freq tagged contrast (hi / low))
        SubjDataLog[:,3] = HarmArr     # Harmonic
        SubjDataLog[:,4] = ContrastArr # Contrast ind
        SubjDataLog[:,5] = dIn[su,:]   # response values for this subject

        DataLogs[su] = SubjDataLog

    return DataLogs


In [9]:
# Per-condition dictionary of per-subject data logs, keyed by condition index
DataLogsperCond = {}

for cond_idx in range(NumConds):
    DataLogsperCond[cond_idx] = MakeDataLogs(
        NumBins,
        NumHarms,
        ContrastArr,
        HarmArr,
        dIn=data,
        costrIn=data_labels,
        snIn=SubNames,
        conditionFile=cond_idx,
    )
    print()  # blank line between the per-condition printouts

attnL,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
F1[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

attnL,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
F2[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

attnR,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
F1[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]

attnR,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
F2[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]



In [10]:
# Stack every subject's array row-wise, giving one 2-D array per condition
coLogs = {
    cond_idx: np.concatenate(list(DataLogsperCond[cond_idx].values()), axis=0)
    for cond_idx in range(NumConds)
}


In [11]:
# Stack the per-condition arrays row-wise into a single array
Log = np.concatenate(list(coLogs.values()), axis=0)

In [12]:
# Shape is (rows, 6): NumBins*NumHarms rows per subject per condition, 6 columns
print(Log.shape)

(1236, 6)


In [13]:
# Column names, in the order the columns of Log were filled in MakeDataLogs
log_columns = ['Subj', 'AttnX', 'FreqFilt', 'Harmonic', 'Contrast', 'Vals']
log = {col_name: Log[:, col_idx] for col_idx, col_name in enumerate(log_columns)}
ResponseDiffLog = pd.DataFrame(log)

In [14]:
# Display the long-format table: one row per subject x condition x harmonic x contrast bin
ResponseDiffLog

Unnamed: 0,Subj,AttnX,FreqFilt,Harmonic,Contrast,Vals
0,2652.0,1.0,1.0,0.0,0.0,0.505150
1,2652.0,1.0,1.0,0.0,1.0,0.406819
2,2652.0,1.0,1.0,0.0,2.0,0.020986
3,2652.0,1.0,1.0,0.0,3.0,-0.599435
4,2652.0,1.0,1.0,0.0,4.0,-0.799817
...,...,...,...,...,...,...
1231,2726.0,0.0,0.0,1.0,1.0,-0.043443
1232,2726.0,0.0,0.0,1.0,2.0,-0.249052
1233,2726.0,0.0,0.0,1.0,3.0,-0.168874
1234,2726.0,0.0,0.0,1.0,4.0,-0.071073


In [15]:
# Linear model with subject, attention, frequency tag, harmonic and contrast as
# categorical factors, plus the FreqFilt x AttnX interaction; summarised as a type-II ANOVA table.
anova_formula = 'Vals ~ C(Subj) +C(AttnX) + C(FreqFilt) + C(Harmonic)+ C(Contrast)+  C(FreqFilt):C(AttnX)'
fitted_model = ols(anova_formula, data=ResponseDiffLog).fit()
one_way_interactions = fitted_model
anova_table = sm.stats.anova_lm(one_way_interactions, typ=2)

print(anova_table.to_string(float_format='{:,.12f}'.format))

                               sum_sq                 df               F         PR(>F)
C(Subj)               17.934742298376    35.000000000000  3.560642913737 0.000000000029
C(AttnX)               0.305226270540     1.000000000000  2.120914863017 0.145563288903
C(FreqFilt)            2.620361220950     1.000000000000 18.208010241541 0.000021375303
C(Harmonic)            6.426457257112     1.000000000000 44.655293559837 0.000000000036
C(Contrast)            1.255161506245     5.000000000000  1.744339168034 0.121607874315
C(FreqFilt):C(AttnX)   0.713615146520     1.000000000000  4.958672030586 0.026146542269
Residual             171.399849448194 1,191.000000000000             NaN            NaN


# Save out data (export)

In [None]:
SubRows = NumBins * NumHarms      # rows per condition (6 x 2 with the settings above)
RowPop = SubRows * NumConds       # rows per subject (6 x 2 x 4 with the settings above)
ContrastInd = np.arange(NumBins)
# Contrast-bin index for every row of a subject's table.
# NOTE: this rebinds ContrastArr, which earlier cells defined with NumBins*NumHarms entries.
ContrastArr = np.tile(ContrastInd, NumHarms * NumConds)

In [None]:
# One design column per factor, RowPop entries each, zero-initialised here.
# NOTE: HarmArr is rebound here; earlier cells defined it with NumBins*NumHarms entries.
AttnXArr, FreqFiltArr, HarmArr = (np.zeros(RowPop) for _ in range(3))
# subject response differences across conditions are loaded in MakeDataFramePerSubject

In [None]:
# Fill the design columns one condition block (SubRows rows) at a time.
# The codes are read from the condition labels ('attnL F1', 'attnL F2', ...) so they
# follow the same convention as MakeDataLogs: AttnX attnL = 1 / attnR = 0, and
# FreqFilt F1 = 1 / F2 = 0 (F1 = 6 Hz, F2 = 7.5 Hz). The previous position-based
# version coded both factors the other way round.
for co in range(NumConds):
    rows = slice(co*SubRows, (co+1)*SubRows)
    attn_label, freq_label = data_labels[co].split(' ')[:2]

    # index harmonic data: 2F / 4F -> NumBins entries per harmonic index
    HarmArr[rows] = np.repeat(np.arange(NumHarms), NumBins)
    AttnXArr[rows] = 1 if attn_label == 'attnL' else 0
    FreqFiltArr[rows] = 1 if freq_label == 'F1' else 0

In [None]:
def MakeDataFramePerSubject(RowPop,SubRows,ContrastArr,AttnXArr,FreqFiltArr,HarmArr,NumConds,soi, dictIn):
    """Assemble one subject's long-format table across all conditions.

    Parameters
    ----------
    RowPop : int
        Total number of rows in the returned table.
    SubRows : int
        Number of rows contributed by each condition.
    ContrastArr, AttnXArr, FreqFiltArr, HarmArr : array-like of length RowPop
        Pre-built factor columns, copied into the table as given.
    NumConds : int
        Number of conditions to read from ``dictIn``.
    soi : int
        Subject row index; also written to the 'Subj' column as the subject label.
    dictIn : indexable by condition
        ``dictIn[co][soi, :]`` must yield SubRows response values.

    Returns
    -------
    pandas.DataFrame
        Columns 'Subj', 'AttnX', 'FreqFilt', 'Harmonic', 'Contrast', 'Vals'.
    """
    # Response values, written block by block in condition order
    responses = np.zeros(RowPop)
    for cond_idx in range(NumConds):
        block_start = cond_idx * SubRows
        responses[block_start:block_start + SubRows] = dictIn[cond_idx][soi, :]

    return pd.DataFrame({
        'Subj': [soi] * RowPop,
        'AttnX': AttnXArr,
        'FreqFilt': FreqFiltArr,
        'Harmonic': HarmArr,
        'Contrast': ContrastArr,
        'Vals': responses,
    })

In [None]:
# NumSubs was not defined in any earlier cell, so this loop raised a NameError.
# Use the number of subject rows that is available in every condition.
# NOTE(review): this assumes row `su` refers to the same subject in every condition,
# and the 'Subj' label is the row index rather than the ID in SubNames; confirm against SubNames.
NumSubs = min(len(data[co]) for co in range(NumConds))

Sub_df = {}
for su in range(NumSubs):
    Sub_df[su]= MakeDataFramePerSubject(RowPop,SubRows,ContrastArr,AttnXArr,FreqFiltArr,HarmArr,NumConds,soi=su,dictIn=data)

In [None]:
# Display the table built for subject index 21 as a spot check
Sub_df[21] # sanity check

In [None]:
# Pre-allocate the pooled table: RowPop rows per subject, 6 columns
total_rows = RowPop * NumSubs
anova_array = np.zeros((total_rows, 6))
print(anova_array.shape)

# Column names are taken from the first subject's dataframe
headerArr = Sub_df[0].columns.tolist()
print(headerArr)

#### Save everyone's dataframes into one big one

In [None]:
# Copy each subject's table into its own RowPop-row block of the pooled array
for su in range(NumSubs):
    block_start = RowPop * su
    anova_array[block_start:block_start + RowPop, :] = Sub_df[su].to_numpy()

#### Add header labels necessary for ANOVA

In [None]:
# Wrap the pooled array in a DataFrame so the formula below can refer to columns by name
anova_df = pd.DataFrame(anova_array, columns = headerArr)

#### Vis Sanity Check for data frame

In [None]:
# Display the pooled table for a visual check
anova_df

#### Run ANOVA

In [None]:
# Fit the linear model on the pooled table and summarise it as a type-II ANOVA.
# Factors: AttnX, FreqFilt, Harmonic, Contrast, plus the FreqFilt x Harmonic interaction
# (this model has no subject term).
anova_formula = 'Vals ~ C(AttnX) + C(FreqFilt) + C(Harmonic)+ C(Contrast)+  C(FreqFilt):C(Harmonic)'
fitted_model = ols(anova_formula, data=anova_df).fit()
one_way_interactions = fitted_model
sm.stats.anova_lm(one_way_interactions, typ=2)
# TODO: check whether the model can account for subject.
# TODO: check whether the model can compare within participant, i.e. one subject's
#       F1 and F2 data compared within that subject (paired comparison).