In [2]:
import numpy as np
import scipy.io
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import matplotlib.pyplot as plt

# Load the SVM decoding-accuracy export from MATLAB
file_path = '../Data/02_SVM_Accuracy.mat'
mat = scipy.io.loadmat(file_path)

# Names stored in the file, skipping the first three keys of the loaded dict
# (presumably loadmat's metadata entries - confirm) and anything containing
# 'Sig'. Kept for interactive inspection only; nothing below reads it.
var = [v for v in list(mat.keys())[3:] if 'Sig' not in v]

# One row per MATLAB variable, in the order the values are stacked:
# (variable name, task_type, ensemble_type, predicted_locs, actual_locs)
conditions1 = [
    ('subsetStartFromStartSpa',    'spa', 'subs', 'start', 'start'),
    ('notSubsetStartFromStartSpa', 'spa', 'nsub', 'start', 'start'),
    ('subsetGoalFromStartSpa',     'spa', 'subs', 'goal',  'start'),
    ('notSubsetGoalFromStartSpa',  'spa', 'nsub', 'goal',  'start'),
    ('subsetStartFromGoalSpa',     'spa', 'subs', 'start', 'goal'),
    ('notSubsetStartFromGoalSpa',  'spa', 'nsub', 'start', 'goal'),
    ('subsetGoalFromGoalSpa',      'spa', 'subs', 'goal',  'goal'),
    ('notSubsetGoalFromGoalSpa',   'spa', 'nsub', 'goal',  'goal'),
    ('subsetStartFromStartCue',    'cue', 'subs', 'start', 'start'),
    ('notSubsetStartFromStartCue', 'cue', 'nsub', 'start', 'start'),
    ('subsetGoalFromStartCue',     'cue', 'subs', 'goal',  'start'),
    ('notSubsetGoalFromStartCue',  'cue', 'nsub', 'goal',  'start'),
    ('subsetStartFromGoalCue',     'cue', 'subs', 'start', 'goal'),
    ('notSubsetStartFromGoalCue',  'cue', 'nsub', 'start', 'goal'),
    ('subsetGoalFromGoalCue',      'cue', 'subs', 'goal',  'goal'),
    ('notSubsetGoalFromGoalCue',   'cue', 'nsub', 'goal',  'goal'),
]

# mat[name] (not mat.get(name)) so a missing variable raises a KeyError that
# names it, instead of an AttributeError on None.
arrays1 = [mat[name].flatten() for name, *_ in conditions1]

# Label counts come from the data itself rather than a hard-coded group size,
# so each value is always paired with the labels of the variable it came from.
sizes1 = [a.size for a in arrays1]

data1 = pd.DataFrame({
    'vals': np.concatenate(arrays1, axis=0),
    'task_type': np.repeat([c[1] for c in conditions1], sizes1),
    'ensemble_type': np.repeat([c[2] for c in conditions1], sizes1),
    'predicted_locs': np.repeat([c[3] for c in conditions1], sizes1),
    'actual_locs': np.repeat([c[4] for c in conditions1], sizes1),
})

data1.head()

Unnamed: 0,vals,task_type,ensemble_type,predicted_locs,actual_locs
0,0.855094,spa,subs,start,start
1,0.880678,spa,subs,start,start
2,0.871373,spa,subs,start,start
3,0.863729,spa,subs,start,start
4,0.930566,spa,subs,start,start


In [3]:
# Full-factorial OLS on data1: four categorical factors with every interaction
# up to the 4-way term. Default (treatment) coding is kept for this fit so the
# coefficient table in the summary is unchanged.
formula1 = 'vals ~ C(task_type) + C(ensemble_type) + C(actual_locs) + C(predicted_locs) \
             + C(task_type):C(ensemble_type) + C(task_type):C(actual_locs) + C(task_type):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs) + C(ensemble_type):C(predicted_locs) \
             + C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs) + C(task_type):C(ensemble_type):C(predicted_locs) \
             + C(task_type):C(actual_locs):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs):C(predicted_locs)'
model1 = ols(formula1, data=data1).fit()
print(model1.summary())

# for an explanation on type 1 vs 2 vs 3:
# https://mcfromnz.wordpress.com/2011/03/02/anova-type-iiiiii-ss-explained/
#
# Type III sums of squares test each term's coefficients in the fitted
# parametrisation. With treatment coding the "main effect" rows would be
# simple effects at the reference levels of the other factors, so the table is
# computed from a refit with sum-to-zero contrasts. Every ')' in formula1
# closes a C(...) term, so the replace turns C(x) into C(x, Sum).
model1_sum = ols(formula1.replace(')', ', Sum)'), data=data1).fit()
res1 = anova_lm(model1_sum, typ=3)

# Restore the original row labels so code that indexes res1 by term name
# (e.g. 'C(task_type)') keeps working.
res1.index = res1.index.str.replace(', Sum)', ')', regex=False)

print(res1)

                            OLS Regression Results                            
Dep. Variable:                   vals   R-squared:                       0.427
Model:                            OLS   Adj. R-squared:                  0.385
Method:                 Least Squares   F-statistic:                     10.33
Date:                Fri, 23 Feb 2024   Prob (F-statistic):           2.72e-18
Time:                        19:05:01   Log-Likelihood:                 221.68
No. Observations:                 224   AIC:                            -411.4
Df Residuals:                     208   BIC:                            -356.8
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                                                                                      coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

In [4]:
# Type 2 anova of cell above
model1_T2 = ols('vals ~ C(task_type) + C(ensemble_type) + C(actual_locs) + C(predicted_locs) \
             + C(task_type):C(ensemble_type) + C(task_type):C(actual_locs) + C(task_type):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs) + C(ensemble_type):C(predicted_locs) \
             + C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs) + C(task_type):C(ensemble_type):C(predicted_locs) \
             + C(task_type):C(actual_locs):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs):C(predicted_locs)', data=data1).fit()
print(model1_T2.summary())

# for an explanation on type 1 vs 2 vs 3: 
# https://mcfromnz.wordpress.com/2011/03/02/anova-type-iiiiii-ss-explained/
res1_T2 = anova_lm(model1_T2,typ=2)

print(res1_T2)

                            OLS Regression Results                            
Dep. Variable:                   vals   R-squared:                       0.427
Model:                            OLS   Adj. R-squared:                  0.385
Method:                 Least Squares   F-statistic:                     10.33
Date:                Fri, 23 Feb 2024   Prob (F-statistic):           2.72e-18
Time:                        19:05:10   Log-Likelihood:                 221.68
No. Observations:                 224   AIC:                            -411.4
Df Residuals:                     208   BIC:                            -356.8
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                                                                                      coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------

In [3]:
# Open the unit spatial-information export from MATLAB
file_path = '../Data/03_Unit_Spatial_Information.mat'
mat = scipy.io.loadmat(file_path)

# Stored variable names: drop the first three keys of the loaded dict and
# every name containing 'Sig'
var = [key for key in list(mat.keys())[3:] if 'Sig' not in key]
print(var)

# Number of values held by each of those variables once flattened
lens = []
for name in var:
    lens.append(mat[name].flatten().size)
print(lens)

['spaSubsetTotal', 'spaSubsetStart', 'spaSubsetGoal', 'spaNotSubsetTotal', 'spaNotSubsetStart', 'spaNotSubsetGoal', 'cueSubsetTotal', 'cueSubsetStart', 'cueSubsetGoal', 'cueNotSubsetTotal', 'cueNotSubsetStart', 'cueNotSubsetGoal']
[108, 108, 108, 492, 492, 492, 90, 90, 90, 510, 510, 510]


In [4]:
# Load the per-unit spatial-information export from MATLAB
file_path = '../Data/03_Unit_Spatial_Information.mat'
mat = scipy.io.loadmat(file_path)

# Names stored in the file, skipping the first three keys of the loaded dict
# (presumably loadmat's metadata entries - confirm) and anything containing
# 'Sig'. Kept for interactive inspection only; nothing below reads it.
var = [v for v in list(mat.keys())[3:] if 'Sig' not in v]

# One row per MATLAB variable, in the order the values are stacked:
# (variable name, task_type, ensemble_type, information_type)
conditions2 = [
    ('spaSubsetTotal',    'spa', 'subs',  'total'),
    ('spaSubsetStart',    'spa', 'subs',  'start'),
    ('spaSubsetGoal',     'spa', 'subs',  'goal'),
    ('spaNotSubsetTotal', 'spa', 'nsubs', 'total'),
    ('spaNotSubsetStart', 'spa', 'nsubs', 'start'),
    ('spaNotSubsetGoal',  'spa', 'nsubs', 'goal'),
    ('cueSubsetTotal',    'cue', 'subs',  'total'),
    ('cueSubsetStart',    'cue', 'subs',  'start'),
    ('cueSubsetGoal',     'cue', 'subs',  'goal'),
    ('cueNotSubsetTotal', 'cue', 'nsubs', 'total'),
    ('cueNotSubsetStart', 'cue', 'nsubs', 'start'),
    ('cueNotSubsetGoal',  'cue', 'nsubs', 'goal'),
]

# mat[name] (not mat.get(name)) so a missing variable raises a KeyError that
# names it, instead of an AttributeError on None.
arrays2 = [mat[name].flatten() for name, *_ in conditions2]

# The groups differ in size, so label counts are read from the arrays rather
# than typed in by hand; a changed group size can no longer shift labels onto
# the wrong values.
sizes2 = [a.size for a in arrays2]

data2 = pd.DataFrame({
    'vals': np.concatenate(arrays2, axis=0),
    'task_type': np.repeat([c[1] for c in conditions2], sizes2),
    'ensemble_type': np.repeat([c[2] for c in conditions2], sizes2),
    'information_type': np.repeat([c[3] for c in conditions2], sizes2),
})

data2.head()

Unnamed: 0,vals,task_type,ensemble_type,information_type
0,7.169124,spa,subs,total
1,6.189314,spa,subs,total
2,16.637914,spa,subs,total
3,6.853969,spa,subs,total
4,3.356084,spa,subs,total


In [5]:
# Three-factor full-factorial OLS on data2, followed by a Type II ANOVA table
formula2 = 'vals ~ C(task_type) + C(ensemble_type) + C(information_type) + \
             C(task_type):C(ensemble_type) + C(task_type):C(information_type) + \
             C(ensemble_type):C(information_type) + \
             C(task_type):C(ensemble_type):C(information_type)'
spec2 = ols(formula=formula2, data=data2)
model2 = spec2.fit()
print(model2.summary())

res2 = anova_lm(model2, typ=2)
print(res2)

                            OLS Regression Results                            
Dep. Variable:                   vals   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                  0.055
Method:                 Least Squares   F-statistic:                     19.89
Date:                Wed, 27 Dec 2023   Prob (F-statistic):           1.75e-39
Time:                        10:03:06   Log-Likelihood:                -8879.9
No. Observations:                3600   AIC:                         1.778e+04
Df Residuals:                    3588   BIC:                         1.786e+04
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                                                                                coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------

In [6]:
# Load the normalised population-vector information export from MATLAB
file_path = '../Data/04_Normed_PV_Information.mat'
mat = scipy.io.loadmat(file_path)

# One row per MATLAB variable, in the order the values are stacked:
# (variable name, task_type, ensemble_type, actual_locs, predicted_locs)
conditions3 = [
    ('spaSubsetStartDecStart',    'spa', 'subs', 'start', 'start'),
    ('spaNotSubsetStartDecStart', 'spa', 'nsub', 'start', 'start'),
    ('spaSubsetStartDecGoal',     'spa', 'subs', 'start', 'goal'),
    ('spaNotSubsetStartDecGoal',  'spa', 'nsub', 'start', 'goal'),
    ('spaSubsetGoalDecStart',     'spa', 'subs', 'goal',  'start'),
    ('spaNotSubsetGoalDecStart',  'spa', 'nsub', 'goal',  'start'),
    ('spaSubsetGoalDecGoal',      'spa', 'subs', 'goal',  'goal'),
    ('spaNotSubsetGoalDecGoal',   'spa', 'nsub', 'goal',  'goal'),
    ('cueSubsetStartDecStart',    'cue', 'subs', 'start', 'start'),
    ('cueNotSubsetStartDecStart', 'cue', 'nsub', 'start', 'start'),
    ('cueSubsetStartDecGoal',     'cue', 'subs', 'start', 'goal'),
    ('cueNotSubsetStartDecGoal',  'cue', 'nsub', 'start', 'goal'),
    ('cueSubsetGoalDecStart',     'cue', 'subs', 'goal',  'start'),
    ('cueNotSubsetGoalDecStart',  'cue', 'nsub', 'goal',  'start'),
    ('cueSubsetGoalDecGoal',      'cue', 'subs', 'goal',  'goal'),
    ('cueNotSubsetGoalDecGoal',   'cue', 'nsub', 'goal',  'goal'),
]

# mat[name] (not mat.get(name)) so a missing variable raises a KeyError that
# names it, instead of an AttributeError on None.
arrays3 = [mat[name].flatten() for name, *_ in conditions3]

# Label counts are read from the arrays rather than typed in by hand, so each
# value is always paired with the labels of the variable it came from.
sizes3 = [a.size for a in arrays3]

data3 = pd.DataFrame({
    'vals': np.concatenate(arrays3, axis=0),
    'task_type': np.repeat([c[1] for c in conditions3], sizes3),
    'ensemble_type': np.repeat([c[2] for c in conditions3], sizes3),
    'actual_locs': np.repeat([c[3] for c in conditions3], sizes3),
    'predicted_locs': np.repeat([c[4] for c in conditions3], sizes3),
})

data3.head()

Unnamed: 0,vals,task_type,ensemble_type,actual_locs,predicted_locs
0,0.662857,spa,subs,start,start
1,0.976455,spa,subs,start,start
2,1.095636,spa,subs,start,start
3,1.133634,spa,subs,start,start
4,0.800119,spa,subs,start,start


In [7]:
# Four-factor full-factorial OLS on data3, followed by a Type II ANOVA table
formula3 = 'vals ~ C(task_type) + C(ensemble_type) + C(actual_locs) + C(predicted_locs) \
             + C(task_type):C(ensemble_type) + C(task_type):C(actual_locs) + C(task_type):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs) + C(ensemble_type):C(predicted_locs) \
             + C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs) + C(task_type):C(ensemble_type):C(predicted_locs) \
             + C(task_type):C(actual_locs):C(predicted_locs) \
             + C(ensemble_type):C(actual_locs):C(predicted_locs) \
             + C(task_type):C(ensemble_type):C(actual_locs):C(predicted_locs)'
spec3 = ols(formula=formula3, data=data3)
model3 = spec3.fit()
print(model3.summary())

res3 = anova_lm(model3, typ=2)
print(res3)

                            OLS Regression Results                            
Dep. Variable:                   vals   R-squared:                       0.051
Model:                            OLS   Adj. R-squared:                  0.050
Method:                 Least Squares   F-statistic:                     45.62
Date:                Wed, 27 Dec 2023   Prob (F-statistic):          2.13e-132
Time:                        10:03:07   Log-Likelihood:                -16264.
No. Observations:               12680   AIC:                         3.256e+04
Df Residuals:                   12664   BIC:                         3.268e+04
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                                                                                      coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------