In [1]:

import os, shutil
from glob import glob
import numpy as np
import pandas as pd
from natsort import natsorted
import seaborn as sns
from tqdm import tqdm, trange
import matplotlib.pyplot as plt


In [2]:

in_dir = "out01_tms_induced_respose/"
in_dir2 = "data_subject_info/"

out_dir = "out02_plot_sigals/"
in_file_name = "HRF_tms_induced_response.csv"

if not os.path.exists(out_dir):
    os.mkdir(out_dir)
    
complete_df = pd.read_csv(in_dir + in_file_name)

subject_info = pd.read_csv(in_dir2 + "age_gender_edu.txt", sep = ',')
intensity_suds = pd.read_csv(in_dir2 + "intensity_suds.csv", sep = ',')
scalp_dist = pd.read_csv(in_dir2 + "dist_to_scalp.csv", sep = ',')
print(list(scalp_dist['site'].drop_duplicates()))


['L-FP', 'R-FP', 'L-aMFG', 'R-aMFG', 'L-pMFG', 'R-pMFG', 'R-IFJ', 'R-FEF', 'R-M1', 'R-preSMA', 'R-IPL']


In [3]:
# scalp_dist['site'] = scalp_dist['site'].str.replace('-', '_')
# df['range'].str.replace(',','-')

tms_sites = ["L_Fp","R_Fp","L_aMFG","R_aMFG","L_pMFG","R_pMFG","R_IFJ","R_FEF","R_M1","R_preSMA","R_IPL"]
scalp_dist['site'].replace(list(scalp_dist['site'].drop_duplicates()), tms_sites, inplace = True)

# reformat columns in intensity_suds:
intensity_suds = pd.melt(intensity_suds, id_vars=['idall', 'MT', 'intensity'], value_vars = tms_sites,
                         var_name = 'suds_site', value_name = 'suds')

data = subject_info.merge(complete_df, left_on = 'cc_post_intake_id', right_on = 'subject')
data = data.merge(intensity_suds, how = 'left', 
                  left_on = ['cc_post_intake_id', 'site'], right_on = ['idall', 'suds_site'])

data = data.merge(scalp_dist, how = 'left', 
                  left_on = ['subject', 'site'], right_on = ['subject', 'site'])

data.drop(labels = ['inputfile', 'idall', 'cc_post_intake_id', 'suds_site'], axis = 1, inplace = True)
# data.replace({'gender': {1: "male", 2: "female"}}, inplace = True)
# data.replace({'gender': {'male': 1, 'female': 2}}, inplace = True)

data


Unnamed: 0,gender,age,yrs_of_edu,subject,site,group,tms_site_response_standard,tms_site_response_individual_6mm,tms_site_response_individual_10mm,tms_site_response_individual_14mm,tms_site_response_individual_14-10mm,tms_site_response_individual_10-6mm,MT,intensity,suds,scalp_dist
0,2,45,18,1001,L_Fp,NTHC,0.287125,0.439684,0.456288,0.395807,0.354767,0.462649,62.0,74.0,10.0,14.749354
1,2,45,18,1001,L_pMFG,NTHC,-0.358279,-0.463813,-0.439631,-0.329946,-0.264645,-0.431570,62.0,74.0,5.0,13.038824
2,2,45,18,1001,R_FEF,NTHC,-0.252532,-0.423779,-0.511036,-0.496599,-0.487678,-0.540984,62.0,74.0,2.0,17.079627
3,2,45,18,1001,R_Fp,NTHC,0.245202,0.278052,0.265166,0.243850,0.230372,0.260594,62.0,74.0,7.0,16.614293
4,2,45,18,1001,R_M1,NTHC,-1.002989,-1.403756,-1.606550,-1.550770,-1.520768,-1.663231,62.0,74.0,1.0,15.276347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
787,1,22,14,2108,R_IPL,TEHC,1.236482,0.391417,1.001566,1.383794,1.611196,1.196951,,100.0,5.0,14.361543
788,1,22,14,2108,R_M1,TEHC,0.124382,0.233850,0.166974,0.150504,0.140419,0.144036,,100.0,15.0,18.975313
789,1,22,14,2108,R_aMFG,TEHC,-0.279814,-0.045608,-0.032161,-0.082834,-0.115168,-0.027050,,100.0,20.0,13.567683
790,1,22,14,2108,R_pMFG,TEHC,0.363362,-0.020677,0.119496,0.184963,0.224639,0.165267,,100.0,20.0,14.366030


In [44]:

## correlation matrix:

d = data.loc[:, ['gender', 'age', 'yrs_of_edu', 'MT', 'intensity', 'suds', 'scalp_dist']]
print(d.count())
d.corr()


gender        792
age           792
yrs_of_edu    792
MT            581
intensity     792
suds          771
scalp_dist    689
dtype: int64


Unnamed: 0,gender,age,yrs_of_edu,MT,intensity,suds,scalp_dist
gender,1.0,0.100431,0.030555,0.026028,-0.005011,-0.027004,0.086767
age,0.100431,1.0,0.43042,-0.219282,-0.164505,0.079981,0.078526
yrs_of_edu,0.030555,0.43042,1.0,-0.085059,-0.087906,-0.001968,0.044035
MT,0.026028,-0.219282,-0.085059,1.0,0.999374,0.011761,0.134476
intensity,-0.005011,-0.164505,-0.087906,0.999374,1.0,0.114597,0.199412
suds,-0.027004,0.079981,-0.001968,0.011761,0.114597,1.0,-0.125881
scalp_dist,0.086767,0.078526,0.044035,0.134476,0.199412,-0.125881,1.0


In [14]:
import statsmodels
statsmodels.__version__

'0.13.5'

In [48]:
import statsmodels.api as sm

# backward selection

test_variables = ['tms_site_response_standard', 'tms_site_response_individual_6mm',
                  'tms_site_response_individual_10mm', 'tms_site_response_individual_14mm', 
                  'tms_site_response_individual_14-10mm', 'tms_site_response_individual_10-6mm']

roi_list = data.site.unique()
feature = ['gender', 'age', 'yrs_of_edu', 'intensity', 'suds', 'scalp_dist']

row_index = pd.MultiIndex.from_tuples([(i , j) for i in test_variables for j in feature])
result = pd.DataFrame(index = row_index, columns = tms_sites)

for var in test_variables:    
    for roi in roi_list:
        print(var)
        print(roi)
        
        X = data.loc[(data['site'] == roi) & (~data[var].isna()), feature]
        y = data.loc[(data['site'] == roi) & (~data[var].isna()), [var]]

        if len(y) < 20: continue
            
        # remove nan in X:
        row_nan = X.isna().any(axis=1)
        X = sm.add_constant(X.loc[~row_nan,:]) # adding a constant

        model = sm.OLS(y.loc[~row_nan], X).fit()
        predictions = model.predict(X) 

        print(model.summary())
        for i, f in enumerate(feature):
            result.loc[(var, f), roi] = model.pvalues[i]
        


tms_site_response_standard
L_Fp
                                OLS Regression Results                                
Dep. Variable:     tms_site_response_standard   R-squared:                       0.112
Model:                                    OLS   Adj. R-squared:                 -0.007
Method:                         Least Squares   F-statistic:                    0.9437
Date:                        Thu, 15 Dec 2022   Prob (F-statistic):              0.474
Time:                                12:37:42   Log-Likelihood:                -48.721
No. Observations:                          52   AIC:                             111.4
Df Residuals:                              45   BIC:                             125.1
Df Model:                                   6                                         
Covariance Type:                    nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------

                                   OLS Regression Results                                   
Dep. Variable:     tms_site_response_individual_6mm   R-squared:                       0.036
Model:                                          OLS   Adj. R-squared:                 -0.102
Method:                               Least Squares   F-statistic:                    0.2609
Date:                              Thu, 15 Dec 2022   Prob (F-statistic):              0.952
Time:                                      12:37:42   Log-Likelihood:                -64.360
No. Observations:                                49   AIC:                             142.7
Df Residuals:                                    42   BIC:                             156.0
Df Model:                                         6                                         
Covariance Type:                          nonrobust                                         
                 coef    std err          t      P>|t|      [0.025    

strong multicollinearity or other numerical problems.
tms_site_response_individual_10mm
L_aMFG
                                    OLS Regression Results                                   
Dep. Variable:     tms_site_response_individual_10mm   R-squared:                       0.248
Model:                                           OLS   Adj. R-squared:                  0.174
Method:                                Least Squares   F-statistic:                     3.360
Date:                               Thu, 15 Dec 2022   Prob (F-statistic):            0.00630
Time:                                       12:37:42   Log-Likelihood:                -50.949
No. Observations:                                 68   AIC:                             115.9
Df Residuals:                                     61   BIC:                             131.4
Df Model:                                          6                                         
Covariance Type:                           nonrobust       

strong multicollinearity or other numerical problems.
tms_site_response_individual_14-10mm
L_Fp
                                     OLS Regression Results                                     
Dep. Variable:     tms_site_response_individual_14-10mm   R-squared:                       0.225
Model:                                              OLS   Adj. R-squared:                  0.122
Method:                                   Least Squares   F-statistic:                     2.182
Date:                                  Thu, 15 Dec 2022   Prob (F-statistic):             0.0623
Time:                                          12:37:42   Log-Likelihood:                -38.711
No. Observations:                                    52   AIC:                             91.42
Df Residuals:                                        45   BIC:                             105.1
Df Model:                                             6                                         
Covariance Type:               

strong multicollinearity or other numerical problems.
tms_site_response_individual_10-6mm
R_M1
                                     OLS Regression Results                                    
Dep. Variable:     tms_site_response_individual_10-6mm   R-squared:                       0.045
Model:                                             OLS   Adj. R-squared:                 -0.052
Method:                                  Least Squares   F-statistic:                    0.4682
Date:                                 Thu, 15 Dec 2022   Prob (F-statistic):              0.829
Time:                                         12:37:42   Log-Likelihood:                -77.912
No. Observations:                                   66   AIC:                             169.8
Df Residuals:                                       59   BIC:                             185.2
Df Model:                                            6                                         
Covariance Type:                         

In [46]:

def format_table(report):
    report2 = report.copy()
    report2[report.gt(0)] = report2[report.gt(0)].astype(str).apply(lambda x : x.str[:5])
    
    report2[report.le(0.05)] = \
    report2[report.le(0.05)].apply(lambda x : x.str[:5]).add('*')

    report2[report.le(0.01)] = \
    report2[report.le(0.01)].apply(lambda x : x.str[:5]).add('**')
        
    return report2

result2 = format_table(result)
result2.to_csv(out_dir + "multiple_regression_pvalues.csv")
result2


Unnamed: 0,Unnamed: 1,L_Fp,R_Fp,L_aMFG,R_aMFG,L_pMFG,R_pMFG,R_IFJ,R_FEF,R_M1,R_preSMA,R_IPL
tms_site_response_standard,gender,0.076,0.873,0.498,0.051,0.59,0.761,0.107,0.362,0.538,0.89,0.148
tms_site_response_standard,age,0.404,0.533,0.009**,0.273,0.721,0.118,0.740,0.803,0.839,0.263,0.714
tms_site_response_standard,yrs_of_edu,0.696,0.737,0.088,0.337,0.716,0.307,0.901,0.962,0.48,0.234,0.674
tms_site_response_standard,intensity,0.290,0.708,0.249,0.247,0.535,0.454,0.696,0.075,0.172,0.478,0.911
tms_site_response_standard,suds,0.302,0.916,0.155,0.773,0.551,0.994,0.079,0.47,0.886,0.356,0.055
tms_site_response_standard,scalp_dist,0.314,0.799,0.775,0.714,0.866,0.739,0.225,0.928,0.945,0.807,0.172
tms_site_response_individual_6mm,gender,0.025*,0.97,0.269,0.904,0.551,0.938,0.397,0.606,0.365,0.314,0.020*
tms_site_response_individual_6mm,age,0.546,0.93,0.145,0.889,0.767,0.577,0.468,0.691,0.78,0.099,0.487
tms_site_response_individual_6mm,yrs_of_edu,0.155,0.347,0.028*,0.512,0.833,0.714,0.359,0.706,0.101,0.275,0.358
tms_site_response_individual_6mm,intensity,0.258,0.993,0.261,0.903,0.269,0.749,0.425,0.412,0.091,0.762,0.580
