In [13]:
import os
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.preprocessing import MinMaxScaler
import pingouin as pg
import numpy as np

In [224]:
homedir = '/home/raghuram/Desktop/radiomics/TEXTURES/'
os.chdir(homedir)

In [15]:
t1ce_file = 'expt_t1ce.csv'
t1w_file = 'expt_t1w.csv'
t2f_file = 'expt_t2f.csv'
t2w_file = 'expt_t2w.csv'

In [151]:
def pre_process_dataframe(csv_file_name, experiment_number):
    # Binarize magnetization strength values
    # Scanner names and manufacturer are categorical 
    # One hot encode them
    
    # Code for the above here
    # Experiments in ascending order
    
    experiment_df = pd.read_csv(csv_file_name)
    experiment_df = experiment_df[experiment_df['experiment_number'] == experiment_number]
    fill_values = {'mag_field_strength':1.5}
    experiment_df.fillna(value=fill_values, inplace=True)
    experiment_df['mag_field_strength_binarized'] = (experiment_df['mag_field_strength']>=1.5).astype(int)
    experiment_df.drop(columns=[ 'Tumor','experiment_number', 'scale', 'algo', 'ng', 'flip_angle', 
                                'VOLUME_ET', 'VOLUME_NET', 'VOLUME_ED', 'VOLUME_TC',
                               'VOLUME_BRAIN', 'mag_field_strength'], inplace=True)
    
    experiment_df.dropna(subset=['VOLUME_WT'], inplace=True)
    experiment_df = pd.get_dummies(experiment_df, columns=['scanner_manufacturer', 'scanner_model'], drop_first=True)
    radiomics_features  = list(experiment_df.columns)[:42]

    radiomics_df = experiment_df[radiomics_features]
    scaled_df = experiment_df.drop(columns=radiomics_features)
    print(list(scaled_df.columns))
    scaled_df = scaled_df - scaled_df.min()/(scaled_df.max()-scaled_df.min())
    
    return (scaled_df, radiomics_df)
        


In [152]:
def results_summary_to_dataframe(results,response_variable):
    '''take the result of an statsmodel results table and transforms it into a dataframe'''
    pvals = results.pvalues
    coeff = results.params
    ci_bounds = results.conf_int()
 
    results_df = pd.DataFrame({"pvals":pvals,
                               "coeff":coeff,
                               "response_variable":response_variable,
                              })
    #Reordering...
    results_df = results_df[["coeff","pvals", "response_variable"]]
    return results_df

In [203]:
def linear_regression(results_folder, radiomic_df, scaled_df, experiment_number):
    # Load the data corresponding to the sequence argument here
    # Perform regression with Tr and Te 
    
    radiomic_features = list(radiomic_df.columns)
    y = np.array(radiomic_df)
    X = np.array(scaled_df)
    X = sm.add_constant(X)
    result_list = []
    for idx, column in enumerate(radiomic_features):
        
        model = sm.OLS(y[:, idx], X)
        results = model.fit()
        results_df = results_summary_to_dataframe(results, column)
#         results_df['significant'] = (results_df['pvals']<0.05).astype(bool)
        result_list.append(results_df)
       
    pd.concat(result_list).to_csv(os.path.join(results_folder, str(experiment_number)+'_'+'t1w.csv'), index=False)

In [225]:
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t1w/linear_regression'
for expt_number in range(1, 26):
    if expt_number > 1:
        break
    scaled_df, radiomics_df = pre_process_dataframe(t1w_file, expt_number)
    linear_regression(results_folder, radiomics_df, scaled_df, expt_number) 
    print('Finished experiment {}'.format(expt_number))

['repetition_time', 'excitation_time', 'VOLUME_WT', 'IDH1', '1p_19q_co_del_status', 'mag_field_strength_binarized', 'scanner_manufacturer_Philips', 'scanner_manufacturer_Siemens', 'scanner_model_Avanto', 'scanner_model_GENESIS', 'scanner_model_GENESIS_SIGNA', 'scanner_model_Ingenia', 'scanner_model_Intera', 'scanner_model_Intera Achieva', 'scanner_model_MAGNETOM VISION', 'scanner_model_SIGNA EXCITE', 'scanner_model_Signa HDxt', 'scanner_model_Symphony', 'scanner_model_Verio']
Finished experiment 1


In [227]:
results_folder = '/home/raghuram/Desktop/radiomics/TEXTURES/results/t1w/linear_regression'
os.chdir(results_folder)

In [196]:
csv_file_list = sorted(glob.glob('*.csv'), key=lambda _: int(_.split('_')[0]))

In [208]:
input_variables = ['intercept','repetition_time', 'excitation_time', 'VOLUME_WT', 'IDH1', '1p_19q_co_del_status', 'mag_field_strength_binarized', 'scanner_manufacturer_Philips', 'scanner_manufacturer_Siemens', 'scanner_model_Avanto', 'scanner_model_GENESIS', 'scanner_model_GENESIS_SIGNA', 'scanner_model_Ingenia', 'scanner_model_Intera', 'scanner_model_Intera Achieva', 'scanner_model_MAGNETOM VISION', 'scanner_model_SIGNA EXCITE', 'scanner_model_Signa HDxt', 'scanner_model_Symphony', 'scanner_model_Verio']
input_columns = input_variables*42
# for idx, csv_file in enumerate(csv_file_list):
#     df = pd.read_csv(csv_file)
# #     df['signifcant'] = (df['pvals']<0.05).astype(bool)
# #     df['input_variable'] = input_columns
#     df.set_index('response_variable', inplace=True)
#     if idx != 0:
#         df.drop(columns=['Unnamed: 0'], inplace=True)
#     df.to_csv(csv_file, index=False)
#     break

In [249]:
df = pd.read_csv('1_t1w.csv')
# df.drop(columns=['Unnamed: 0'], inplace=True)
# df['signifcant'] = (df['pvals']<0.05).astype(bool)
# df['input_variable'] = input_columns
# df = df.set_index(['response_variable', 'input_variable'], inplace=False)

# if idx != 0:
#     df.drop(columns=['Unnamed: 0'], inplace=True)
# df.to_csv(csv_file, index=False)


In [250]:
df = df.pivot_table(index='response_variable', columns='input_variable', values=['coeff', 'pvals'])

In [251]:
df

Unnamed: 0_level_0,coeff,coeff,coeff,coeff,coeff,coeff,coeff,coeff,coeff,coeff,...,pvals,pvals,pvals,pvals,pvals,pvals,pvals,pvals,pvals,pvals
input_variable,1p_19q_co_del_status,IDH1,VOLUME_WT,excitation_time,intercept,mag_field_strength_binarized,repetition_time,scanner_manufacturer_Philips,scanner_manufacturer_Siemens,scanner_model_Avanto,...,scanner_model_GENESIS,scanner_model_GENESIS_SIGNA,scanner_model_Ingenia,scanner_model_Intera,scanner_model_Intera Achieva,scanner_model_MAGNETOM VISION,scanner_model_SIGNA EXCITE,scanner_model_Signa HDxt,scanner_model_Symphony,scanner_model_Verio
response_variable,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
GLCM_Contrast,0.07085538,-0.2542854,-4.846333e-07,0.01485449,0.4609472,0.2078627,8.739952e-05,0.1284752,-0.0229739,0.1867824,...,0.01386879,0.4979679,0.003239545,0.221489,0.790689,0.006989047,0.584033,0.9226734,0.8527125,0.02771255
GLCM_Correlation,-0.007198097,0.02553227,5.324581e-08,-0.001527719,0.5455483,0.2978584,-8.438402e-06,0.07754706,0.1384143,-0.06330036,...,1.30995e-09,1.27372e-07,0.002719704,0.215962,0.787933,2.815461e-36,3.0394140000000004e-23,5.404075e-12,0.000626738,0.8971288
GLCM_Dissimilarity,0.04176178,-0.1066813,-2.607308e-07,0.007409831,0.3049122,0.1269329,4.639235e-05,0.07735151,0.003322462,0.07956827,...,0.001736428,0.5790472,0.001606814,0.22374,0.917637,4.68243e-05,0.1855073,0.7344648,0.8142722,0.003313342
GLCM_Energy,-0.003484085,0.006475999,2.128904e-08,-0.0006162125,0.03233604,0.02180425,-3.851873e-06,0.001562435,0.01302096,-0.01152166,...,0.3700873,0.03791165,0.0002851604,0.276048,0.924666,0.000293787,0.002120921,0.08483917,0.03566467,0.0004338931
GLCM_Entropy,0.08379105,-0.1821764,-4.736099e-07,0.01469876,2.698455,1.365586,9.524139e-05,0.4899866,0.480069,0.02481824,...,1.167627e-10,0.003942738,0.0007647218,0.27811,0.984057,4.3785719999999997e-26,2.997087e-12,6.207195e-05,0.2938292,9.137368e-06
GLCM_Homogeneity,-0.01672435,0.03545217,1.030848e-07,-0.002799566,0.4588201,0.2646963,-1.763184e-05,0.05797339,0.1308365,-0.0732581,...,0.02143333,0.0004905776,0.0009434628,0.2451,0.992631,3.52657e-20,9.345642e-12,2.239441e-05,0.006119383,0.03931142
GLCM_SumAverage,0.000449046,-0.0002023719,4.252169e-10,6.080742e-05,0.04093191,0.02222388,-1.511939e-07,0.006353599,0.00955529,-0.003538923,...,4.472046e-32,7.132953000000001e-27,0.2021391,0.247258,0.061081,3.001253e-63,3.019551e-48,2.538327e-29,1.613389e-13,3.004874e-06
GLCM_Variance,-0.0002352361,0.0005795289,5.052522e-09,-8.206712e-05,0.04721894,0.02566724,-1.605593e-08,0.006556839,0.01078225,-0.003511841,...,1.548052e-24,2.220367e-22,0.0238844,0.496356,0.680529,8.646872e-56,7.571256000000001e-40,5.574816e-26,1.140783e-11,0.0001244505
GLRLM_GLN,-7057.042,26842.24,0.01625822,-2791.607,14031.37,11013.27,-16.26272,-4496.649,3833.752,12169.8,...,0.9404818,0.7352253,5.304112e-10,0.940929,0.83956,0.8949133,0.6323935,0.9327431,0.9937745,0.8136372
GLRLM_GLV,0.0001126098,-0.0007943794,-4.744221e-09,3.734086e-05,0.001448756,0.0006781129,1.789296e-07,0.0006559305,7.754332e-05,0.0005524015,...,0.01764011,0.7270477,0.003226713,0.516083,0.744033,0.003192339,0.9166714,0.8893325,0.9159421,0.02086922


In [256]:
df.xs('Global_Kurtosis')

       input_variable               
coeff  1p_19q_co_del_status             5.843376e-01
       IDH1                            -7.163675e-01
       VOLUME_WT                        2.558214e-08
       excitation_time                  1.221579e-01
       intercept                       -1.825431e-02
       mag_field_strength_binarized     3.492502e-02
       repetition_time                  4.910637e-05
       scanner_manufacturer_Philips     6.870013e-01
       scanner_manufacturer_Siemens    -2.985075e-01
       scanner_model_Avanto             5.109446e-01
       scanner_model_GENESIS           -7.743108e-01
       scanner_model_GENESIS_SIGNA      3.690463e-01
       scanner_model_Ingenia           -2.455002e+00
       scanner_model_Intera            -3.749239e-01
       scanner_model_Intera Achieva    -7.752709e-01
       scanner_model_MAGNETOM VISION   -5.317932e-02
       scanner_model_SIGNA EXCITE      -3.131971e-01
       scanner_model_Signa HDxt         3.117133e-01
       sc