In [5]:
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model

# Load the brain-age scatter data produced by each regression method.
# The DNN file is the only one read with its first column as the index.
scatter_data_ridge = pd.read_csv("result/scatter_data_ridge.csv", sep=",", header=0)
scatter_data_svr = pd.read_csv("result/scatter_data_svr.csv", sep=",", header=0)
scatter_data_gpr = pd.read_csv("result/scatter_data_gpr.csv", sep=",", header=0)
scatter_data_dnn = pd.read_csv("result/scatter_data_dnn.csv", sep=",", header=0, index_col=0)

# Keep the frames in a fixed order: ridge, svr, gpr, dnn.
scatter_data_list = [
    scatter_data_ridge,
    scatter_data_svr,
    scatter_data_gpr,
    scatter_data_dnn,
]

In [6]:
# Add each subject's sex to the scatter data so that scatter plots can be made
# for females and males separately, then, for every method and every feature,
# regress predicted brain age on chronological age with three nested models:
#   'brain age fit sex'      : Sex + age + age^2
#   'brain age fit intersex' : Sex + age + age^2 + Sex*age + Sex*age^2
#   'brain age fit'          : age + age^2
# The in-sample fitted values are stored as new columns and written to CSV.
subject_behav = pd.read_csv("data/pnc_cognitive_response.csv", delimiter = ",")
subject_info = subject_behav.loc[:, ['SUBJID', 'Sex']]

method_list = ['ridge', 'svr', 'gpr', 'dnn']

# A single estimator is enough: every call to fit() re-estimates it from scratch.
regr = linear_model.LinearRegression()

for i, scatter_data in enumerate(scatter_data_list):
    # One row per (feature, subject); rows are ordered by feature name.
    scatter_data_gender = (
        scatter_data
        .merge(subject_info, on = 'SUBJID')
        .set_index('feature')
        .sort_index()
    )
    # Numeric copy used only to build the design matrices (F -> 0, M -> 1);
    # the frame that gets saved keeps the original 'Sex' labels.
    scatter_data_gender2 = scatter_data_gender.replace({'Sex': {'F': 0, 'M': 1}}, inplace = False)

    feature_list = scatter_data_gender2.index.unique()

    for feature_name in feature_list:
        # Boolean-mask selection always yields a DataFrame, even when a
        # feature has a single row, and addresses exactly the same rows
        # (in the same order) when the predictions are written back.
        in_feature = scatter_data_gender.index == feature_name
        scatter_data_feature = scatter_data_gender2.loc[in_feature]

        y_train = scatter_data_feature['brain age'].to_numpy()
        sex = scatter_data_feature['Sex'].to_numpy(dtype = float)
        age = scatter_data_feature['chronological age'].to_numpy(dtype = float)
        age_sq = age**2

        # Insertion order fixes the order in which the output columns are
        # created: with sex, with sex interactions, then without sex.
        design_matrices = {
            'brain age fit sex': np.column_stack((sex, age, age_sq)),
            'brain age fit intersex': np.column_stack((sex, age, age_sq, sex*age, sex*age_sq)),
            'brain age fit': np.column_stack((age, age_sq)),
        }

        for column_name, X_train in design_matrices.items():
            # Fit on all rows of this feature and predict on the same rows.
            y_pred = regr.fit(X_train, y_train).predict(X_train)
            # .loc (not .at, which is meant for a single scalar cell) writes
            # the whole prediction vector into the rows of this feature.
            scatter_data_gender.loc[in_feature, column_name] = y_pred

    # NOTE(review): assumes the directory result/allsubjects/ already exists.
    scatter_data_gender.to_csv('result/allsubjects/out02_scatter_data_gender_'+ method_list[i] + '.csv')