In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model

scatter_data_hc_ridge = pd.read_csv("result/healthy_controls/scatter_data_ridge.csv", sep = ",", header = 0, )
scatter_data_hc_svr = pd.read_csv("result/healthy_controls/scatter_data_svr.csv", sep = ",", header = 0, )
scatter_data_hc_gpr = pd.read_csv("result/healthy_controls/scatter_data_gpr.csv", sep = ",", header = 0, )
scatter_data_hc_dnn = pd.read_csv("result/healthy_controls/scatter_data_dnn.csv", sep = ",", header = 0, )


# feature_name = 'Multi-modal'
# feature_name = 'GMV&DTI'
feature_name = 'GMV&rsfMRI'

scatter_data_ridge = pd.read_csv("result/disorders/age_prediction_ridge_"+feature_name+".csv", sep = ",", header = 0, )
scatter_data_svr = pd.read_csv("result/disorders/age_prediction_svr_"+feature_name+".csv", sep = ",", header = 0, )
scatter_data_gpr = pd.read_csv("result/disorders/age_prediction_gpr_"+feature_name+".csv", sep = ",", header = 0, )
scatter_data_dnn = pd.read_csv("result/disorders/age_prediction_dnn_"+feature_name+".csv", sep = ",", header = 0, )




In [2]:
def combine_data(hc_list, disorder_list):
    # combine multi-modal hc data to disorder data:
    out_list = []
    
    for i in range(len(hc_list)):
        hc_data = hc_list[i]
        dis_data = disorder_list[i]
    
        hc_data = hc_data.loc[hc_data['feature']=='Multi-modal',:]
        hc_data.loc[:, 'group'] = 'HC'
        hc_data.drop(columns = ['feature', 'CV'], inplace = True)
        hc_data.columns = ['SUBJID', 'age', 'predict age', 'group']
        hc_data = hc_data[['group', 'SUBJID', 'age', 'predict age']]
        
        dis_data.columns = ['group', 'SUBJID', 'age', 'predict age']
        
        out_data = pd.concat([hc_data, dis_data], axis = 0)
        out_list.append(out_data)
    
    return(out_list)

scatter_data_list_dis = list((scatter_data_ridge, scatter_data_svr, scatter_data_gpr, scatter_data_dnn))
scatter_data_list_hc = list((scatter_data_hc_ridge, scatter_data_hc_svr, scatter_data_hc_gpr, scatter_data_hc_dnn))

scatter_data_list = combine_data(scatter_data_list_hc, scatter_data_list_dis)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [3]:
# add gender to scatter_data so that we can make scatter plot for female and male separately:
subject_behav = pd.read_csv("data/pnc_cognitive_response.csv", delimiter = ",")
subject_info = subject_behav.loc[:, ['SUBJID', 'Sex']]

method_list = ['ridge', 'svr', 'gpr', 'dnn']

for i_method in range(len(scatter_data_list)):
    scatter_data = scatter_data_list[i_method]
    scatter_data_gender = scatter_data.merge(subject_info, left_on = 'SUBJID', right_on = 'SUBJID')
    scatter_data_gender.set_index( 'group', inplace = True)
    scatter_data_gender.sort_index(inplace = True)
    scatter_data_gender2 = scatter_data_gender.replace({'Sex': {'F': 0, 'M': 1}}, inplace = False)
    #scatter_data_gender['brain age corr'] = np.nan
    
    scatter_data_hc=scatter_data_gender2.loc['HC',:]
    # Train the model using the HC data:
    X_train = np.hstack((scatter_data_hc.loc[:,['Sex', 'age']].values,
                     np.vstack((scatter_data_hc.loc[:,'age'].values**2))))
    y_train = scatter_data_hc.loc[:,'predict age'].values

    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)
    
    dis_list = scatter_data_gender2.index.unique(level = 0)
    
    for i_dis in range(len(dis_list)):
        
        dis_name = dis_list[i_dis]
        scatter_data_dis = scatter_data_gender2.loc[dis_name,:]
        X_test = np.hstack((scatter_data_dis.loc[:,['Sex', 'age']].values,
                             np.vstack((scatter_data_dis.loc[:,'age'].values**2))))
        y_test = scatter_data_dis.loc[:,'predict age'].values
        # Make predictions using the disorder data set
        y_pred = regr.predict(X_test)
        
        scatter_data_gender.at[dis_name, 'brain age fit'] = y_pred
    
    scatter_data_gender.to_csv('result/disorders/out02_scatter_data_'+feature_name+'_fit_brainage_'+ method_list[i_method] + '.csv')