In [1]:
import numpy as np
from os import path
import pandas as pd
import pickle

from dimensional_structure.EFA_plots import get_communality, get_adjusted_communality
from selfregulation.utils.plot_utils import format_num
from selfregulation.utils.r_to_py_utils import get_attr
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_retest_data


Using TensorFlow backend.
  _nan_object_mask = _nan_object_array != _nan_object_array
  from pandas.core import datetools
  reference_scores = pandas.DataFrame.from_csv(os.path.join(file_loc,'survey_subscale_reference.csv'))


In [2]:
# Load the analysis results for the dataset returned by get_recent_dataset().
# Later cells index `results` by the 'survey' and 'task' keys.
recent_dataset = get_recent_dataset()
results = load_results(recent_dataset)

In [3]:
def print_result(s, survey_val, task_val):
    """Print a labelled survey/task pair of values.

    Parameters
    ----------
    s : str
        Heading printed on the first line, followed by a colon.
    survey_val, task_val
        Values to report. Each is passed through ``format_num(value, 3)``
        before printing (presumably three decimal places -- confirm against
        ``format_num``).
    """
    # format survey first, then task, exactly as they are printed
    formatted = [format_num(value, 3) for value in (survey_val, task_val)]
    print('%s:\nsurvey: %s\ntask: %s' % (s, formatted[0], formatted[1]))

# Main Paper Numeric Results

## EFA bootstrapped factor reliability

In [4]:
# Average the bootstrap 'sds' table over both axes (mean of the per-column
# means), computed for the survey results first and then the task results.
survey_boot_sds, task_boot_sds = (
    results[domain].EFA.get_boot_stats()['sds'].mean().mean()
    for domain in ('survey', 'task')
)

print_result(
    'Loading Average SDs across bootstrap samples',
    survey_boot_sds,
    task_boot_sds,
)

# of components not specified, using BIC determined #
# of components not specified, using BIC determined #
Loading Average SDs across bootstrap samples:
survey: 0.016
task: 0.016


## Variance Explained by EFA models

Get the variance explained for each factor analytic model, extracted from the output of the `fa` function in R's `psych` package. This is equivalent to taking the mean of the individual variable communalities.

In [5]:
# Variance explained by the survey and task EFA solutions.
# The indices 12 and 5 select the stored solution for each domain
# (presumably the number of factors in the chosen model -- confirm).
rout_key = 'factor_tree_Rout_oblimin'

survey_EFA_rout = results['survey'].EFA.results[rout_key][12]
survey_vaccounted = get_attr(survey_EFA_rout, 'Vaccounted')
# row 2, last column: presumably the cumulative variance across all factors
survey_EFA_cummvar = survey_vaccounted[2, -1]

task_EFA_rout = results['task'].EFA.results[rout_key][5]
task_vaccounted = get_attr(task_EFA_rout, 'Vaccounted')
task_EFA_cummvar = task_vaccounted[2, -1]

print_result('Variance Expalined', survey_EFA_cummvar, task_EFA_cummvar)

Variance Expalined:
survey: 0.572
task: 0.237


"Adjusted" variance explained, taking into account the reliability of each measure. We first drop variables with very low reliability (<.2) before conducting this analysis, as they can have extreme effects on the adjusted communality of a variable.

To ensure that the effect of adjustment isn't explained by dropping out these variables, we also show the unadjusted variance explained for the reliable-measure subsets. It is clear that subsetting is not meaningfully changing the variance explained values for surveys or tasks.

In [6]:
def return_adjusted(results):
    """Return the adjusted communalities for one results object.

    Parameters
    ----------
    results
        Object exposing ``.EFA`` and a ``.dataset`` string; the retest dataset
        name is derived by replacing 'Complete' with 'Retest' in that string.

    Returns
    -------
    The first element returned by ``get_adjusted_communality``; any further
    elements are discarded.
    """
    efa_communality = get_communality(results.EFA)
    retest_name = results.dataset.replace('Complete', 'Retest')
    # align the retest rows with the order of the communality index
    retest_aligned = get_retest_data(dataset=retest_name).loc[efa_communality.index]
    adjusted, *_ = get_adjusted_communality(efa_communality, retest_aligned)
    return adjusted

# Mean adjusted communality for each domain
survey_adjusted = return_adjusted(results['survey'])
task_adjusted = return_adjusted(results['task'])
print_result(
    'Adjusted Variance Expalined',
    survey_adjusted.mean(),
    task_adjusted.mean(),
)

# For comparison, the unadjusted communality restricted to the variables that
# are present in the adjusted result
survey_communality = get_communality(results['survey'].EFA)
unadjusted_survey = survey_communality[survey_adjusted.index].mean()
task_communality = get_communality(results['task'].EFA)
unadjusted_task = task_communality[task_adjusted.index].mean()
print_result(
    'Unadjusted Variance Expalined for reliable subset',
    unadjusted_survey,
    unadjusted_task,
)

Adjusted Variance Expalined:
survey: 0.854
task: 0.684
Unadjusted Variance Expalined for reliable subset:
survey: 0.572
task: 0.273


## Factor Correlations

In [7]:
# Correlation matrix between the survey EFA factor scores
survey_factor_scores = results['survey'].EFA.get_scores()
survey_factor_scores.corr()

# of components not specified, using BIC determined #


Unnamed: 0,Sensation Seeking,Mindfulness,Impulsivity,Emotional Control,Goal-Directedness,Reward Sensitivity,Risk Perception,Eating Control,Ethical Risk-Taking,Social Risk-Taking,Financial Risk-Taking,Agreeableness
Sensation Seeking,1.0,-0.0769,0.177107,0.163535,-0.047678,0.319966,-0.182242,0.016,0.232938,0.29242,0.304155,-0.160819
Mindfulness,-0.0769,1.0,-0.318716,0.463437,0.435908,0.074991,0.09395,-0.451621,-0.27291,0.043419,-0.105012,0.227789
Impulsivity,0.177107,-0.318716,1.0,-0.114203,-0.399204,0.233959,-0.144583,0.184118,0.133801,0.050341,0.153207,-0.154823
Emotional Control,0.163535,0.463437,-0.114203,1.0,0.286664,0.122571,-0.082857,-0.371895,-0.106479,0.06941,0.099854,0.080508
Goal-Directedness,-0.047678,0.435908,-0.399204,0.286664,1.0,0.079591,0.143923,-0.204944,-0.121244,0.002166,-0.021007,0.146247
Reward Sensitivity,0.319966,0.074991,0.233959,0.122571,0.079591,1.0,0.088095,0.071221,0.050741,0.213569,0.148853,0.007211
Risk Perception,-0.182242,0.09395,-0.144583,-0.082857,0.143923,0.088095,1.0,-0.045313,-0.129149,-0.065739,-0.227738,0.141915
Eating Control,0.016,-0.451621,0.184118,-0.371895,-0.204944,0.071221,-0.045313,1.0,0.243305,-0.070266,0.073621,-0.134568
Ethical Risk-Taking,0.232938,-0.27291,0.133801,-0.106479,-0.121244,0.050741,-0.129149,0.243305,1.0,0.037039,0.274195,-0.214702
Social Risk-Taking,0.29242,0.043419,0.050341,0.06941,0.002166,0.213569,-0.065739,-0.070266,0.037039,1.0,0.072165,-0.01438


In [8]:
# Correlation matrix between the task EFA factor scores
task_factor_scores = results['task'].EFA.get_scores()
task_factor_scores.corr()

# of components not specified, using BIC determined #


Unnamed: 0,Speeded IP,Strategic IP,Discounting,Perc / Resp,Caution
Speeded IP,1.0,0.292228,-0.10815,-0.120801,0.174412
Strategic IP,0.292228,1.0,-0.189523,-0.144948,-0.008188
Discounting,-0.10815,-0.189523,1.0,0.019427,-0.01296
Perc / Resp,-0.120801,-0.144948,0.019427,1.0,0.126365
Caution,0.174412,-0.008188,-0.01296,0.126365,1.0


## Prediction Results



In [52]:
def get_pred_summary(predictions, scores='scores_cv'):
    """Summarise the R2 values stored in a dictionary of prediction results.

    Parameters
    ----------
    predictions : dict
        Mapping whose values are indexed as ``value[scores][0]['R2']``.
        The keys are not used.
    scores : str
        Which score entry to read from each value (default ``'scores_cv'``).

    Returns
    -------
    tuple
        ``(mean, min, max, r2_array)`` where ``r2_array`` is a numpy array of
        the individual R2 values in the dictionary's iteration order.
    """
    r2_values = [entry[scores][0]['R2'] for entry in predictions.values()]
    summary = tuple(reduce_fn(r2_values) for reduce_fn in (np.mean, np.min, np.max))
    return summary + (np.array(r2_values),)



Prediction Performance:
survey: ['0.100', '0.036', '0.285']
task: ['0.012', '0.002', '0.042']


In [61]:
# Report prediction performance for every classifier / predictor-set pairing.
# EFA=True is labelled 'Ontology' and EFA=False is labelled 'Raw'
# (presumably EFA factor scores vs. the raw variables as predictors -- confirm).
banner = '*' * 30
predictor_labels = {True: 'Ontology', False: 'Raw'}

for classifier in ('ridge', 'svm'):
    for EFA in (True, False):
        EFA_tag = predictor_labels[EFA]
        print(banner, EFA_tag, classifier, banner)

        # Load the stored prediction results for this classifier and predictor set
        survey_prediction_results = results['survey'].load_prediction_object(
            classifier=classifier, EFA=EFA)['data']
        task_prediction_results = results['task'].load_prediction_object(
            classifier=classifier, EFA=EFA)['data']

        print('Mean, Min, Max Performance')
        survey_out = get_pred_summary(survey_prediction_results)
        task_out = get_pred_summary(task_prediction_results)
        # the first three entries are the mean, min and max of the default scores
        print_result('Prediction Performance', survey_out[:-1], task_out[:-1])

        # Evaluate the degree of overestimation when performing insample prediction.
        # The last entry of each summary is the array of individual R2 values.
        survey_insample = get_pred_summary(
            survey_prediction_results, scores='scores_insample')[-1]
        task_insample = get_pred_summary(
            task_prediction_results, scores='scores_insample')[-1]
        survey_cv_r2 = survey_out[-1]
        task_cv_r2 = task_out[-1]

        # NOTE(review): the ratio is an elementwise division by the default-score
        # R2, so it becomes very large whenever one of those R2 values is near zero.
        survey_exaggeration = np.mean(survey_insample / survey_cv_r2)
        task_exaggeration = np.mean(task_insample / task_cv_r2)
        print_result('\nInsample exaggeration Proportion',
                     survey_exaggeration, task_exaggeration)

        survey_exaggeration = np.mean(survey_insample - survey_cv_r2)
        task_exaggeration = np.mean(task_insample - task_cv_r2)
        print_result('\nInsample exaggeration Absolute',
                     survey_exaggeration, task_exaggeration)

****************************** Ontology ridge ******************************
Mean, Min, Max Performance
Prediction Performance:
survey: ['0.100', '0.036', '0.285']
task: ['0.012', '0.002', '0.042']

Insample exaggeration Proportion:
survey: 1.642
task: 3.818

Insample exaggeration Absolute:
survey: 0.040
task: 0.014
****************************** Raw ridge ******************************
Mean, Min, Max Performance
Prediction Performance:
survey: ['0.103', '0.035', '0.257']
task: ['0.011', '0.000', '0.040']

Insample exaggeration Proportion:
survey: 3.577
task: 35485.737

Insample exaggeration Absolute:
survey: 0.173
task: 0.275
****************************** Ontology svm ******************************
Mean, Min, Max Performance
Prediction Performance:
survey: ['0.086', '0.004', '0.278']
task: ['0.010', '0.000', '0.042']

Insample exaggeration Proportion:
survey: 1.775
task: 30.629

Insample exaggeration Absolute:
survey: 0.027
task: 0.010
****************************** Raw svm *********



Insample exaggeration Proportion:
survey: 1.642
task: 3.818
Insample exaggeration Absolute:
survey: 0.040
task: 0.014
