Skip to content

Commit

Permalink
Merge 59ace0a into ccd7121
Browse files Browse the repository at this point in the history
  • Loading branch information
jbiggsets committed Dec 11, 2018
2 parents ccd7121 + 59ace0a commit 10937f6
Show file tree
Hide file tree
Showing 17 changed files with 118 additions and 93 deletions.
10 changes: 5 additions & 5 deletions conda_requirements.txt
@@ -1,14 +1,14 @@
python>=3.6.3,<3.7
ipython=6.3.1
ipython=6.5.0
jupyter=1.0.0
joblib=0.11
matplotlib=2.1.2
nose=1.3.7
notebook=5.4.1
notebook=5.7.2
numpy=1.13.3
pandas=0.22.0
pandas=0.23.4
scikit-learn=0.19.1
scipy=1.0.0
scipy=1.1.0
seaborn=0.9.0
skll=1.5.2
statsmodels=0.9.0
Expand All @@ -22,4 +22,4 @@ sphinx
sphinx_rtd_theme
coverage
parameterized
setuptools
setuptools
10 changes: 5 additions & 5 deletions requirements.txt
@@ -1,13 +1,13 @@
ipython==6.3.1
ipython==6.5.0
jupyter==1.0.0
joblib==0.11
matplotlib==2.1.2
nose==1.3.7
notebook==5.4.1
notebook==5.7.2
numpy==1.13.3
pandas==0.22.0
pandas==0.23.4
scikit-learn==0.19.1
scipy==1.0.0
scipy==1.1.0
seaborn==0.9.0
skll==1.5.2
statsmodels==0.9.0
Expand All @@ -18,4 +18,4 @@ sphinx
sphinx_rtd_theme
coverage
parameterized
setuptools
setuptools
19 changes: 10 additions & 9 deletions rsmtool/analyzer.py
Expand Up @@ -310,9 +310,8 @@ def compute_basic_descriptives(df, selected_features):
'N': len(df_desc)})

# reorder the columns to make it look better
df_output = df_output[['mean', 'std. dev.', 'min', 'max',
'skewness', 'kurtosis', 'Correlation',
'p', 'N']]
df_output = df_output[['mean', 'std. dev.', 'min', 'max', 'skewness',
'kurtosis', 'Correlation', 'p', 'N']]

return df_output

Expand Down Expand Up @@ -599,7 +598,7 @@ def metrics_helper(human_scores,
Reference standard deviation for system scores. This is used to compute SMD and
should be the standard deviation for the whole population when SMD are computed
for individual subgroups.
When None, this will be computed as the standard devaiation of `system_scores`.
When None, this will be computed as the standard deviation of `system_scores`.
Returns
-------
Expand Down Expand Up @@ -679,7 +678,6 @@ def metrics_helper(human_scores,
rmse = np.sqrt(mse)

# return everything as a series

metrics = pd.Series({'kappa': unweighted_kappa,
'wtkappa': quadratic_weighted_kappa,
'exact_agr': human_system_agreement,
Expand Down Expand Up @@ -715,7 +713,8 @@ def compute_disattenuated_correlations(human_machine_corr,
Series containing Pearson's correlation coefficients for human-machine correlations.
human_human_corr : pandas Series
Series containing Pearson's correlation coefficients for human-human correlations.
This can contain a single value or have the index matching that of human-machine correlations
This can contain a single value or have the index matching that of human-machine
correlations
Returns
-------
Expand All @@ -733,14 +732,16 @@ def compute_disattenuated_correlations(human_machine_corr,
# we now concatenate the two series on index
df_correlations = pd.concat([human_machine_corr, human_human_corr],
axis=1,
sort=True,
keys=['corr_HM', 'corr_HH'])

# if any of the HH correlations are negative, we will ignore these
# and treat them as Nones
with np.errstate(invalid='ignore'):
df_correlations['sqrt_HH'] = np.sqrt(df_correlations['corr_HH'])

df_correlations['corr_disattenuated'] = df_correlations['corr_HM'] / df_correlations['sqrt_HH']
df_correlations['corr_disattenuated'] = (df_correlations['corr_HM'] /
df_correlations['sqrt_HH'])

return df_correlations

Expand Down Expand Up @@ -791,7 +792,7 @@ def compute_correlations_by_group(self,
df_desc_all[grouping_variable] = 'All data'

# combine the two data frames
df_desc_combined = pd.concat([df_desc, df_desc_all])
df_desc_combined = pd.concat([df_desc, df_desc_all], sort=True)
df_desc_combined.reset_index(drop=True, inplace=True)

# compute the various (marginal and partial) correlations with score
Expand Down Expand Up @@ -1085,7 +1086,7 @@ def compute_metrics_by_group(self,
df_preds_all[grouping_variable] = 'All data'

# combine the two data frames
df_preds_combined = pd.concat([df_test, df_preds_all])
df_preds_combined = pd.concat([df_test, df_preds_all], sort=True)
df_preds_combined.reset_index(drop=True, inplace=True)

# group by the grouping_variable columns
Expand Down
4 changes: 2 additions & 2 deletions rsmtool/comparer.py
Expand Up @@ -103,7 +103,7 @@ def make_summary_stat_df(df):
warnings.simplefilter('ignore')
series.append(df.apply(summary_func))

res = pd.concat(series, axis=1)
res = pd.concat(series, axis=1, sort=True)
res.columns = ['MEAN', 'SD', 'MEDIAN', 'MIN', 'MAX']
return res

Expand Down Expand Up @@ -197,7 +197,7 @@ def compute_correlations_between_versions(df_old,
df_merged['{}%%%old'.format(feature)])[0]})
correlation_list.append(df_cor)

df_correlations = pd.concat(correlation_list)
df_correlations = pd.concat(correlation_list, sort=True)
df_correlations.index = df_correlations['Feature']
df_correlations.index.name = None

Expand Down
4 changes: 2 additions & 2 deletions rsmtool/modeler.py
Expand Up @@ -169,7 +169,7 @@ def ols_coefficients_to_dataframe(coefs):
'coefficient': coefs['const']}])

# append the non-intercept frame to the intercept one
df_coef = df_intercept.append(df_non_intercept, ignore_index=True)
df_coef = df_intercept.append(df_non_intercept, sort=True, ignore_index=True)

# we always want to have the feature column first
df_coef = df_coef[['feature', 'coefficient']]
Expand Down Expand Up @@ -216,7 +216,7 @@ def skll_learner_params_to_dataframe(learner):
'coefficient': intercept}])

# append the non-intercept frame to the intercept one
df_coef = df_intercept.append(df_non_intercept, ignore_index=True)
df_coef = df_intercept.append(df_non_intercept, sort=True, ignore_index=True)

# we always want to have the feature column first
df_coef = df_coef[['feature', 'coefficient']]
Expand Down
8 changes: 4 additions & 4 deletions rsmtool/notebooks/comparison/consistency.ipynb
Expand Up @@ -10,7 +10,7 @@
"source": [
"def create_degradation_df(df_degradation, df_eval, df_consistency):\n",
" df_eval_for_degradation = df_eval[df_degradation.columns].copy()\n",
" df_consistency_for_degradation = pd.concat([df_consistency]*len(df_eval))\n",
" df_consistency_for_degradation = pd.concat([df_consistency]*len(df_eval), sort=True)\n",
" df_consistency_for_degradation = df_consistency_for_degradation[df_degradation.columns].copy()\n",
" df_consistency_for_degradation.index = df_eval_for_degradation.index\n",
"\n",
Expand All @@ -21,7 +21,7 @@
" df = pd.concat(\n",
" [df_consistency_for_degradation,\n",
" df_eval_for_degradation,\n",
" df_degradation])\n",
" df_degradation], sort=True)\n",
" return df\n",
"\n",
"\n",
Expand Down Expand Up @@ -84,7 +84,7 @@
" df_degradation_new['version'] = 'new'\n",
" \n",
" \n",
" df = pd.concat([df_degradation_old, df_degradation_new])\n",
" df = pd.concat([df_degradation_old, df_degradation_new], sort=True)\n",
" df = df[['type', 'version', 'corr', 'kappa',\n",
" 'wtkappa', 'exact_agr', 'adj_agr', 'SMD']]\n",
"\n",
Expand Down Expand Up @@ -137,7 +137,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion rsmtool/notebooks/comparison/feature_descriptives.ipynb
Expand Up @@ -125,7 +125,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions rsmtool/notebooks/comparison/header.ipynb
Expand Up @@ -334,8 +334,8 @@
" df_diff['version'] = 'change'\n",
"\n",
" tmp_df = pd.DataFrame(df_old, copy=True)\n",
" tmp_df = tmp_df.append(df_new)\n",
" tmp_df = tmp_df.append(df_diff)\n",
" tmp_df = tmp_df.append(df_new, sort=True)\n",
" tmp_df = tmp_df.append(df_diff, sort=True)\n",
" tmp_df.index.name = 'for' \n",
" tmp_df = tmp_df.reset_index().sort_values(by=['for', 'version'], ascending=[True, False]).set_index(tmp_df.index.names)\n",
" tmp_df.index.name = None\n",
Expand Down
8 changes: 5 additions & 3 deletions rsmtool/notebooks/consistency.ipynb
Expand Up @@ -62,15 +62,17 @@
" markdown_strs.append(' - `wtkappa` < -0.1')\n",
" display(Markdown('\\n'.join(markdown_strs)))\n",
" df_eval_for_degradation = df_eval[df_degradation.columns].copy()\n",
" df_consistency_for_degradation = pd.concat([df_consistency]*len(df_eval))\n",
" df_consistency_for_degradation = pd.concat([df_consistency]*len(df_eval), sort=True)\n",
" df_consistency_for_degradation = df_consistency_for_degradation[df_degradation.columns].copy()\n",
" df_consistency_for_degradation.index = df_eval_for_degradation.index\n",
"\n",
" df_consistency_for_degradation['type'] = 'H-H'\n",
" df_eval_for_degradation['type'] = 'H-M'\n",
" df_degradation['type'] = 'diff'\n",
"\n",
" df = pd.concat([df_consistency_for_degradation, df_eval_for_degradation, df_degradation])\n",
" df = pd.concat([df_consistency_for_degradation,\n",
" df_eval_for_degradation,\n",
" df_degradation], sort=True)\n",
" df = df[['type','corr', 'kappa', 'wtkappa', 'exact_agr', 'adj_agr', 'SMD']]\n",
" df = df.reset_index()\n",
" df = df.set_index(['index', 'type']).sort_index(level='index')\n",
Expand Down Expand Up @@ -127,7 +129,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion rsmtool/notebooks/features_by_group.ipynb
Expand Up @@ -35,7 +35,7 @@
" df_train_feats_all = df_train_merged.copy()\n",
" df_train_feats_all[group] = 'All data'\n",
"\n",
" df_train_combined = pd.concat([df_train_feats, df_train_feats_all])\n",
" df_train_combined = pd.concat([df_train_feats, df_train_feats_all], sort=True)\n",
" df_train_combined.reset_index(drop=True, inplace=True)\n",
"\n",
" # decide on the the height per plot\n",
Expand Down
4 changes: 2 additions & 2 deletions rsmtool/notebooks/summary/evaluation.ipynb
Expand Up @@ -44,7 +44,7 @@
" df_eval.columns = new_column_names\n",
" evals.append(df_eval) \n",
" if len(evals) > 0:\n",
" df_evals = pd.concat(evals)\n",
" df_evals = pd.concat(evals, sort=True)\n",
" else:\n",
" df_evals = pd.DataFrame()\n",
" return(df_evals)\n",
Expand Down Expand Up @@ -133,7 +133,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
8 changes: 4 additions & 4 deletions rsmtool/notebooks/summary/model.ipynb
Expand Up @@ -49,7 +49,7 @@
" summs.append(model_summary)\n",
" \n",
" if not len(summs) == 0:\n",
" df_summ = pd.concat(summs)\n",
" df_summ = pd.concat(summs, sort=True)\n",
" display(Markdown(\"## Model summary\"))\n",
" display(HTML(df_summ[['N features', 'N negative',\n",
" 'learner', 'train_label']].to_html(index=True, \n",
Expand All @@ -63,7 +63,7 @@
" file_format=file_format_summarize)\n",
" \n",
" if not len(betas) == 0:\n",
" df_betas_all = pd.concat(betas, axis=1)\n",
" df_betas_all = pd.concat(betas, axis=1, sort=True)\n",
" df_betas_all.fillna('-', inplace=True)\n",
" display(Markdown(\"## Standardized coefficients\"))\n",
" display(HTML(df_betas_all.to_html(index=True, \n",
Expand Down Expand Up @@ -96,7 +96,7 @@
" fit.index = [model_id]\n",
" fits.append(fit)\n",
" if len(fits)>0:\n",
" df_fit = pd.concat(fits)\n",
" df_fit = pd.concat(fits, sort=True)\n",
" display(Markdown(\"## Model fit\"))\n",
" display(HTML(df_fit[['N responses', 'N features',\n",
" 'R2','R2_adjusted']].to_html(index=True,\n",
Expand Down Expand Up @@ -132,7 +132,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions rsmtool/notebooks/summary/preprocessed_features.ipynb
Expand Up @@ -24,7 +24,7 @@
" model_corrs.index = [model_id]\n",
" corrs.append(model_corrs)\n",
" if not len(corrs) == 0:\n",
" df_summ = pd.concat(corrs)\n",
" df_summ = pd.concat(corrs, sort=True)\n",
" display(header)\n",
" display(HTML(df_summ.to_html(index=True, classes = ['sortable'],\n",
" escape=False,\n",
Expand Down Expand Up @@ -109,7 +109,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.7"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 10937f6

Please sign in to comment.