## Model

The first table shows the main model parameters for each experiment: the total number of features used in the model (linear models only), the number of features with negative coefficients (linear models only), the learner, and the label used to train the model. For linear models, the second table shows the standardized coefficients for all features.

In [None]:
def summarize_models(model_list, file_format_summarize):
    """Display and save a per-model summary and the standardized coefficients.

    For every entry in ``model_list`` the function looks for a
    ``<model_id>_betas.<file_format>`` file inside ``csvdir``. When the file
    exists, one summary row (feature count, count of negative standardized
    coefficients, learner, train label) and one column of standardized
    coefficients are collected. When it does not exist, a placeholder summary
    row with ``'-'`` counts is collected, but only if ``config`` has a
    ``'model'`` key.

    Non-empty results are rendered as HTML tables and written out through a
    ``DataWriter`` under the names ``model_summary`` and ``betas``.

    Parameters
    ----------
    model_list : iterable of 4-tuples
        Each item unpacks as ``(model_id, config, csvdir, file_format)``.
    file_format_summarize : str
        Passed as ``file_format`` to ``write_experiment_output``.

    Notes
    -----
    Reads the notebook-level names ``summary_id`` and ``output_dir``.
    """
    writer = DataWriter(summary_id)

    summary_rows = []
    beta_columns = []
    for model_id, config, csvdir, file_format in model_list:
        coef_file = join(csvdir, '{}_betas.{}'.format(model_id, file_format))

        if not exists(coef_file):
            # No coefficient file: keep a placeholder row so the model still
            # shows up in the summary, provided the config names a learner.
            if 'model' in config:
                placeholder = pd.DataFrame(
                    {'N features': '-',
                     'N negative': '-',
                     'learner': config['model'],
                     'train_label': config['train_label_column']},
                    index=[model_id])
                summary_rows.append(placeholder)
            continue

        df_coefs = DataReader.read_from_file(coef_file)
        n_features = len(df_coefs)
        standardized = df_coefs['standardized']
        negative_rows = df_coefs[standardized < 0]

        summary_rows.append(
            pd.DataFrame({'N features': [n_features],
                          'N negative': len(negative_rows),
                          'learner': config['model'],
                          'train_label': config['train_label_column']},
                         index=[model_id]))

        # One column per model, indexed by feature name, so that the columns
        # can later be aligned side by side.
        beta_columns.append(
            pd.DataFrame({model_id: standardized.values},
                         index=df_coefs['feature'].values))

    if summary_rows:
        df_summ = pd.concat(summary_rows, sort=True)
        display(Markdown("## Model summary"))
        summary_columns = ['N features', 'N negative', 'learner', 'train_label']
        summary_html = df_summ[summary_columns].to_html(
            index=True,
            classes=['sortable'],
            escape=False,
            float_format=int_or_float_format_func)
        display(HTML(summary_html))

        writer.write_experiment_output(output_dir,
                                       {'model_summary': df_summ},
                                       index=True,
                                       file_format=file_format_summarize)

    if beta_columns:
        df_betas_all = pd.concat(beta_columns, axis=1, sort=True)
        # Features absent from a given model are shown as '-'.
        df_betas_all.fillna('-', inplace=True)
        display(Markdown("## Standardized coefficients"))
        betas_html = df_betas_all.to_html(
            index=True,
            classes=['sortable'],
            escape=False,
            float_format=int_or_float_format_func)
        display(HTML(betas_html))

        writer.write_experiment_output(output_dir,
                                       {'betas': df_betas_all},
                                       index=True,
                                       file_format=file_format_summarize)

# Render the model summary and standardized-coefficient tables for every
# experiment in model_list and save them in the requested summary format.
summarize_models(model_list, file_format_summarize)

In [None]:
def summarize_model_fit(file_format_summarize):
    """Display and save the model-fit statistics of all experiments.

    For every ``(model_id, config, csvdir, file_format)`` entry of the
    notebook-level ``model_list``, the file
    ``<model_id>_model_fit.<file_format>`` in ``csvdir`` is read if it
    exists and re-indexed by ``model_id``. If at least one file was found,
    the combined frame is shown as an HTML table (columns ``N responses``,
    ``N features``, ``R2``, ``R2_adjusted``) and written out through a
    ``DataWriter`` under the name ``model_fit``.

    Parameters
    ----------
    file_format_summarize : str
        Passed as ``file_format`` to ``write_experiment_output``.

    Notes
    -----
    Reads the notebook-level names ``model_list``, ``summary_id`` and
    ``output_dir``.
    """
    collected = []
    for model_id, _config, csvdir, file_format in model_list:
        fit_path = join(csvdir, '{}_model_fit.{}'.format(model_id, file_format))
        if not exists(fit_path):
            continue
        fit_frame = DataReader.read_from_file(fit_path)
        # Label the row with the experiment it came from.
        fit_frame.index = [model_id]
        collected.append(fit_frame)

    if not collected:
        return

    df_fit = pd.concat(collected, sort=True)
    display(Markdown("## Model fit"))
    fit_columns = ['N responses', 'N features', 'R2', 'R2_adjusted']
    fit_html = df_fit[fit_columns].to_html(
        index=True,
        classes=['sortable'],
        escape=False,
        float_format=int_or_float_format_func)
    display(HTML(fit_html))

    writer = DataWriter(summary_id)
    writer.write_experiment_output(output_dir,
                                   {'model_fit': df_fit},
                                   index=True,
                                   file_format=file_format_summarize)

    
# Render the model-fit table for the experiments in model_list and save it
# in the requested summary format.
summarize_model_fit(file_format_summarize)