In [21]:
import pandas as pd
from statsmodels.stats.contingency_tables import cochrans_q
import numpy as np

In [10]:
# task name -> classifications to evaluate. The task name selects the
# '<task>_predictions.csv' file; 'any' keeps every row, while any other value
# keeps only the rows whose 'Actual' label equals it.
# (the 'type' task, ['any', 0, 1, 2, 3], is currently disabled)
tasks = dict(binary=['any', 0, 1])

# column-name substrings used to group predictions by text representation
representations = [
    'bow',
    'freq',
    'tfidf',
]

# column-name substrings used to group predictions by learning algorithm
models = [
    'decision-tree',
    'svm',
    'naive-bayes',
    'naive-bayes-multinomial',
]

In [11]:
#most frequent value in an array of non-negative integers
def mode(array):
    '''
    array: a 1-d sequence of non-negative integers

    returns: the value that occurs most often; when several values are tied,
    the smallest of them is returned.
    '''
    # bincount()[v] is the number of occurrences of v, so the position of the
    # largest count is the mode itself
    return np.bincount(array).argmax()

In [12]:
#combine columns in a dataframe whose names contain certain substrings
def combine_columns(df, substrings):
    '''
    df: the input dataframe that has many column names that contain certain common substrings
    substrings: the list of substrings used for combination

    returns: a DataFrame with `substrings` as its columns and the same row index as `df`.
    Each row is the mode of the value for that row of all of the columns in `df` that
    belong to that substring. Ties go to the smallest value.

    A column belongs to the most specific substring only: when one substring is contained
    in another (e.g. 'naive-bayes' inside 'naive-bayes-multinomial'), a column that
    contains the longer one is not counted for the shorter one.

    raises: ValueError if a substring does not select any column of `df`.
    '''
    substrings = list(substrings)
    combined = {}

    for column_substring in substrings:
        #longer substrings that embed this one would otherwise be matched by it as well
        more_specific = [other for other in substrings
                         if other != column_substring and column_substring in other]
        to_be_combined = [
            column for column in df.columns
            if column_substring in column
            and not any(other in column for other in more_specific)
        ]
        if not to_be_combined:
            raise ValueError(f'no column of df matches the substring {column_substring!r}')

        #values are cast to int because np.bincount only counts non-negative integers
        values = df[to_be_combined].to_numpy().astype(int)
        #row-wise mode: position of the largest count is the most frequent value
        combined[column_substring] = [np.bincount(row).argmax() for row in values]

    return pd.DataFrame(combined, index=df.index)

In [22]:
#find which models performed best using basic accuracy score
def evaluate_models(df, substrings):
    '''
    df: DataFrame of predictions that also has an 'Actual' column with the true labels
    substrings: the column-name substrings handed to `combine_columns`

    returns: a DataFrame with a single row labelled 'Score' and one column per substring.
    Each value is the fraction of rows where the combined prediction for that substring
    equals 'Actual' (nan when `df` has no rows).
    '''
    combined = combine_columns(df, substrings)
    #compare by position rather than by label: the combined frame and df are row-aligned,
    #but their index labels may differ (e.g. after df was filtered)
    actual = df['Actual'].to_numpy()
    scores = {
        column: (combined[column].to_numpy() == actual).mean()
        for column in substrings
    }
    return pd.DataFrame(scores, index=['Score'])

#test model significance difference w/ Cochran's Q while combining certain columns
def test_model_difference(df, substrings):
    '''
    df: DataFrame of predictions, one column per model/representation combination
    substrings: the column-name substrings handed to `combine_columns`

    returns: the result object of statsmodels' `cochrans_q`; callers read its `pvalue`.

    NOTE(review): Cochran's Q is defined for binary outcomes, so this presumably
    relies on the combined predictions taking only two distinct values. Confirm
    before enabling tasks with more than two classes.
    '''
    combined = combine_columns(df, substrings)
    #cochrans_q expects ONE 2-d array (rows = samples, columns = treatments); passing
    #the columns as separate positional arguments is the friedmanchisquare convention
    observations = combined[list(substrings)].to_numpy()
    return cochrans_q(observations)

In [23]:
#find specific rankings for each model
def overall_evaluation(task, classification):
    '''
    task: task name; predictions are read from '<task>_predictions.csv'
    classification: 'any' to use every row, otherwise only the rows whose 'Actual'
    value equals it are evaluated

    Prints the p-value of the significance test and the per-column accuracy scores,
    best first. Returns nothing.
    '''
    df = pd.read_csv(f'{task}_predictions.csv')
    if classification != 'any':
        #drop=True: a plain reset_index() would insert the old labels as an 'index'
        #column, which would then be scored like a model and would shift the [1:]
        #slice below relative to the 'any' case
        df = df[df['Actual'] == classification].reset_index(drop=True)

    all_columns = df.columns.values
    scores = evaluate_models(df, all_columns).transpose()
    scores = scores.sort_values('Score', ascending=False)
    #the first column is left out of the significance test -- presumably it is not a
    #model prediction (e.g. 'Actual' or a saved index); TODO confirm against the csv
    significance = test_model_difference(df, all_columns[1:])
    print(f'Task: {task}.')
    #NOTE(review): the label says Friedman, but test_model_difference calls cochrans_q
    print(f'Friedman chi square test p-value: {significance.pvalue}')
    print(scores)
    print()

In [24]:
#evaluate algorithms and representations, find significance level
def algorithm_and_rep_eval(task, classification):
    '''
    task: task name; predictions are read from '<task>_predictions.csv'
    classification: 'any' to use every row, otherwise only the rows whose 'Actual'
    value equals it are evaluated

    For each grouping (first `representations`, then `models`) prints the p-value of
    the significance test, the accuracy of every group (best first) and the mean of
    those accuracies. Returns nothing.
    '''
    df = pd.read_csv(f'{task}_predictions.csv')
    if classification != 'any':
        #drop=True keeps the old row labels from being added as an extra 'index' column
        df = df[df['Actual'] == classification].reset_index(drop=True)

    for grouping in [representations, models]:
        #after the transpose the groups are rows and the single column is 'Score'
        scores = evaluate_models(df, grouping).transpose()
        scores = scores.sort_values('Score', ascending=False)
        significance = test_model_difference(df, grouping)

        print(f'Task: {task}. Grouping: {grouping}.')
        #NOTE(review): the label says Friedman, but test_model_difference calls cochrans_q
        print(f'Friedman chi square test p-value: {significance.pvalue}')
        print(scores)
        print(f'Mean: {scores["Score"].mean()}')
        print()

In [25]:
#run both evaluations for every configured task and classification
for task, classifications in tasks.items():
    print(f'\nTask: {task.title()}\n')
    for classification in classifications:
        print(f'Classification: {classification}')
        #rankings for all individual models
        overall_evaluation(task, classification)
        #representations and algorithms grouped together
        algorithm_and_rep_eval(task, classification)


Task: Binary

Classification: any


AttributeError: module 'statsmodels.stats' has no attribute 'contingency_tables'