In [None]:
# Imports
from scipy.stats import chi2_contingency
from scipy.stats import chi2

from itertools import combinations

import pandas as pd


def is_dependent(df, attr1, attr2, significance=0.05):
    """Decide whether two attributes are dependent using the Chi^2 test.

    Builds the observation table for ``attr1`` and ``attr2`` with
    ``create_observation_table`` and passes it to ``chi2_contingency``.

    Args:
        df: DataFrame containing both attributes as columns.
        attr1: Name of the first attribute.
        attr2: Name of the second attribute.
        significance: Significance level of the test (default 0.05).

    Returns:
        True if the test's p-value is below ``significance``, i.e. the
        attributes are considered dependent; False otherwise.
    """
    observation = create_observation_table(df, attr1, attr2)

    # Only the p-value is needed: comparing it against the significance
    # level is the same decision as comparing the statistic against the
    # critical value chi2.ppf(1 - significance, dof).
    _, pval, _, _ = chi2_contingency(observation)

    return bool(pval < significance)


def create_observation_table(df, attr1, attr2):
    """Create the observation (contingency) table for two attributes.

    Counts how often each pair of values of ``attr1`` and ``attr2``
    occurs together in ``df``.

    Args:
        df: DataFrame containing both attributes as columns.
        attr1: Column whose values become the row labels.
        attr2: Column whose values become the column labels.

    Returns:
        DataFrame of co-occurrence counts with sorted row and column
        labels, holding 0 for value pairs that never occur together.
    """
    # pd.crosstab counts, sorts and zero-fills in one step. The previous
    # cell-by-cell fill relied on chained indexing
    # (``observation[col].loc[row] = val``), which pandas does not
    # guarantee to write back into the parent frame.
    observation = pd.crosstab(df[attr1], df[attr2])

    # crosstab names both axes after the source columns; clear the names so
    # the table carries plain, unnamed labels.
    observation.index.name = None
    observation.columns.name = None

    return observation


if __name__ == '__main__':
    # Load the CSV the analysis runs on
    DATA_SOURCE = r'../data/credit_output.csv'
    df = pd.read_csv(DATA_SOURCE)

    # Nominal attributes to be tested against each other
    nominal_attributes = [
        'personal_status',
        'own_telephone',
        'job',
        'other_parties',
        'works_outside_US',
        'purpose',
        'foreign_worker',
        'location',
        'property_magnitude',
        'housing',
        'employment',
        'state',
        'other_payment_plans',
        'class',
        'credit_history',
    ]

    # Test every unordered pair and report only the dependent ones
    for first, second in combinations(nominal_attributes, 2):
        is_corr = is_dependent(df, first, second)
        if not is_corr:
            continue
        pair_label = str(first) + " & " + str(second)
        print(f'{pair_label:<60}: {is_corr}')
    

personal_status & own_telephone                             : True
personal_status & property_magnitude                        : True
personal_status & housing                                   : True
personal_status & employment                                : True
personal_status & credit_history                            : True
own_telephone & job                                         : True
own_telephone & property_magnitude                          : True
own_telephone & housing                                     : True
own_telephone & employment                                  : True
