In [1]:
import os
import math
import numpy as np
import pandas as pd

from factor_analyzer import (FactorAnalyzer,
                             calculate_bartlett_sphericity,
                             calculate_kaiser_meyer_olkin)

In [None]:
# Directory holding the input CSV files that get analyzed.
data_dir = '../data'
# Directory holding the stored reference outputs to compare against.
expected_dir = '../expected'

In [None]:
def do_analysis(top_dir, test_name, factors, method, rotation):
    """
    Run `FactorAnalyzer()` on one scenario's CSV file and collect
    the pieces of the fitted result in a dictionary.

    The input is read from `<top_dir>/<test_name>.csv`. A rotation of
    'none' is passed to the analyzer as `None`, and the method name
    'uls' is translated to 'minres'; any other value is forwarded
    unchanged.

    The returned dictionary has the keys 'value', 'evalues', 'loading',
    'uniquenesses' and 'communalities'.
    """
    csv_path = os.path.join(top_dir, test_name + '.csv')
    frame = pd.read_csv(csv_path)

    # translate scenario labels into the arguments the analyzer expects
    if rotation == 'none':
        rotation = None
    method_aliases = {'uls': 'minres'}
    method = method_aliases.get(method, method)

    analyzer = FactorAnalyzer()
    analyzer.analyze(frame, factors, method=method, rotation=rotation)

    # `get_eigenvalues()` hands back a pair; the second element is
    # stored under 'value' and the first under 'evalues'
    evalues, values = analyzer.get_eigenvalues()

    return dict(value=values,
                evalues=evalues,
                loading=analyzer.loadings,
                uniquenesses=analyzer.get_uniqueness(),
                communalities=analyzer.get_communalities())

In [None]:
def get_r_output(top_dir, test_name, factors, method, rotation):
    """
    Load the stored R results for one scenario.

    One CSV file is read per output type from the directory
    `<top_dir>/<test_name>/`; each file is named
    `<output_type>_<method>_<rotation>_<factors>_<test_name>.csv`.

    Returns a dictionary mapping each output type ('value', 'evalues',
    'loading', 'uniquenesses', 'communalities') to its data frame.
    """
    output_types = ('value',
                    'evalues',
                    'loading',
                    'uniquenesses',
                    'communalities')

    def _csv_path(output_type):
        # build the file name first, then place it in the scenario folder
        name = '{}_{}_{}_{}_{}.csv'.format(output_type,
                                           method,
                                           rotation,
                                           factors,
                                           test_name)
        return os.path.join(top_dir, test_name, name)

    return {output_type: pd.read_csv(_csv_path(output_type))
            for output_type in output_types}

In [None]:
def normalize(data):
    """
    Bring a result table into a canonical form so that two tables can
    be compared cell by cell.

    The following steps are applied to a copy of `data`; the frame
    passed in is left untouched:

    1. columns whose label contains 'Unnamed' are treated as possible
       index columns;
    2. every other non-object column is replaced by its absolute value;
    3. if there is exactly one possible index column, it becomes the
       index (and is therefore dropped by the final index reset);
    4. columns are ordered by their absolute column sum, largest first;
    5. columns are renamed 'col1', 'col2', ... and the index is reset
       to a fresh default index.

    Parameters
    ----------
    data : pd.DataFrame
        The table to normalize.

    Returns
    -------
    pd.DataFrame
        A new, normalized frame.
    """
    # work on a copy so the caller's frame (values, index, index name)
    # is never modified as a side effect
    frame = data.copy()

    # check for possible index column; `str()` keeps this working for
    # non-string labels such as the integer labels of a default frame
    possible_index = [col for col in frame.columns if 'Unnamed' in str(col)]

    # get numeric columns
    non_object = frame.dtypes[frame.dtypes != 'object'].index.values
    numeric_cols = [col for col in non_object if col not in possible_index]

    # take absolute value
    frame[numeric_cols] = frame[numeric_cols].abs()

    # set index, if there is exactly one candidate column
    if len(possible_index) == 1:
        frame = frame.set_index(possible_index[0])

    # sort the columns by their absolute column sums, largest first
    column_order = frame.abs().sum().sort_values(ascending=False).index.values
    frame = frame[column_order]

    # update column names; the index (and its name) is discarded by the
    # reset below, so there is no need to rename it first
    frame.columns = ['col{}'.format(i) for i in range(1, frame.shape[1] + 1)]
    return frame.reset_index(drop=True)

In [None]:
def check_close(data1, data2, abs_tol=0.1):
    """
    Check how closely two result tables agree.

    Both inputs are first passed through `normalize()` and must then
    have the same shape. Cells are compared position by position using
    an absolute tolerance only (no relative tolerance).

    Parameters
    ----------
    data1, data2
        The two tables to compare; anything `normalize()` accepts.
    abs_tol : float, optional
        Largest absolute difference at which two cells still count as
        close. Defaults to 0.1.

    Returns
    -------
    float
        The fraction of cells whose absolute difference is at most
        `abs_tol`, or NaN when the tables contain no cells.

    Raises
    ------
    AssertionError
        If the normalized tables do not have the same shape.
    """
    data1 = normalize(data1)
    data2 = normalize(data2)

    assert data1.shape == data2.shape, (
        'shape mismatch: {} vs {}'.format(data1.shape, data2.shape))

    left = np.asarray(data1, dtype=float)
    right = np.asarray(data2, dtype=float)

    # nothing to compare: make the 0/0 result explicit
    if left.size == 0:
        return np.nan

    # with rtol=0 this is exactly the test |left - right| <= abs_tol,
    # evaluated for all cells at once
    matches = np.isclose(left, right, rtol=0, atol=abs_tol)
    return matches.sum() / matches.size

In [None]:
def check_all(data_dir,
              expected_dir,
              test_name,
              factors,
              method,
              rotation):
    """
    Compare every output type for one scenario.

    This is a generator: the stored R output and the fresh analysis are
    only loaded once iteration starts. It then yields one
    `check_close()` result per output type, in the order 'value',
    'evalues', 'loading', 'uniquenesses', 'communalities'.
    """
    scenario = (test_name, factors, method, rotation)

    expected = get_r_output(expected_dir, *scenario)
    actual = do_analysis(data_dir, *scenario)

    output_types = ('value',
                    'evalues',
                    'loading',
                    'uniquenesses',
                    'communalities')

    for key in output_types:
        yield check_close(expected[key], actual[key])

In [None]:
def view_output(data_dir,
                expected_dir,
                test_name,
                factors,
                method,
                rotation):
    """
    Print the normalized outputs for one scenario.

    For each output type ('value', 'evalues', 'loading', 'uniquenesses',
    'communalities') three things are printed in turn: the output type,
    the normalized table from `get_r_output()`, and the normalized
    table from `do_analysis()`.
    """
    scenario = (test_name, factors, method, rotation)

    expected = get_r_output(expected_dir, *scenario)
    actual = do_analysis(data_dir, *scenario)

    output_types = ('value',
                    'evalues',
                    'loading',
                    'uniquenesses',
                    'communalities')

    for key in output_types:
        # normalize both tables before anything is printed
        expected_table = normalize(expected[key])
        actual_table = normalize(actual[key])

        for item in (key, expected_table, actual_table):
            print(item)

In [None]:
# Print the normalized reference and computed tables for one scenario.
view_output(data_dir=data_dir,
            expected_dir=expected_dir,
            test_name='test01',
            factors=2,
            method='uls',
            rotation='none')