In [3]:
import os
import sys

import numpy as np
import pandas as pd

sys.path.append('..')
from factor_analyzer.factor_analyzer import FactorAnalyzer

In [4]:
# Root directory of the expected-result CSV files read by `get_data_by_type()`
# (one sub-directory per test file).
EXPECTED_DIR = '../tests/expected'
# Directory of the input CSV files read by `do_analysis()`.
DATA_DIR = '../tests/data'

In [11]:
def do_analysis(filename, factors, method, rotation):
    """
    Run `FactorAnalyzer()` on one input file and return its loadings.

    The file `filename` is read as CSV from `DATA_DIR`, analysed with
    `factors` factors using the given `method` and `rotation`, and the
    resulting loadings table is returned with its columns reordered so
    that the column with the largest sum of absolute loadings comes first.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside `DATA_DIR`.
    factors : int
        Number of factors, passed to `FactorAnalyzer()`.
    method : str
        Fitting method, forwarded to `FactorAnalyzer.analyze()`.
    rotation : str
        Rotation, forwarded to `FactorAnalyzer.analyze()`; the string
        'none' is translated to `None` first.

    Returns
    -------
    The loadings table (presumably a pandas DataFrame; it is only used
    through DataFrame methods here) with columns sorted by descending
    sum of absolute values.
    """

    input_path = os.path.join(DATA_DIR, filename)
    observations = pd.read_csv(input_path)

    # callers spell "no rotation" as the string 'none'
    effective_rotation = rotation if rotation != 'none' else None

    analyzer = FactorAnalyzer(factors)
    analyzer.analyze(observations, method=method, rotation=effective_rotation)

    loadings = analyzer.loadings
    # order the factor columns by total absolute loading, largest first
    column_order = loadings.abs().sum().sort_values(ascending=False).index.values
    return loadings[column_order]

def get_data_by_type(filename, factors, method, rotation, filetype='loadings'):
    """
    Load the expected (R) output for one test file and analysis setup.

    The file read is
    `<EXPECTED_DIR>/<stem>/<filetype>_<method>_<rotation>_<factors>_<stem>.csv`,
    where `<stem>` is `filename` without its extension.

    Parameters
    ----------
    filename : str
        Name of the test input file, e.g. 'test01.csv'.
    factors : int
        Number of factors; used only to build the file name.
    method : str
        Fitting method; used only to build the file name.
    rotation : str
        Rotation name as it appears in the file name (e.g. 'none').
    filetype : str, optional
        Leading part of the file name. Defaults to 'loadings'.

    Returns
    -------
    pandas.DataFrame
        The file contents indexed by its first column, with the remaining
        columns renamed 'RFactor1', 'RFactor2', ... and reordered so that
        the column with the largest sum of absolute values comes first.
        The index carries no name.
    """

    subdir, _ = os.path.splitext(filename)

    new_file_name = '_'.join([filetype, method, rotation, str(factors), subdir + '.csv'])
    new_file_name = os.path.join(EXPECTED_DIR, subdir, new_file_name)

    data = pd.read_csv(new_file_name)
    # the first column holds the row labels
    data = data.set_index(data.columns[0])
    data.columns = ['RFactor{}'.format(num) for num in range(1, data.shape[1] + 1)]
    # order the factor columns by total absolute value, largest first
    data = data[data.abs().sum().sort_values(ascending=False).index.values]
    # `del data.index.name` is rejected by current pandas releases
    # ("can't delete attribute"); assigning None clears the name everywhere.
    data.index.name = None
    return data

def isclose(a, b, rel_tol=1e-06):
    """
    Tell whether `a` and `b` agree to within a relative tolerance.

    The values are considered close when their absolute difference does
    not exceed `rel_tol` times the larger of their magnitudes.
    """
    gap = abs(a - b)
    scale = max(abs(a), abs(b))
    return gap <= rel_tol * scale

def check_agreement(df1, df2, precision=1e-06):
    """
    Compute the percentage of cells on which two data frames agree.

    Cells are paired by position (row and column labels are ignored) and
    compared on their absolute values, so a value whose sign differs
    between the two frames still counts as agreeing. Two cells agree when
    ``|a - b| <= precision * max(|a|, |b|)``, the same relative-tolerance
    rule that `isclose()` applies to a single pair of values.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Numeric frames of identical shape.
    precision : float, optional
        Relative tolerance. Defaults to 1e-06.

    Returns
    -------
    float
        Percentage (0-100) of agreeing cells, rounded to 4 decimals.

    Raises
    ------
    AssertionError
        If the two frames do not have the same shape.
    ValueError
        If the frames contain no cells, since the percentage is undefined.
    """

    assert df1.shape == df2.shape, 'data frames must have the same shape'

    # .values drops the labels so that the comparison is purely positional
    left = df1.abs().values
    right = df2.abs().values

    if left.size == 0:
        raise ValueError('cannot compute agreement for empty data frames')

    # vectorised form of the per-cell relative-tolerance test
    matches = np.abs(left - right) <= precision * np.maximum(left, right)
    return round(float(matches.mean()) * 100, 4)

def do_comparison(factors=3, method='minres', rotation='none'):
    """
    Compare the Python loadings with the expected R loadings for all tests.

    For each of the files 'test01.csv' ... 'test10.csv' the loadings from
    `do_analysis()` are compared against those from `get_data_by_type()`
    with `check_agreement()` at the relative tolerances 1e-2, 1e-3, 1e-4
    and 1e-5.

    Parameters
    ----------
    factors : int, optional
        Number of factors. Defaults to 3.
    method : str, optional
        Fitting method. Defaults to 'minres'.
    rotation : str, optional
        Rotation name; 'none' means no rotation. Defaults to 'none'.

    Returns
    -------
    str
        A LaTeX table (from `DataFrame.to_latex()`) with one row per test
        file and one 'Precision_<tolerance>' column per tolerance, ordered
        from the loosest tolerance to the tightest.
    """

    tests = ['test{}.csv'.format(str(i).zfill(2)) for i in range(1, 11)]
    tolerances = [1e-2, 1e-3, 1e-4, 1e-5]

    # Neither set of loadings depends on the tolerance, so compute each
    # (Python, R) pair once per file instead of once per tolerance.
    loading_pairs = [(do_analysis(test, factors, method, rotation),
                      get_data_by_type(test, factors, method, rotation))
                     for test in tests]

    columns = ['Precision_{0:f}'.format(tolerance) for tolerance in tolerances]
    precisions = {}
    for column, tolerance in zip(columns, tolerances):
        precisions[column] = [check_agreement(data_p, data_r, tolerance)
                              for data_p, data_r in loading_pairs]

    results = pd.DataFrame(precisions, index=pd.Index(tests, name='Test'))
    # Select the columns explicitly so that the order (loosest tolerance
    # first) does not depend on how the DataFrame constructor orders dict keys.
    results = results[columns]
    return results.to_latex()

In [12]:
# Print the Python-vs-R agreement table (as LaTeX) for every combination of
# factor count, fitting method and rotation.
for factors in [2, 3]:
    for method in ['ml', 'minres']:
        # 'none' is passed as a string: it is part of the expected-output
        # file name, and `do_analysis()` converts it to None itself.
        for rotation in ['none', 'promax', 'varimax']:
            print(factors, method, rotation)
            print(do_comparison(factors, method, rotation))
            print('--------------------------------------')

2 ml none
\begin{tabular}{lrrrr}
\toprule
{} &  Precision\_0.010000 &  Precision\_0.001000 &  Precision\_0.000100 &  Precision\_0.000010 \\
Test       &                     &                     &                     &                     \\
\midrule
test01.csv &            100.0000 &            100.0000 &             98.7500 &             86.2500 \\
test02.csv &            100.0000 &            100.0000 &            100.0000 &             95.0000 \\
test03.csv &            100.0000 &            100.0000 &             91.6667 &             87.5000 \\
test04.csv &            100.0000 &            100.0000 &            100.0000 &            100.0000 \\
test05.csv &            100.0000 &            100.0000 &            100.0000 &             95.0000 \\
test06.csv &            100.0000 &            100.0000 &             95.4545 &             48.8636 \\
test07.csv &            100.0000 &            100.0000 &             97.8261 &             86.9565 \\
test08.csv &            100.0000 & 

\begin{tabular}{lrrrr}
\toprule
{} &  Precision\_0.010000 &  Precision\_0.001000 &  Precision\_0.000100 &  Precision\_0.000010 \\
Test       &                     &                     &                     &                     \\
\midrule
test01.csv &               100.0 &            100.0000 &            100.0000 &             91.6667 \\
test02.csv &               100.0 &             93.3333 &             73.3333 &             30.0000 \\
test03.csv &               100.0 &             97.2222 &             88.8889 &             36.1111 \\
test04.csv &               100.0 &             96.6667 &             96.6667 &             80.0000 \\
test05.csv &               100.0 &            100.0000 &            100.0000 &            100.0000 \\
test06.csv &               100.0 &             99.2424 &             91.6667 &             55.3030 \\
test07.csv &               100.0 &             98.5507 &             91.3043 &             60.8696 \\
test08.csv &               100.0 &           