In [1]:
import os
import sys
import numpy as np
import pandas as pd
from math import isclose

sys.path.append('..')
from factor_analyzer.factor_analyzer import FactorAnalyzer

In [2]:
# Root directory of the expected (R output) CSV files; `get_data_by_type()`
# looks inside one sub-directory per test file name.
EXPECTED_DIR = '../tests/expected'
# Directory from which `do_comparison()` reads the input datasets
# (`test01.csv` ... `test10.csv`).
DATA_DIR = '../tests/data'

In [3]:
def do_analysis(data, factors, method, rotation):
    """
    Fit a `FactorAnalyzer()` on `data` and return its loadings.

    Parameters
    ----------
    data
        Input passed straight to `FactorAnalyzer.analyze()`.
    factors
        Value handed to the `FactorAnalyzer` constructor.
    method
        Forwarded as the `method` keyword of `analyze()`.
    rotation
        Forwarded as the `rotation` keyword of `analyze()`; the string
        'none' is translated to `None` first.

    Returns
    -------
    The analyzer's `loadings`, with the columns reordered so that the
    column with the largest sum of absolute values comes first.
    """
    if rotation == 'none':
        rotation = None

    analyzer = FactorAnalyzer(factors)
    analyzer.analyze(data, method=method, rotation=rotation)

    loadings = analyzer.loadings
    # Rank the factor columns by total absolute loading, largest first.
    column_weights = loadings.abs().sum()
    ordered_columns = column_weights.sort_values(ascending=False).index.values
    return loadings[ordered_columns]

def get_data_by_type(filename, factors, method, rotation, filetype='loadings',
                     expected_dir=None):
    """
    Get the R output data by file type (e.g. 'loadings').

    The file is looked up as
    `<expected_dir>/<stem>/<filetype>_<method>_<rotation>_<factors>_<stem>.csv`
    where `<stem>` is `filename` without its extension.

    Parameters
    ----------
    filename : str
        Name of the test data file, e.g. 'test01.csv'.
    factors : int
        Number of factors; used as part of the expected file name.
    method : str
        Fitting method; used as part of the expected file name.
    rotation : str
        Rotation name; used as part of the expected file name.
    filetype : str, optional
        Prefix of the expected file name. Defaults to 'loadings'.
    expected_dir : str, optional
        Root directory of the expected files. Defaults to the
        module-level `EXPECTED_DIR`.

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by the first CSV column (index name
        cleared), with the remaining columns renamed 'RFactor1',
        'RFactor2', ... and reordered so that the column with the largest
        sum of absolute values comes first.
    """
    if expected_dir is None:
        expected_dir = EXPECTED_DIR

    subdir, _ = os.path.splitext(filename)

    new_file_name = '_'.join([filetype, method, rotation, str(factors), subdir + '.csv'])
    new_file_name = os.path.join(expected_dir, subdir, new_file_name)

    data = pd.read_csv(new_file_name)
    data = data.set_index(data.columns.values[0])
    data.columns = ['RFactor{}'.format(num) for num in range(1, data.shape[1] + 1)]
    data = data[data.abs().sum().sort_values(ascending=False).index.values]
    # `del data.index.name` raises AttributeError on pandas >= 1.0;
    # assigning None clears the name on every pandas version.
    data.index.name = None
    return data

def check_agreement(df1, df2, precision=1e-06):
    """
    Return the percentage of cells on which two data frames agree.

    Signs are ignored: both frames are converted to absolute values
    before comparing. Cells are compared position by position with
    `math.isclose()`, using `precision` as the relative tolerance.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames to compare; they must have the same shape.
    precision : float, optional
        Relative tolerance passed to `isclose()`. Defaults to 1e-06.

    Returns
    -------
    float
        Share of matching cells in percent, rounded to 7 decimals.

    Raises
    ------
    AssertionError
        If the two frames differ in shape.
    """
    assert df1.shape == df2.shape

    left, right = df1.abs(), df2.abs()
    n_rows, n_cols = left.shape

    # Booleans sum as 0/1, so this counts the matching cells.
    n_matching = sum(
        isclose(left.iloc[row, col], right.iloc[row, col], rel_tol=precision)
        for row in range(n_rows)
        for col in range(n_cols)
    )
    return round((n_matching / (n_rows * n_cols)) * 100, 7)

def do_comparison(factors=3, method='minres', rotation='none'):
    """
    Compare `do_analysis()` loadings against the expected loadings for the
    ten test datasets (`test01.csv` ... `test10.csv`) at two tolerances.

    Parameters
    ----------
    factors : int, optional
        Number of factors passed to `do_analysis()` and
        `get_data_by_type()`. Defaults to 3.
    method : str, optional
        Fitting method passed to both helpers. Defaults to 'minres'.
    rotation : str, optional
        Rotation passed to both helpers. Defaults to 'none', which is
        labelled 'Unrotated' in the result columns.

    Returns
    -------
    results : pandas.DataFrame
        One row per dataset (index 'Dataset', labelled '1'..'10') and one
        column per relative tolerance, holding the `check_agreement()`
        percentage rounded to 2 decimals. Columns are a two-level index
        ('Rotation', 'Relative Tolerance') ordered 0.0010 then 0.0001.
    sizes : list of int
        Number of rows of each dataset, in dataset order.
    """
    save_rotation = 'unrotated' if rotation == 'none' else rotation

    tests = ['test{}.csv'.format(str(i).zfill(2)) for i in range(1, 11)]

    # The fitted and the expected loadings do not depend on the tolerance,
    # so compute them once per dataset instead of once per
    # (tolerance, dataset) pair.
    loadings_pairs = []
    sizes = []
    for test in tests:
        data = pd.read_csv(os.path.join(DATA_DIR, test))

        data_p = do_analysis(data, factors, method, rotation)
        data_r = get_data_by_type(test, factors, method, rotation)

        loadings_pairs.append((data_p, data_r))
        sizes.append(data.shape[0])

    precisions = []
    columns = []
    for precision in [1e-4, 1e-3]:
        precisions.append([check_agreement(data_p, data_r, precision)
                           for data_p, data_r in loadings_pairs])
        columns.append((save_rotation.title(), '{0:.4f}'.format(precision)))

    columns = pd.MultiIndex.from_tuples(columns, names=['Rotation', 'Relative Tolerance'])

    # `precisions` is (tolerance x dataset); transpose to get one row per dataset.
    results = pd.DataFrame(np.array(precisions).T, columns=columns)
    results['Dataset'] = ['{}'.format(i) for i in range(1, len(tests) + 1)]
    results.set_index('Dataset', inplace=True)
    # Show the looser tolerance first.
    results = results[list(reversed(results.columns.values))]
    return results.round(2), sizes

In [4]:
def get_comparison_to_latex():
    """
    Build one LaTeX table per (method, number of factors) combination.

    For each method in ('minres', 'ml') and each factor count in (2, 3),
    the `do_comparison()` results for the rotations 'none', 'varimax' and
    'promax' are concatenated side by side, a column 'N' with the dataset
    sizes is appended, and the frame is rendered with `to_latex()` inside
    a `table` environment with a caption.

    Returns
    -------
    list of str
        The four LaTeX table strings, in the order the combinations are
        iterated (method outermost).
    """
    method_names = {'minres': 'ULS', 'ml': 'MLE'}

    # Backslashes are escaped explicitly: `\c`, `\%` and `\e` are invalid
    # escape sequences in a normal string literal and trigger a
    # DeprecationWarning/SyntaxWarning. The resulting text is unchanged.
    header_template = ('\\begin{{table}}[H]\n\\caption{{Match Rates (\\%), '
                       '{} Method, {} Factors}}\n')
    footer = '\\end{table}'

    result = []
    for method in ['minres', 'ml']:
        for factors in [2, 3]:

            frames = []
            sizes = None
            for rotation in ['none', 'varimax', 'promax']:
                # The sizes returned by the last call are the ones used below.
                compare, sizes = do_comparison(factors, method, rotation)
                frames.append(compare)

            combined = pd.concat(frames, axis=1)
            combined['N'] = sizes
            latex_combined = combined.to_latex(multicolumn_format='c')

            header = header_template.format(method_names[method], factors)
            result.append(''.join([header, latex_combined, footer]))
    return result

In [5]:
# Print every LaTeX table returned by `get_comparison_to_latex()`, one after another.
for latex in get_comparison_to_latex():
    print(latex)

\begin{table}[H]
\caption{Match Rates (\%), ULS Method, 2 Factors}
\begin{tabular}{lrrrrrrr}
\toprule
Rotation & \multicolumn{2}{c}{Unrotated} & \multicolumn{2}{c}{Varimax} & \multicolumn{2}{c}{Promax} &       N \\
Relative Tolerance &    0.0010 &  0.0001 &  0.0010 &  0.0001 &  0.0010 & \multicolumn{2}{c}{0.0001} \\
Dataset &           &         &         &         &         &         &         \\
\midrule
1       &    100.00 &  100.00 &  100.00 &  100.00 &  100.00 &  100.00 &     405 \\
2       &    100.00 &  100.00 &  100.00 &  100.00 &  100.00 &  100.00 &    1678 \\
3       &    100.00 &  100.00 &  100.00 &  100.00 &  100.00 &   87.50 &     175 \\
4       &    100.00 &   85.00 &  100.00 &  100.00 &  100.00 &   95.00 &     496 \\
5       &    100.00 &   95.00 &  100.00 &  100.00 &  100.00 &  100.00 &   42000 \\
6       &    100.00 &  100.00 &  100.00 &  100.00 &  100.00 &  100.00 &     459 \\
7       &    100.00 &   97.83 &  100.00 &  100.00 &  100.00 &  100.00 &    2571 \\
8       &

In [None]:
# Scratch cell: build an analyzer with 3 as the constructor argument
# (the same position `do_analysis()` uses for `factors`).
fa = FactorAnalyzer(3)

In [None]:
# Scratch cell: load the last test dataset from the shared data directory.
data = pd.read_csv(os.path.join(DATA_DIR, 'test10.csv'))

In [None]:
# Scratch cell: run the analyzer on `data` with the 'minres' method and no rotation.
fa.analyze(data, method='minres', rotation=None)

In [None]:
# Scratch cell: display the analyzer's loadings as the cell output.
fa.loadings