In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import FactorAnalysis
from sklearn.preprocessing import StandardScaler



In [16]:
# Requires the third-party factor_analyzer package; if it is missing, run: %pip install factor_analyzer



In [17]:
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import calculate_kmo

In [20]:
# Exploratory factor analysis: fit an unrotated 3-factor model to the data in
# beer_rnd.xlsx and display the loading matrix.
from factor_analyzer import FactorAnalyzer

# The whole sheet is passed to the model, so every column is treated as an
# observed variable.
df_features = pd.read_excel('beer_rnd.xlsx')

# n_factors=3 is the library default; it is spelled out so the number of
# columns in the loading matrix is visible in the code. rotation=None keeps
# the raw (unrotated) solution.
# (A pasted FactorAnalyzer(...) repr used to follow fit(); as code it only
# built and discarded a second estimator, so it has been dropped.)
fa = FactorAnalyzer(n_factors=3, rotation=None)
fa.fit(df_features)

# Loading matrix: one row per observed variable, one column per factor.
fa.loadings_



array([[ 0.52495245,  0.77110983,  0.01550465],
       [ 0.25386052,  0.94335272, -0.03092207],
       [ 0.6173724 ,  0.72510909, -0.01353723],
       [-0.63567841,  0.12368894,  0.21350641],
       [ 0.89226793, -0.21680669,  0.12283438],
       [ 0.75631755, -0.51665806,  0.09321009],
       [ 0.79393729, -0.53264044, -0.05042948],
       [ 0.02622928,  0.21146813,  0.34257097],
       [ 0.03055222, -0.0602808 ,  0.21774456],
       [ 0.08414565,  0.03080317, -0.15656457],
       [ 0.22344494, -0.06521564, -0.00777709],
       [ 0.00961745, -0.00906637,  0.40934774],
       [-0.03729326, -0.01041327, -0.0507358 ],
       [-0.03892195, -0.04502418,  0.07258424]])

In [None]:
"""

1. Choose a rotation option:
varimax (orthogonal rotation)
promax (oblique rotation)
oblimin (oblique rotation)
oblimax (orthogonal rotation)
quartimin (oblique rotation)
quartimax (orthogonal rotation)
equamax (orthogonal rotation)

2. method ({'minres', 'ml', 'principal'}, optional)

3. impute ({'drop', 'mean', 'median'}, optional)
"""

In [21]:
# Communalities computed from the fitted loading matrix (one value per observed variable).
fa.get_communalities()

array([0.87042584, 0.9553157 , 0.90711512, 0.46497098, 0.85823548,
       0.84763991, 0.9165854 , 0.16276162, 0.05197991, 0.03254179,
       0.0542412 , 0.16774027, 0.00407335, 0.00881057])

In [None]:
"""
get_communalities()
Calculate the communalities, given the factor loading matrix.
"""

In [22]:
# Eigenvalues for the fitted model. The call returns two arrays (see the
# output below); per the factor_analyzer docs these are the original and the
# common-factor eigenvalues -- TODO confirm against the installed version.
fa.get_eigenvalues()


(array([3.38655702, 2.79466471, 1.26759646, 1.18217245, 1.12968654,
        0.99271966, 0.88386983, 0.81545409, 0.66464548, 0.51059022,
        0.17321278, 0.11239238, 0.07083513, 0.01560324]),
 array([ 3.18537829e+00,  2.67902609e+00,  4.38032732e-01,  2.98123826e-01,
         1.91641746e-01,  1.50999069e-01,  1.09904304e-01,  2.61913170e-03,
        -1.01774347e-02, -6.45267451e-02, -9.74272954e-02, -1.37550290e-01,
        -2.12646857e-01, -2.30959439e-01]))

In [None]:
"""
Calculate the eigenvalues, given the factor correlation matrix.

"""

In [12]:
# Variance summary per factor: returns (variance, proportional variance,
# cumulative variance), each with one entry per extracted factor.
fa.get_factor_variance()



(array([3.18537843, 2.67902567, 0.43803303]),
 array([0.22752703, 0.19135898, 0.03128807]),
 array([0.22752703, 0.41888601, 0.45017408]))

In [None]:
"""
Calculate the factor variance information, including variance, proportional variance and cumulative variance for each factor. Returns:
variance (numpy array) – The factor variances.
proportional_variance (numpy array) – The proportional factor variances.
cumulative_variances (numpy array) – The cumulative factor variances.
"""

In [23]:
# Uniquenesses computed from the fitted loading matrix (one value per observed variable).
fa.get_uniquenesses()


array([0.12957416, 0.0446843 , 0.09288488, 0.53502902, 0.14176452,
       0.15236009, 0.0834146 , 0.83723838, 0.94802009, 0.96745821,
       0.9457588 , 0.83225973, 0.99592665, 0.99118943])

In [None]:
"""
Calculate the uniquenesses, given the factor loading matrix.
"""

In [24]:
# Factor scores for the same frame the model was fitted on (one row per
# observation, one column per factor).
# NOTE(review): this displays every row; slicing the result (e.g. [:5]) would
# keep the stored output short.
fa.transform(df_features)

array([[-1.76044995,  0.26357822,  1.06544013],
       [ 1.1243285 ,  1.99709873,  0.91948358],
       [ 1.00616831,  0.39432251, -0.02344607],
       [ 0.62481328, -1.31133006,  0.45989287],
       [-1.22542965,  0.21144108, -0.25554909],
       [-0.32259625,  0.87981674, -0.59766014],
       [-0.06229376, -0.60563571, -0.51077425],
       [-0.26043261, -0.87911958, -0.33220267],
       [ 0.98294132, -0.90702132,  0.19242692],
       [-1.63590423,  0.23705871,  0.14110019],
       [ 1.20252947,  1.94332049, -0.93690622],
       [ 1.01775227,  0.4319837 ,  0.30315218],
       [ 0.68983987, -1.31452842,  0.85574364],
       [-1.29471273,  0.20206078,  0.64842327],
       [-0.36196266,  0.8585262 , -1.58850211],
       [-0.17556007, -0.60350496, -0.24268199],
       [-0.32620053, -0.89485289, -1.03884892],
       [ 1.26597045, -0.88115393, -0.39381238],
       [-1.64999008,  0.31400521,  0.86418769],
       [ 1.0082889 ,  1.93355553,  0.22683084],
       [ 1.0054717 ,  0.45180683,  0.282

In [None]:
"""
transform(X)
Get the factor scores for a new data set.
"""

In [25]:
# Rotate an unrotated solution after the fact with a stand-alone Rotator.
from factor_analyzer import FactorAnalyzer, Rotator

# Reload the sheet and refit without rotation, so the loadings handed to the
# Rotator below are the raw ones.
df_features = pd.read_excel('beer_rnd.xlsx')
fa = FactorAnalyzer(rotation=None).fit(df_features)

# The Rotator is built with its default settings; fit_transform returns the
# rotated loading matrix, which is displayed as the cell output.
rotator = Rotator()
rotator.fit_transform(fa.loadings_)

array([[ 0.13331485,  0.9233854 , -0.00351956],
       [-0.1850847 ,  0.95809621, -0.05577641],
       [ 0.23699164,  0.92195168, -0.03090593],
       [-0.62881226, -0.16293613,  0.20740765],
       [ 0.89491453,  0.19957174,  0.13241825],
       [ 0.90471191, -0.13011561,  0.11048165],
       [ 0.9477878 , -0.1312512 , -0.03250851],
       [-0.07456213,  0.20930711,  0.33673824],
       [ 0.05045812, -0.03576305,  0.21944222],
       [ 0.06457001,  0.06101124, -0.15700361],
       [ 0.22950447,  0.03928456, -0.00506219],
       [ 0.0061692 ,  0.00543984,  0.40947847],
       [-0.0281452 , -0.02687877, -0.05058383],
       [-0.01637362, -0.05586331,  0.07363262]])

In [None]:
"""
The Rotator class takes an (unrotated) factor loading matrix and performs one of several rotations.
"""

In [28]:
# NOTE(review): this cell repeats the preceding rotation cell (same sheet, same
# unrotated fit, same default Rotator), so it produces the same matrix.
df_features = pd.read_excel('beer_rnd.xlsx')
fa = FactorAnalyzer(rotation=None).fit(df_features)

# fit_transform computes the factor rotation and returns the new loading matrix.
rotator = Rotator()
rotator.fit_transform(fa.loadings_)

array([[ 0.13331485,  0.9233854 , -0.00351956],
       [-0.1850847 ,  0.95809621, -0.05577641],
       [ 0.23699164,  0.92195168, -0.03090593],
       [-0.62881226, -0.16293613,  0.20740765],
       [ 0.89491453,  0.19957174,  0.13241825],
       [ 0.90471191, -0.13011561,  0.11048165],
       [ 0.9477878 , -0.1312512 , -0.03250851],
       [-0.07456213,  0.20930711,  0.33673824],
       [ 0.05045812, -0.03576305,  0.21944222],
       [ 0.06457001,  0.06101124, -0.15700361],
       [ 0.22950447,  0.03928456, -0.00506219],
       [ 0.0061692 ,  0.00543984,  0.40947847],
       [-0.0281452 , -0.02687877, -0.05058383],
       [-0.01637362, -0.05586331,  0.07363262]])

![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)

In [45]:
# Confirmatory factor analysis with a hand-specified two-factor structure.
import pandas as pd
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser

X = pd.read_excel('beer_rnd.xlsx')

# Factor name -> observed variables assigned to that factor. The "V.." names
# are presumably the column headers of the sheet -- verify against the file.
# NOTE(review): the rotated EFA loadings earlier in the notebook put the first
# three variables and the 5th-7th variables on different factors; confirm that
# grouping V1-V7 under a single factor is intended.
model_dict = {
    "F1": ["V1", "V2", "V3", "V4", "V5", "V6", "V7"],
    "F2": ["V8", "V9", "V10", "V11", "V12", "V13", "V14"],
}
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(X, model_dict)

# The model is fitted on the raw NumPy values of the frame.
cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False)
cfa.fit(X.values)

# Estimated loadings (rows: variables, columns: F1 / F2), shown as the cell output.
cfa.loadings_



array([[ 5.07459918,  0.        ],
       [ 2.90177852,  0.        ],
       [ 2.18542547,  0.        ],
       [-0.54352112,  0.        ],
       [ 0.82261688,  0.        ],
       [-0.10012378,  0.        ],
       [ 0.11760903,  0.        ],
       [ 0.        ,  2.30008734],
       [ 0.        ,  2.96039761],
       [ 0.        ,  1.24508137],
       [ 0.        ,  2.06840672],
       [ 0.        ,  1.79001419],
       [ 0.        ,  2.22072389],
       [ 0.        ,  1.85773298]])

In [46]:
# Factor variance/covariance matrix from the fitted CFA (2 x 2 in the output
# below, matching the two factors declared in model_dict).
cfa.factor_varcovs_

array([[ 1.        , -0.36341742],
       [-0.36341742,  1.        ]])

In [47]:
# Standard errors of the CFA estimates. The call returns two arrays (see the
# output below); per the factor_analyzer docs these are the standard errors of
# the loadings and of the error variances -- TODO confirm.
cfa.get_standard_errors()

(array([[0.37658981, 0.        ],
        [0.2695423 , 0.        ],
        [0.22151428, 0.        ],
        [0.24008408, 0.        ],
        [0.23247835, 0.        ],
        [0.22913306, 0.        ],
        [0.22072388, 0.        ],
        [0.        , 0.31444472],
        [0.        , 0.33810126],
        [0.        , 0.27704203],
        [0.        , 0.303782  ],
        [0.        , 0.29013916],
        [0.        , 0.31444358],
        [0.        , 0.30104371]]),
 array([0.94191747, 0.4929516 , 0.37251548, 0.78063285, 0.7033508 ,
        0.72970785, 0.676803  , 0.98310212, 1.0703759 , 0.85603125,
        0.93929388, 0.88152652, 0.99469217, 0.94897406]))

In [48]:
# Factor scores from the fitted CFA for the same values it was fitted on
# (one row per observation, one column per factor).
# NOTE(review): this displays every row; slicing the result (e.g. [:5]) would
# keep the stored output short.
cfa.transform(X.values)

array([[ -3.50016841,   3.47278739],
       [ 14.27485057,   7.07812875],
       [  7.10438377,   8.07543999],
       [ -5.3945639 ,   2.28864365],
       [ -3.4829502 , -10.2000735 ],
       [ -0.30767802,  10.53402561],
       [ -4.49659084,   2.13979382],
       [ -1.78446017,  -5.5406897 ],
       [ -2.41282281,  -0.63516034],
       [ -3.50016841,   0.26431575],
       [ 14.27485057,  -0.77177844],
       [  7.10438377,  -1.09442776],
       [ -5.3945639 ,   6.63542372],
       [ -3.4829502 ,   5.96608967],
       [ -0.30767802,   0.35515553],
       [ -4.49659084,   3.86408233],
       [ -1.78446017,  -9.96930772],
       [ -2.41282281,   0.11656827],
       [ -3.50016841,   1.41864378],
       [ 14.27485057,   5.12851297],
       [  7.10438377,  -3.84054408],
       [ -5.3945639 ,  -5.54715161],
       [ -3.4829502 ,   1.10844569],
       [ -0.30767802,  -4.09839274],
       [ -4.49659084,  -4.36816254],
       [ -1.78446017,  -6.71130942],
       [ -2.41282281,   1.53324794],
 

In [49]:
# Covariance matrix implied by the fitted CFA model (as the method name
# states); the displayed rows hold one entry per observed variable.
cfa.get_model_implied_cov()

array([[ 2.60651546e+01,  1.47253629e+01,  1.10901583e+01,
        -2.75815183e+00,  4.17445093e+00, -5.08088070e-01,
         5.96818703e-01, -4.24181584e+00, -5.45955856e+00,
        -2.29617623e+00, -3.81455098e+00, -3.30114010e+00,
        -4.09545396e+00, -3.42602694e+00],
       [ 1.47253629e+01,  1.11219618e+01,  6.34162069e+00,
        -1.57717791e+00,  2.38705199e+00, -2.90537045e-01,
         3.41275367e-01, -2.42557287e+00, -3.12190760e+00,
        -1.31300909e+00, -2.18125249e+00, -1.88767173e+00,
        -2.34187961e+00, -1.95908505e+00],
       [ 1.10901583e+01,  6.34162069e+00,  7.08467044e+00,
        -1.18782490e+00,  1.79776788e+00, -2.18813067e-01,
         2.57025777e-01, -1.82677923e+00, -2.35121197e+00,
        -9.88870618e-01, -1.64277346e+00, -1.42166808e+00,
        -1.76374707e+00, -1.47545181e+00],
       [-2.75815183e+00, -1.57717791e+00, -1.18782490e+00,
         5.78387538e+00, -4.47109648e-01,  5.44193911e-02,
        -6.39229935e-02,  4.54324848e-01,  5.