In [15]:
import numpy as np
import pandas as pd
from sklearn.utils.extmath import randomized_svd
from tabulate import tabulate
import warnings
warnings.filterwarnings("ignore")

class FA():
    """
    Extract the factors of a data matrix with the Principal Factor method.

    The correlation matrix of the data is "reduced" by replacing its diagonal
    with ``1 - 1 / diag(inv(corr))`` (the squared multiple correlation of each
    variable). The factors are the eigenvectors of that reduced matrix that
    belong to positive eigenvalues, scaled by the square root of the eigenvalue.

    Parameters
    ----------
    n_factors : int or None, default None
        Default number of factors kept by ``loadings``. ``None`` keeps every
        factor whose eigenvalue is positive.
    svd_method : {"randomized", "lapack"}, default "randomized"
        Decomposition backend. "randomized" uses scikit-learn's
        ``randomized_svd`` with a fixed seed; "lapack" uses ``numpy.linalg.svd``.
    """

    # Eigenvalues at or below this threshold are treated as non-positive.
    _EIGEN_TOL = 1e-10
    _SVD_METHODS = ("randomized", "lapack")

    def __init__(self, n_factors=None, svd_method="randomized"):
        self.n_factors = n_factors
        self.svd_method = svd_method

    def _decompose(self, matrix):
        """Return ``(U, s, Vt)`` for ``matrix`` using the configured SVD backend."""
        if self.svd_method == "randomized":
            return randomized_svd(matrix, n_components=matrix.shape[1], random_state=1234567890)
        u, s, vt = np.linalg.svd(matrix)
        # Use the same sign convention as the randomized backend: the entry
        # with the largest absolute value in every column of U is positive.
        pivot_rows = np.argmax(np.abs(u), axis=0)
        signs = np.sign(u[pivot_rows, np.arange(u.shape[1])])
        signs[signs == 0] = 1.0
        return u * signs, s, vt * signs[:, np.newaxis]

    @staticmethod
    def _side_by_side(left, right, gap="   "):
        """Join two multi-line strings horizontally, line by line."""
        left_lines = left.splitlines()
        right_lines = right.splitlines()
        height = max(len(left_lines), len(right_lines))
        left_lines += [''] * (height - len(left_lines))
        right_lines += [''] * (height - len(right_lines))
        # Pad to the widest left line (at least 40 columns) so the right-hand
        # block stays aligned even on rows where the left block is blank.
        width = max([40] + [len(line) for line in left_lines])
        return "\n".join(f"{l_line: <{width}}{gap}{r_line}" for l_line, r_line in zip(left_lines, right_lines))

    def loadings(self, X, n_factors=None):
        """
        Compute the factor loadings of ``X`` and print a summary.

        Two tables are printed side by side: the eigenvalues of the reduced
        correlation matrix (with proportion and cumulative proportion) and the
        loadings of the retained factors with each variable's uniqueness and
        communality. Nothing is returned; the results are stored on the instance.

        Parameters
        ----------
        X : array-like
            Data matrix with one variable per column (``rowvar=False``).
        n_factors : int or None, default None
            Number of factors to keep. ``None`` falls back to the value given
            to the constructor; if that is also ``None``, every factor with a
            positive eigenvalue is kept.

        Raises
        ------
        ValueError
            If ``svd_method`` is not "randomized" or "lapack".
        numpy.linalg.LinAlgError
            If the correlation matrix of ``X`` cannot be inverted.

        Attributes set
        --------------
        X_corr_o : original correlation matrix.
        X_corr : reduced correlation matrix (diagonal replaced by ``R``).
        R : initial communality estimates, ``1 - 1 / diag(inv(X_corr_o))``.
        eigenvalues : eigenvalues of ``X_corr`` in descending order.
        eigenvectors, eigenvalues_p : left singular vectors / singular values of ``X_corr``.
        loadings_ : loadings for every singular vector.
        postive_loadings : loadings of the retained positive-eigenvalue factors.
        communalities, uniquenesses : per-variable sums of squared loadings and their complement.
        eigenv_matrix_, loadings_matrix_ : the two printed tables as DataFrames.
        """
        if self.svd_method not in self._SVD_METHODS:
            raise ValueError(f"svd_method must be one of {self._SVD_METHODS}, got {self.svd_method!r}")
        if n_factors is None:
            n_factors = self.n_factors

        self.X_corr_o = np.corrcoef(X, rowvar=False)
        self.X_corr_inv = np.linalg.inv(self.X_corr_o)
        self.X_corr_inv_diag = np.diag(self.X_corr_inv)
        self.R = 1 - (1 / self.X_corr_inv_diag)
        self.X_corr = self.X_corr_o.copy()
        np.fill_diagonal(self.X_corr, self.R)

        # The reduced matrix is symmetric, so the symmetric solver applies; it
        # always returns real eigenvalues (ascending, reversed here).
        self.eigenvalues = np.linalg.eigvalsh(self.X_corr)[::-1]
        self.eigenvectors, self.eigenvalues_p, _ = self._decompose(self.X_corr)

        # Stored under a trailing-underscore name: assigning to ``self.loadings``
        # would replace this method on the instance and break a second call.
        self.loadings_ = self.eigenvectors * np.sqrt(self.eigenvalues_p)

        # Singular values are |eigenvalue|; the Rayleigh quotient u' A u gives
        # back the signed eigenvalue of each singular vector, so the positive
        # factors can be selected without comparing rounded floats.
        signed_eigenvalues = np.einsum("ij,ij->j", self.eigenvectors, self.X_corr @ self.eigenvectors)
        positive_filter = np.flatnonzero(signed_eigenvalues > self._EIGEN_TOL)
        self.postive_loadings = self.loadings_[:, positive_filter]
        if n_factors is not None:
            self.postive_loadings = self.postive_loadings[:, :n_factors]

        n_vars, n_kept = self.postive_loadings.shape
        self.communalities = np.sum(self.postive_loadings ** 2, axis=1)
        self.uniquenesses = 1 - self.communalities

        loadings_matrix = pd.DataFrame(self.postive_loadings, columns=[f"Factor {i+1}" for i in range(n_kept)])
        loadings_matrix.insert(0, 'Variable', [f"x{i+1}" for i in range(n_vars)])
        loadings_matrix['Uniqueness'] = self.uniquenesses
        loadings_matrix['Communality'] = self.communalities
        self.loadings_matrix_ = loadings_matrix

        eigenv_proportion = self.eigenvalues / np.sum(self.eigenvalues)
        self.eigenv_matrix_ = pd.DataFrame({
            'Factor': [f"Factor {i+1}" for i in range(self.eigenvalues.shape[0])],
            'Eigenvalue': self.eigenvalues,
            'Proportion': eigenv_proportion,
            'Cumulative': np.cumsum(eigenv_proportion),
        })

        # Render both tables as text and print them next to each other.
        eigenv_table = tabulate(self.eigenv_matrix_, headers='keys', tablefmt="fancy_grid", showindex=False, floatfmt=".4f")
        loadings_table = tabulate(self.loadings_matrix_, headers='keys', tablefmt="fancy_grid", showindex=False, floatfmt=".4f")
        bold_start = "\033[1m"
        bold_end = "\033[0m"
        print(f"{bold_start}Resultados de Factor Analysis{bold_end}")
        print(self._side_by_side(eigenv_table, loadings_table))

    def get_est_corr_matrix(self):
        """
        Print the correlation matrix implied by the retained factors and its
        difference from the original correlation matrix of X.

        The estimate is ``L @ L.T + diag(uniquenesses)`` where ``L`` is
        ``postive_loadings``. Both frames are also stored as ``est_corr_`` and
        ``residual_corr_``. Nothing is returned.

        Raises
        ------
        AttributeError
            If ``loadings`` has not been called on this instance yet.
        """
        if not hasattr(self, "postive_loadings"):
            raise AttributeError("No loadings available: call loadings(X) before get_est_corr_matrix().")
        ee = pd.DataFrame(self.postive_loadings @ self.postive_loadings.T + np.diag(self.uniquenesses))
        self.est_corr_ = ee
        self.residual_corr_ = pd.DataFrame(self.X_corr_o) - ee

        print(ee, self.residual_corr_)

    # def varimaxr(self, loadings, normalize = True, max_iter = 500, tolerance = 1e-5):
    #     df = loadings.copy()
    #     column_names = df.index.values
    #     index_names = df.columns.values
    #     n_rows, n_cols = df.shape
    #     if n_cols < 2:
    #         return df
    #     X = df.values
    #     if normalize:
    #         normalized_mtx = df.apply(lambda x: np.sqrt(sum(x**2)),
    #                                   axis=1).values
    #         X = (X.T / normalized_mtx).T
    #     rotation_mtx = np.eye(n_cols)

    #     d = 0
    #     for _ in range(max_iter):
    #         old_d = d
    #         basis = np.dot(X, rotation_mtx)
    #         transformed = np.dot(X.T, basis**3 - (1.0 / n_rows) *
    #                              np.dot(basis, np.diag(np.diag(np.dot(basis.T, basis)))))
    #         U, S, V = np.linalg.svd(transformed)
    #         rotation_mtx = np.dot(U, V)
    #         d = np.sum(S)
    #         if old_d != 0 and d / old_d < 1 + tolerance:
    #             break

    #     X = np.dot(X, rotation_mtx)

    #     if normalize:
    #         X = X.T * normalized_mtx
    #     else:
    #         X = X.T
    #     loadings = pd.DataFrame(X, columns=column_names, index=index_names).T

    #     def flip_sign(vec):
    #         for i in range(vec.shape[1]):
    #             if(vec[:, i].sum() < 0):
    #                 vec[:, i] = -1 * vec[:, i]
    #         return vec

    #     rloadnpmat = loadings.as_matrix()
    #     rloadingflip = flip_sign(rloadnpmat)

    #     def matx(mat):
    #         rpe = mat ** 2
    #         rpesum = np.sum(rpe, axis = 0)
    #         ind = rpesum.argsort()
    #         rr = mat[:, ind]
    #         return rr
        
    #     index  = ["PC"+str(i) for i in range(loadings.shape[1])]
    #     varmaxrotmat = pd.DataFrame(matx(rloadingflip), columns = index)
        
    #     return varmaxrotmat

In [3]:
# Location of the input workbook.
# NOTE(review): this is an absolute, machine-specific path; point it at a
# project-relative data directory before sharing the notebook.
DATA_PATH = r"C:\Users\HP\OneDrive\Escritorio\David Guzzi\Github\MECMT04\TP AEM - database.xlsx"

raw_df = pd.read_excel(DATA_PATH)

# Keep everything from the third column onward as a plain NumPy array. Column
# labels are not renamed because they are discarded by `.values` anyway.
# NOTE(review): presumably the first two columns are identifiers — confirm.
final = raw_df.iloc[:, 2:].values

In [16]:
# Build the analyzer with its default settings and run the extraction on the
# `final` array; `loadings` prints the eigenvalue and loadings tables.
fa = FA()
fa.loadings(final)

[1mResultados de Factor Analysis[0m
╒══════════╤══════════════╤══════════════╤══════════════╕   ╒════════════╤════════════╤════════════╤════════════╤════════════╤══════════════╤═══════════════╕
│ Factor   │   Eigenvalue │   Proportion │   Cumulative │   │ Variable   │   Factor 1 │   Factor 2 │   Factor 3 │   Factor 4 │   Uniqueness │   Communality │
╞══════════╪══════════════╪══════════════╪══════════════╡   ╞════════════╪════════════╪════════════╪════════════╪════════════╪══════════════╪═══════════════╡
│ Factor 1 │       1.8282 │       0.4982 │       0.4982 │   │ x1         │    -0.3620 │     0.5729 │    -0.3125 │    -0.1097 │       0.4310 │        0.5690 │
├──────────┼──────────────┼──────────────┼──────────────┤   ├────────────┼────────────┼────────────┼────────────┼────────────┼──────────────┼───────────────┤
│ Factor 2 │       1.2632 │       0.3442 │       0.8424 │   │ x2         │    -0.5573 │     0.6024 │     0.0920 │    -0.1548 │       0.2941 │        0.7059 │
├──────────┼──

In [17]:
# Print the correlation matrix reproduced from the loadings and uniquenesses,
# followed by its difference from the original correlation matrix.
fa.get_est_corr_matrix()

          0         1         2         3         4         5         6
0  1.000000  0.535130 -0.322015  0.036342 -0.069417  0.060567  0.114300
1  0.535130  1.000000 -0.199402  0.292244 -0.258222 -0.171280  0.181228
2 -0.322015 -0.199402  1.000000  0.265037  0.313529  0.133084 -0.024176
3  0.036342  0.292244  0.265037  1.000000  0.049735  0.044152  0.243248
4 -0.069417 -0.258222  0.313529  0.049735  1.000000  0.737913 -0.014584
5  0.060567 -0.171280  0.133084  0.044152  0.737913  1.000000  0.118837
6  0.114300  0.181228 -0.024176  0.243248 -0.014584  0.118837  1.000000           0         1             2         3             4             5  \
0  0.000000  0.071771  1.411969e-02 -0.058105  9.864752e-03  6.853152e-03   
1  0.071771  0.000000 -3.544898e-02  0.081438  5.884523e-03 -2.295594e-02   
2  0.014120 -0.035449 -1.110223e-16  0.045048  4.192798e-02 -4.048848e-02   
3 -0.058105  0.081438  4.504802e-02  0.000000 -7.879195e-03  1.927687e-02   
4  0.009865  0.005885  4.192798e-02 -0.

In [1]:
import pandas as pd