## Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import FactorAnalysis
from sklearn.model_selection import train_test_split

## Declaration

In [None]:
# Columns fed into the factor analysis. Every column name is a category stem
# carrying the shared "log_" prefix, so the list is built from the stems.
fa_variables = [
    f"log_{category}"
    for category in (
        "american",
        "beverages",
        "cafe",
        "chicken_dishes",
        "chinese",
        "desserts",
        "healthy",
        "indian",
        "italian",
        "japanese",
        "noodle_dishes",
        "other",
        "street_food_snacks",
        "thai",
    )
]

# Restrict the source table to the factor-analysis columns only.
fa_data = data[fa_variables]

## Fit

In [None]:
# Candidate numbers of latent factors to compare (1, 2 and 3)
n_components_values = list(range(1, 4))

def evaluate_factor_analysis(X_train, X_val, n_components):
    """
    Score a FactorAnalysis model of a given size on held-out data.

    A fresh estimator is fitted on ``X_train`` only, then scored on ``X_val``
    so that candidate factor counts can be compared on unseen samples.

    Parameters:
    - X_train: Data the estimator is fitted on
    - X_val: Held-out data the fitted estimator is scored on
    - n_components: Number of latent factors to extract

    Returns:
    - The value of ``FactorAnalysis.score(X_val)``, i.e. the average
      log-likelihood of the validation samples under the fitted model
    """
    # Fixed hyperparameters; only the number of factors varies between calls.
    model_settings = dict(
        n_components=n_components,
        tol=0.001,
        max_iter=10000,
        svd_method='lapack',
        iterated_power=3,
        rotation='varimax',
        random_state=0,
    )
    # fit() returns the estimator itself, so the calls can be chained.
    return FactorAnalysis(**model_settings).fit(X_train).score(X_val)

# Split the factor-analysis inputs into train and validation sets.
# `fa_data` is passed directly so the splits retain the feature names.
X_train, X_val = train_test_split(fa_data, test_size=0.2, random_state=30)

# Evaluate Factor Analysis for each candidate number of components,
# recording the validation log-likelihood of every candidate.
results_list = []
for n_components in n_components_values:
    log_likelihood = evaluate_factor_analysis(X_train, X_val, n_components)
    results_list.append({'n_components': n_components, 'log_likelihood': log_likelihood})
    print(f"n_components: {n_components}, log_likelihood: {log_likelihood}")

# Print summary of results
print("\nResults:")
for result in results_list:
    print(result)

# The optimal number of components is the one with the highest
# validation log-likelihood.
best_result = max(results_list, key=lambda x: x['log_likelihood'])
print(
    f"\nOptimal number of components: {best_result['n_components']} "
    f"with log-likelihood: {best_result['log_likelihood']}"
)

# Train the final FactorAnalysis model with the optimal number of components
optimal_fa = FactorAnalysis(
    n_components=best_result['n_components'],
    tol=0.001,
    max_iter=10000,
    svd_method='lapack',
    iterated_power=3,
    rotation='varimax',
    random_state=0
)

# An estimator must be fitted before transform(), components_ or
# get_feature_names_out() can be used. The final model is fitted on the full
# `fa_data`, the same table that is transformed afterwards.
optimal_fa.fit(fa_data)



## Get Optimal

In [None]:
# Inspect the final model. A notebook cell renders only its last expression,
# so the intermediate results are printed explicitly instead of being
# silently discarded.
print(optimal_fa.get_params())
print(optimal_fa.get_feature_names_out())

# Project the factor-analysis inputs onto the latent factors
# (one row per input row, one column per factor) and preview the first rows.
new_data = optimal_fa.transform(fa_data)
print(pd.DataFrame(new_data).head())

# components_ of the fitted model: one row per factor, one column per input
# feature. Left as the last expression so the notebook renders it as a table.
pd.DataFrame(optimal_fa.components_)