In [1]:
import pandas as pd
from sklearn.decomposition import FactorAnalysis
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the house data CSV (path is relative to the working directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer='kc_house_data_reduced.csv')


In [3]:
# Columns fed into the factor analysis; the order here fixes the column order
# of the scaled matrix and of every per-feature printout further down.
columns_for_factor_analysis = [
    'condition',
    'grade',
    'sqft_above',
    'sqft_basement',
    'sqft_living15',
]


In [4]:
# Put every selected column on a common scale (zero mean, unit variance)
# so that no feature dominates the factor model purely because of its units.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data.loc[:, columns_for_factor_analysis])


In [5]:
# Two-factor model; random_state is pinned so repeated runs use the same seed
n_components = 2
factor_analysis_model = FactorAnalysis(
    n_components=n_components,
    random_state=42,
)
# Fit on the standardized features, then transform those same rows.
# Note: `components` holds the transformed rows, not the loading matrix
# (the loadings live in `factor_analysis_model.components_`).
components = factor_analysis_model.fit(data_scaled).transform(data_scaled)


In [6]:
# Show the fitted loading matrix: one row per factor, one column per input
# feature (same order as columns_for_factor_analysis).
print("Components:", factor_analysis_model.components_, sep="\n")

Components:
[[-0.06915843  0.82146164  0.78397372  0.44373017  0.81491862]
 [ 0.24241864 -0.23514799 -0.47719398  0.83719893 -0.19252606]]


In [7]:
# List each factor's weight on every input feature, labelled by feature name
for component_number, loadings in enumerate(factor_analysis_model.components_[:n_components], start=1):
    print(f"\nInterpretation of Component {component_number}:")
    for feature, weight in zip(columns_for_factor_analysis, loadings):
        print(f"{feature}: {weight:.3f}")



Interpretation of Component 1:
condition: -0.069
grade: 0.821
sqft_above: 0.784
sqft_basement: 0.444
sqft_living15: 0.815

Interpretation of Component 2:
condition: 0.242
grade: -0.235
sqft_above: -0.477
sqft_basement: 0.837
sqft_living15: -0.193


In [9]:
# Per-feature noise variance estimated by the factor model. This is the part
# of each feature's variance that the factors do NOT account for, i.e. the
# opposite of explained variance. The inputs were standardized with
# StandardScaler, so a value close to 1 means the feature is barely captured
# by the factors and a value close to 0 means it is almost fully captured.
# Values are in the same order as columns_for_factor_analysis.
print("\nNoise Variance (Unexplained Variance per Feature):")
print(factor_analysis_model.noise_variance_)


Noise Variance (Explained Variance):
[0.93644737 0.26990708 0.1576642  0.10229964 0.29883962]
