# In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Read the USEEIO dataset from the working directory.
df = pd.read_csv('useeio.csv')

# Restrict the analysis to the float/int columns; anything else (e.g. text
# labels) is left out of the feature matrix.
features = df.select_dtypes(include=[float, int]).columns
x = df[features].to_numpy()

# Rescale each feature to zero mean and unit variance so that no single
# column dominates the principal components purely because of its units.
scaler = StandardScaler()
x_standardized = scaler.fit_transform(x)

# Fit a PCA with no component limit to inspect the full variance spectrum.
pca = PCA()
principal_components = pca.fit_transform(x_standardized)

# Wrap the scores in a DataFrame with PC1..PCk column labels.
# With n_components unset, PCA keeps min(n_samples, n_features) components,
# so the labels are derived from the actual width of the score matrix rather
# than from the number of input features (which is too many whenever the
# dataset has fewer rows than numeric columns).
pca_df = pd.DataFrame(
    data=principal_components,
    columns=[f'PC{k}' for k in range(1, principal_components.shape[1] + 1)],
)

# Share of the total variance captured by each principal component,
# taken from the fitted PCA.
explained_variance_ratio = pca.explained_variance_ratio_

# Scree plot: component number (1-based) against its explained variance ratio.
plt.figure(figsize=(10, 6))
plt.plot(
    range(1, explained_variance_ratio.size + 1),
    explained_variance_ratio,
    linestyle='--',
    marker='o',
)
plt.title('Explained Variance Ratio by Principal Components')
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.show()

# Choose how many components to keep: the smallest count whose cumulative
# explained variance reaches at least 95%.
cumulative_explained_variance = pca.explained_variance_ratio_.cumsum()
n_components = next(
    rank
    for rank, running_total in enumerate(cumulative_explained_variance, start=1)
    if running_total >= 0.95
)

print(f'Number of components selected to retain 95% variance: {n_components}')

# Refit PCA limited to the selected number of components and project the
# standardized data onto them.
pca = PCA(n_components=n_components)
x_reduced = pca.fit_transform(x_standardized)

# Label the reduced coordinates PC1..PCn.
reduced_df = pd.DataFrame(
    x_reduced,
    columns=[f'PC{k}' for k in range(1, n_components + 1)],
)

# Preview the first rows of the reduced dataset.
print(reduced_df.head())
