# In [None]:
import pandas as pd

# Read the raw dataset from the CSV file referenced by a relative path.
file_path = './ACP Data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top of the table (the first 5 rows, which is head()'s default).
print(data.head(n=5))
# Keep every column except the first one (the categorical "Index" label
# column); everything that remains is treated as a numeric variable.
numeric_data = data.iloc[:, 1:]

# describe() yields one row per statistic, so transpose first (one row per
# variable) and then pick the four indicators of interest as columns.
summary_by_variable = numeric_data.describe().T
stats = summary_by_variable[['min', 'max', 'mean', 'std']]
print("Statistical Indicators:\n", stats)
import seaborn as sns

# Pairwise Pearson correlations between the numeric variables
# (Pearson is DataFrame.corr()'s default; spelled out here for clarity).
correlation_matrix = numeric_data.corr(method='pearson')
print("Correlation Matrix:\n", correlation_matrix)
import matplotlib.pyplot as plt

# Render the correlation matrix as an annotated heatmap, using the explicit
# Figure/Axes interface rather than pyplot's implicit "current axes".
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    ax=heatmap_ax,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
)
heatmap_ax.set_title('Correlation Matrix Heatmap')
plt.show()
from sklearn.decomposition import PCA

# Fit a full PCA (all components kept) on the numeric variables.
# NOTE: scikit-learn's PCA centres the data but does not scale it, so this is
# a covariance-based PCA and high-variance variables weigh more in the axes.
pca = PCA().fit(numeric_data)

# Eigenvalues of the covariance matrix and the share of variance each explains.
eigenvalues = pca.explained_variance_
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_variance = explained_variance_ratio.cumsum()

# Assemble the eigenvalue table one column at a time (one row per component).
eigenvalues_table = pd.DataFrame({'Eigenvalue': eigenvalues})
eigenvalues_table['Explained Variance (%)'] = 100 * explained_variance_ratio
eigenvalues_table['Cumulative Variance (%)'] = 100 * cumulative_variance
print("Eigenvalues Table:\n", eigenvalues_table)
# scikit-learn stores the principal axes as rows of components_, ordered by
# decreasing explained variance, so the first two rows are the top two axes.
top_axes = pca.components_[:2, :]
print("Top 2 Principal Axes:\n", top_axes)
# Project every observation onto the principal axes.
individuals_pca = pca.transform(numeric_data)

# Keep only the coordinates on the first two components (F1, F2) and label
# each row with the matching value of the "Index" column.
individuals_table = pd.DataFrame(
    data=individuals_pca[:, :2],
    index=data['Index'],
    columns=['F1', 'F2'],
)
print("Individuals Table:\n", individuals_table)
import matplotlib.pyplot as plt

# Correlation circle
# Each variable is placed at its Pearson correlation with PC1 (x) and PC2 (y).
# The rows of top_axes are unit-length eigenvectors, not correlations, so they
# are rescaled with  corr(x_j, PC_k) = v_kj * sqrt(lambda_k) / std(x_j).
# pandas' std() uses ddof=1, the same normalisation as explained_variance_.
component_std = pca.explained_variance_[:2] ** 0.5
variable_std = numeric_data.std().to_numpy()
circle_coords = top_axes * component_std[:, None] / variable_std[None, :]

fig, ax = plt.subplots(figsize=(8, 8))

# Unit circle and axes through the origin: correlations lie inside the
# circle, and a variable close to it is well represented by the PC1-PC2 plane.
ax.add_patch(plt.Circle((0, 0), 1, fill=False, color='gray', linestyle='--'))
ax.axhline(0, color='gray', linewidth=0.5)
ax.axvline(0, color='gray', linewidth=0.5)

for var, (x_corr, y_corr) in zip(numeric_data.columns, circle_coords.T):
    ax.arrow(0, 0, x_corr, y_corr, color='blue', alpha=0.5,
             head_width=0.03, length_includes_head=True)
    # Push the label slightly past the arrow tip so it does not cover it.
    ax.text(x_corr * 1.08, y_corr * 1.08, var, color='black', ha='center', va='center')

# A small margin keeps the unit circle from being clipped, and an equal
# aspect ratio keeps it round.
ax.set_xlim(-1.1, 1.1)
ax.set_ylim(-1.1, 1.1)
ax.set_aspect('equal')
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('Correlation Circle')
ax.grid(True)
plt.show()
# Compute correlations
# pca.components_ holds the eigenvectors (unit-length axes), not correlations.
# The Pearson correlation between variable j and component k is
#   corr(x_j, PC_k) = v_kj * sqrt(lambda_k) / std(x_j)
# where lambda_k is the k-th eigenvalue and std uses ddof=1 (pandas' default),
# matching the normalisation of explained_variance_.
pc_labels = [f'PC{i+1}' for i in range(pca.n_components_)]
component_std = pca.explained_variance_ ** 0.5
variable_std = numeric_data.std().to_numpy()
correlations = pd.DataFrame(
    pca.components_.T * component_std / variable_std[:, None],
    columns=pc_labels,
    index=numeric_data.columns,
)
print("Correlations Between Variables and Principal Components:\n", correlations)