# Setup and Load Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

# Read the three result tables in one pass. None of the files is read with a
# header row (header=None), so pandas assigns integer column labels 0..n-1.
features_df, eigenvalues_df, pca_results_df = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        '../../results/features_with_labels.csv',
        '../../results/eigenvalues.csv',
        '../../results/pca_results.csv',
    )
)

# Report (rows, columns) of the feature table so the column count can be checked
print("Features DataFrame shape:", features_df.shape)

# Visualize the Original Features with Labels

In [None]:
# Add column names for clarity, but only when the table has the expected
# layout (9 features followed by the label); otherwise show a preview so the
# mismatch can be diagnosed.
expected_columns = [
    'SpectralCentroid', 'SpectralFlatness', 'SpectralBandwidth',
    'ZCR', 'Energy', 'TemporalMean', 'TemporalVariance',
    'TemporalSkewness', 'TemporalKurtosis', 'Label',
]
if features_df.shape[1] == len(expected_columns):
    features_df.columns = expected_columns
else:
    print("Unexpected number of columns in features_df")
    print(features_df.head())  # Print the first few rows to inspect

if 'Label' in features_df.columns:
    # Map each instrument to a marker. The marker list is cycled with a
    # modulo so that every label gets an entry even when there are more
    # labels than marker shapes (zip() would silently drop the extras).
    unique_labels = features_df['Label'].unique()
    markers = ["o", "s", "D", "^", "v", "<", ">", "p", "h", "8", "X", "*", "P", "H"]  # 14 unique markers for 14 instruments
    label_to_marker = {
        label: markers[position % len(markers)]
        for position, label in enumerate(unique_labels)
    }

    # Pair plot of the features. seaborn expects ONE marker per hue level
    # (not one per row), listed in the same order as the hue levels, so the
    # order is pinned explicitly with hue_order.
    sns.pairplot(
        features_df,
        hue='Label',
        hue_order=list(unique_labels),
        diag_kind='kde',
        markers=[label_to_marker[label] for label in unique_labels],
    )
    plt.suptitle("Pair Plot of Original Features", y=1.02)
    plt.show()
else:
    # Without a 'Label' column there is nothing to colour or mark by; skip
    # the plot instead of failing with a KeyError.
    print("Label column not found in features_df")


# Plot eigenvalues (scree plot: the variance explained by each principal component)

In [None]:
# Sort the eigenvalues in decreasing order.
# NOTE(review): this reads only the first row of eigenvalues.csv, i.e. it
# assumes all eigenvalues are stored on one line — confirm against the writer.
sorted_eigenvalues = eigenvalues_df.iloc[0].sort_values(ascending=False)

# 1-based component numbers for the x axis, shared by both curves
component_numbers = range(1, len(sorted_eigenvalues) + 1)

# Plot the eigenvalues
plt.figure(figsize=(10, 6))
plt.plot(component_numbers, sorted_eigenvalues, marker='o', linestyle='--', color='b')
# Horizontal reference line at eigenvalue 1.0. Its length follows the number
# of eigenvalues actually loaded, so the plot works for any component count.
plt.plot(component_numbers, [1.0] * len(sorted_eigenvalues), linestyle='--', color='r')
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Eigenvalue')
plt.grid(True)
plt.show()

# Visualize PCA results

## Generate pair plots for all pairs of principal components

In [None]:
# Set column names for PCA results. A 'Label' column left over from a previous
# run of this cell is dropped first; otherwise re-running would rename the
# labels into an extra "Principal Component N" column.
if 'Label' in pca_results_df.columns:
    pca_results_df = pca_results_df.drop(columns='Label')
n_components = pca_results_df.shape[1]
pca_results_df.columns = [f'Principal Component {k+1}' for k in range(n_components)]

# Add labels to PCA results for visualization (aligned on the row index)
if 'Label' in features_df.columns:
    pca_results_df['Label'] = features_df['Label']
else:
    print("Label column not found in features_df")

# Every unordered pair (i, j) with i < j of the available principal
# components; 9 components give the 36 pairs that fill a 6x6 grid.
pairs = [(i, j) for i in range(n_components) for j in range(i+1, n_components)]

# Create pair plots for each unique pair of principal components
if 'Label' in pca_results_df.columns and pairs:
    # Six plots per row; the row count (ceiling division) and the figure
    # height scale with the number of pairs. squeeze=False keeps `axes`
    # two-dimensional even when there is only a single row.
    n_cols = 6
    n_rows = -(-len(pairs) // n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 20 * n_rows / 6), squeeze=False)

    # Prepare markers and colors for the legend. Labels without a marker
    # mapping (e.g. NaN introduced by index misalignment) fall back to 'o'.
    # 'Set1' has 9 distinct colors; with more labels the colors repeat and
    # the marker shape is what tells the instruments apart.
    unique_labels = pca_results_df['Label'].unique()
    markers = [label_to_marker.get(label, 'o') for label in unique_labels]
    colors = sns.color_palette('Set1', len(unique_labels))

    flat_axes = axes.ravel()
    for ax, (i, j) in zip(flat_axes, pairs):
        x_column = f'Principal Component {i+1}'
        y_column = f'Principal Component {j+1}'

        # One scatter call per instrument so each gets its own marker/color
        for label, marker, color in zip(unique_labels, markers, colors):
            indices = pca_results_df['Label'] == label
            ax.scatter(
                pca_results_df.loc[indices, x_column],
                pca_results_df.loc[indices, y_column],
                label=label,
                marker=marker,
                color=color
            )

        ax.set_xlabel(f'PC{i+1}')
        ax.set_ylabel(f'PC{j+1}')
        ax.set_title(f'PC{i+1} vs PC{j+1}')

    # Hide grid cells that have no pair to show (last row may be partial)
    for unused_ax in flat_axes[len(pairs):]:
        unused_ax.set_visible(False)

    # Add a super title to the figure
    plt.suptitle('Plots of principal components against one another for different instruments', fontsize=16, y=1.01)

    # Adjust layout to make space for the common legend
    plt.tight_layout(rect=[0, 0, 0.925, 1])

    # Add the common legend to the right of the plots. Proxy Line2D handles
    # are used so the legend shows each marker once, not once per subplot.
    handles = [plt.Line2D([0], [0], marker=marker, color='w', label=label,
                          markerfacecolor=color, markersize=10)
               for label, marker, color in zip(unique_labels, markers, colors)]
    fig.legend(handles=handles, loc='center right', title="Instrument")
    plt.show()
else:
    # Reached when labels are missing or fewer than two components exist
    print("Skipping plotting due to errors in the dataframes")