# In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from sklearn.decomposition import PCA

def pca_plot(scale_bool):
    """Fit a two-component PCA on the selected rows of ``data/.csv`` and plot it.

    The function reads ``data/.csv``, keeps the rows whose ``test`` column
    equals 1, drops the ``test`` and ``sample`` columns and fits a
    two-component PCA on what remains.  It then draws a two-panel figure
    (score plot on the left, PC1/PC2 loadings on the right), saves it to
    ``.png`` at 600 dpi, shows it, and prints a short summary of the loadings.

    Args:
        scale_bool: Currently unused; accepted so that existing callers keep
            working.  NOTE(review): the name suggests it was meant to toggle
            feature scaling before the PCA -- confirm and implement or drop.

    Returns:
        None.  The results are the saved/shown figure and the printed summary.

    Raises:
        KeyError: If a value in the ``sample`` column has no colour assigned
            in the colour table below.
    """
    polymers = pd.read_csv("data/.csv")
    # NOTE(review): the rows flagged ``test == 1`` are the ones the PCA is
    # fitted on, even though the variables are named ``*_train`` -- confirm
    # the flag's meaning against the data file.
    polymers_train = polymers[polymers["test"] == 1]

    labels = polymers_train["sample"]
    x_train = polymers_train.drop(["test", "sample"], axis=1)

    # Edge colour for each sample label in the score plot.
    sample_colors = {
        'PBA': "magenta", 'PBMA': "peru", 'PEA': "green", 'PMA': "orange",
        'PHEA': "blue", 'PCHMA': "tomato", 'PBzMA': "purple",
        'PMMA': "darkblue",
        'P(BA-MMA2)': "#44840C", 'P(BA-St2)': "#945200",
        'P(BA-AA2)': "#0096FF", 'P(BA-NMA2)': "#716DF6",
        # NOTE(review): 'PA(BA-GMA2)' looks like a typo for 'P(BA-GMA2)';
        # the key is kept as written -- verify against the CSV labels.
        'PA(BA-GMA2)': "#FADFB6", 'P(BA-GMA7)': "#F5B455",
        # NOTE(review): the original literal listed 'P(BA-GMA15)' twice
        # ("#F4A636", then "#F8CA87").  Python keeps the last value, so only
        # that one is retained here; one of the two entries was probably
        # meant for a different GMA content -- confirm.
        'P(BA-GMA15)': "#F8CA87",
        'P(BA-GMM4)': "#DC0D23", 'P(BA-4HBA2)': "#83800F",
    }

    # Fail early, with the offending labels named, instead of part-way
    # through plotting.
    unknown = sorted(str(label) for label in set(labels) - sample_colors.keys())
    if unknown:
        raise KeyError(f"no colour defined for sample label(s): {unknown}")
    edge_colors = [sample_colors[label] for label in labels]

    pca = PCA(n_components=2)
    pca.fit(x_train)
    x_loadings = pca.components_
    x_scores = pca.transform(x_train)

    fig, ax = plt.subplots(1, 2, figsize=(12.5, 4))

    # Hollow markers, one edge colour per sample, drawn in a single call
    # rather than one scatter call per point.
    ax[0].scatter(x_scores[:, 0], x_scores[:, 1], c="None",
                  edgecolors=edge_colors, marker="o", linewidth=1,
                  label=None, s=50)
    ax[0].set_xlabel('PC1')
    ax[0].set_ylabel('PC2')
    ax[0].set_title('PCA Score Plot')

    # The x axis is the positional index of each feature column, not the
    # column's own value.
    feature_index = range(x_loadings.shape[1])
    ax[1].plot(feature_index, x_loadings[0], c="r", label='PC1')
    ax[1].plot(feature_index, x_loadings[1], c="b", label='PC2')
    ax[1].set_xlabel('Wavelength')
    ax[1].set_ylabel('Loading Value')
    ax[1].set_title('Principal Component Loadings')
    ax[1].set_xlim(0, 200)

    # Same frame and tick styling on both panels.
    for panel in ax:
        for side in ('top', 'bottom', 'left', 'right'):
            panel.spines[side].set_linewidth(2)
        panel.tick_params(axis='both', which='both', direction='in', length=6)
        panel.tick_params(width=2)

    # Lay out the figure before saving so the file matches what is shown.
    plt.tight_layout()
    plt.savefig(".png", dpi=600)
    plt.show()

    components = (("PC1", x_loadings[0]), ("PC2", x_loadings[1]))

    # Column indices of the largest and smallest loading of each component.
    for name, loading in components:
        print(f"{name}:")
        print("Max_wavelength:", np.argmax(loading))
        print("Min_wavelength:", np.argmin(loading))

    # Local maxima with a non-negative loading ("peaks") and local minima
    # with a non-positive loading ("valleys"), reported as column indices.
    for name, loading in components:
        peaks, _ = find_peaks(loading, height=0)
        valleys, _ = find_peaks(-loading, height=0)
        print(f"Peak of {name}:", peaks)
        print(f"Valley of {name}:", valleys)

# Run the analysis for this cell; the scaling flag is passed as None.
pca_plot(None)