In [None]:
#2D plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import LeaveOneOut, StratifiedKFold
from matplotlib.patches import Ellipse
from scipy.stats import chi2


# Path of the CSV file that plot_lda_with_ellipse() loads with pd.read_csv.
# NOTE(review): "data/.csv" has no file stem — it looks like a placeholder; confirm the real file name.
A="data/.csv"
def plot_lda_with_ellipse(scale_bool):
    """Fit a 2-component LDA on the training rows and draw its 2D score plot.

    Reads the CSV at the module-level path ``A``, keeps the rows whose ``test``
    column equals 1, and fits ``LinearDiscriminantAnalysis`` with the ``sample``
    column as class label and every remaining column as a feature. The two
    discriminant scores are scattered as hollow circles coloured per class, and
    the explained variance ratio of the fitted model is printed.

    Parameters
    ----------
    scale_bool : bool
        When truthy, a 95 % confidence ellipse is drawn once for every class
        that has at least three samples.

    Raises
    ------
    KeyError
        If a class label found in the data has no entry in the colour map.
    """
    polymers = pd.read_csv(A)
    polymers_train = polymers[polymers["test"] == 1]
    y_train = polymers_train["sample"]
    x_train = polymers_train.drop(["test", "sample"], axis=1)

    lda = LinearDiscriminantAnalysis(n_components=2)
    x_scores = lda.fit(x_train, y_train).transform(x_train)

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    # NOTE(review): this literal used to list 'P(BA-GMA15)' twice ("#F4A636" then
    # "#F8CA87"); Python keeps only the last value, so the effective colour is kept
    # here and the dead entry dropped. One of the two was probably meant for another
    # sample name — confirm. 'PA(BA-GMA2)' may likewise be a typo for 'P(BA-GMA2)'.
    cmap = {'PBA': "magenta", 'PBMA': "peru", 'PEA': "green", 'PMA': "orange", 'PHEA': "blue",
            'PCHMA': "tomato", 'PBzMA': "purple", 'PMMA': "darkblue",
            'P(BA-MMA2)': "#44840C", 'P(BA-St2)': "#945200", 'P(BA-AA2)': "#0096FF", 'P(BA-NMA2)': "#716DF6",
            'PA(BA-GMA2)': "#FADFB6", 'P(BA-GMA7)': "#F5B455", 'P(BA-GMA15)': "#F8CA87",
            'P(BA-GMM4)': "#DC0D23", 'P(BA-4HBA2)': "#83800F"}

    # The rcParams entry only influences axes created afterwards, so the frame of
    # the axes that already exists is thickened explicitly as well.
    plt.rcParams['axes.linewidth'] = 2
    for spine in ax.spines.values():
        spine.set_linewidth(2)
    ax.tick_params(width=2)
    ax.tick_params(axis='both', which='both', direction='in', length=6)

    #ax.set_xlim(-80, 60)
    #ax.set_xticks([-70, -60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60])
    #ax.set_ylim(-20, 25)
    #ax.set_yticks([-35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15, 20, 25])

    ax.set_xlabel('Score(1)')
    ax.set_ylabel('Score(2)')
    ax.set_title('LDA Score Plot')

    # Work class by class: one scatter call and at most one ellipse per class,
    # instead of recomputing and re-adding the same ellipse for every sample.
    labels = y_train.to_numpy()
    for sample in y_train.unique():
        color = cmap[sample]
        class_scores = x_scores[labels == sample]
        ax.scatter(class_scores[:, 0], class_scores[:, 1], c='None', edgecolors=color,
                   marker="o", label='None', linewidth=1, s=50)

        if scale_bool:
            ell = _confidence_ellipse(class_scores, color)
            if ell is not None:
                ax.add_patch(ell)

    #plt.savefig(".png", dpi=600)
    #plt.show()

    explained_variance_ratio = lda.explained_variance_ratio_
    np.set_printoptions(precision=4)
    print("Explained Variance Ratio (LDA):", explained_variance_ratio)


def _confidence_ellipse(points, color, level=0.95):
    """Return the confidence-ellipse patch for one class of 2D scores, or None if undefined."""
    n = points.shape[0]
    if n < 3:
        # np.cov needs at least two rows, and the (n - 1) / (n - 2) rescaling
        # below divides by zero when n == 2, so no ellipse can be drawn.
        return None

    # np.cov normalises by (n - 1); the factor turns that into an (n - 2) normalisation.
    covariance = np.cov(points.T) * (n - 1) / (n - 2)
    centre = points.mean(axis=0)

    eigenvalues, eigenvectors = np.linalg.eigh(covariance)
    order = eigenvalues.argsort()[::-1]  # largest variance first -> major axis
    eigenvalues, eigenvectors = eigenvalues[order], eigenvectors[:, order]
    # Round-off can push a zero eigenvalue slightly negative; clip so sqrt stays finite.
    eigenvalues = np.maximum(eigenvalues, 0.0)

    angle = np.degrees(np.arctan2(eigenvectors[1, 0], eigenvectors[0, 0]))
    width, height = 2 * np.sqrt(eigenvalues * chi2.ppf(level, 2))
    return Ellipse(xy=(centre[0], centre[1]), width=width, height=height, angle=angle,
                   edgecolor=color, linestyle='-', linewidth=2, fill=False)

def main():
    """Draw the 2D LDA score plot with the per-class ellipses switched on."""
    # The flag is only tested for truthiness, so pass a real boolean rather than 1.
    plot_lda_with_ellipse(scale_bool=True)


# Call main() only when this code runs as the top-level program (not on import).
if __name__ == "__main__":
    main()

In [None]:
#3D plot
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from itertools import combinations

# Path of the CSV file that plot_lda_with_ellipse_3d() loads with pd.read_csv.
# NOTE(review): "data/.csv" has no file stem — it looks like a placeholder; confirm the real file name.
A = "data/.csv"

def plot_lda_with_ellipse_3d(component1=1, component2=2, component3=3, x_range=None, y_range=None, z_range=None):
    """Fit an LDA on the training rows and show three of its components in 3D.

    Reads the CSV at the module-level path ``A``, keeps the rows whose ``test``
    column equals 1, and fits ``LinearDiscriminantAnalysis`` with the ``sample``
    column as class label and every remaining column as a feature. For each
    class the selected scores are drawn as markers, and every pair of points of
    that class is joined by a thin line (despite the function name, no ellipse
    is drawn). The figure is shown and the explained variance ratio printed.

    Parameters
    ----------
    component1, component2, component3 : int
        1-based numbers of the discriminant components mapped to the x, y and
        z axes. The model is fitted with as many components as the largest
        number requested.
    x_range, y_range, z_range : list of two numbers or None
        Axis limits handed to plotly; ``None`` leaves the limit to plotly.

    Raises
    ------
    ValueError
        If a component number is smaller than 1. scikit-learn also rejects a
        component count the data cannot provide.
    KeyError
        If a class label found in the data has no entry in the colour map.
    """
    components = (component1, component2, component3)
    if any(c < 1 for c in components):
        # Index 0 would silently wrap around to the last score column.
        raise ValueError("LDA component numbers are 1-based and must be >= 1")

    polymers = pd.read_csv(A)
    polymers_train = polymers[polymers["test"] == 1]
    y_train = polymers_train["sample"]
    x_train = polymers_train.drop(["test", "sample"], axis=1)

    # Fit exactly as many discriminants as the highest requested component needs.
    lda = LinearDiscriminantAnalysis(n_components=max(components))
    x_scores = lda.fit(x_train, y_train).transform(x_train)

    explained_variance_ratio = lda.explained_variance_ratio_

    fig = go.Figure()

    # NOTE(review): this literal used to list 'P(BA-GMA15)' twice ("#F4A636" then
    # "#F8CA87"); Python keeps only the last value, so the effective colour is kept
    # here and the dead entry dropped. One of the two was probably meant for another
    # sample name — confirm. 'PA(BA-GMA2)' may likewise be a typo for 'P(BA-GMA2)'.
    cmap = {'PBA': "magenta", 'PBMA': "peru", 'PEA': "green", 'PMA': "orange", 'PHEA': "blue",
            'PCHMA': "tomato", 'PBzMA': "purple", 'PMMA': "darkblue",
            'P(BA-MMA2)': "#44840C", 'P(BA-St2)': "#945200", 'P(BA-AA2)': "#0096FF", 'P(BA-NMA2)': "#716DF6",
            'PA(BA-GMA2)': "#FADFB6", 'P(BA-GMA7)': "#F5B455", 'P(BA-GMA15)': "#F8CA87",
            'P(BA-GMM4)': "#DC0D23", 'P(BA-4HBA2)': "#83800F"}

    selected_scores = x_scores[:, [c - 1 for c in components]]
    labels = y_train.to_numpy()

    for sample in y_train.unique():
        color = cmap[sample]
        points = selected_scores[labels == sample]

        # All pairwise segments of the class go into ONE trace; the None entries
        # break the line between segments. One trace per pair makes the figure
        # grow quadratically in traces and become very slow to render.
        seg_x, seg_y, seg_z = _pairwise_segments(points)
        if seg_x:
            fig.add_trace(go.Scatter3d(
                x=seg_x,
                y=seg_y,
                z=seg_z,
                mode='lines',
                line=dict(color=color, width=1),
                showlegend=False
            ))

        fig.add_trace(go.Scatter3d(
            x=points[:, 0],
            y=points[:, 1],
            z=points[:, 2],
            mode='markers',
            marker=dict(size=1, color=color),
            name=sample
        ))

    def axis_spec(component, axis_range):
        # Axis title carries the component number and its explained variance share.
        return dict(
            title=f'LDA Component {component} ({explained_variance_ratio[component-1]*100:.2f}%)',
            range=axis_range)

    fig.update_layout(
        scene=dict(
            xaxis=axis_spec(component1, x_range),
            yaxis=axis_spec(component2, y_range),
            zaxis=axis_spec(component3, z_range),
            aspectmode='cube'
        ),
        title=f'LDA 3D Plot: Components {component1}, {component2}, {component3}',
        width=1200,
        height=800
    )

    fig.show()

    np.set_printoptions(precision=4)
    print("Explained Variance Ratio (LDA):", explained_variance_ratio)


def _pairwise_segments(points):
    """Return x, y, z lists joining every pair of rows in ``points``, with None between segments."""
    xs, ys, zs = [], [], []
    for p1, p2 in combinations(points, 2):
        xs += [p1[0], p2[0], None]
        ys += [p1[1], p2[1], None]
        zs += [p1[2], p2[2], None]
    return xs, ys, zs


# Show the 3D score plot for discriminants 1-3 using fixed axis limits.
plot_lda_with_ellipse_3d(
    component1=1,
    component2=2,
    component3=3,
    x_range=[-60, 60],
    y_range=[-40, 20],
    z_range=[-40, 30],
)