In [None]:
import wandb
import pandas as pd
import json
from typing import Any

def download_table(
    run_name: str,
    entity: str = 'a-b-o',
    project: str = 'Plant_Disease_Classification',
    artifact_index: int = 1,
) -> None:
    """
    Download one artifact logged by a W&B run.

    The run is looked up as ``{entity}/{project}/{run_name}`` through the
    public W&B API and the selected logged artifact is downloaded with
    ``artifact.download()`` using wandb's default destination.

    Parameters
    ----------
    run_name : str
        Identifier of the run, used as the last component of the run path.
    entity : str, optional
        W&B entity owning the project. Default is 'a-b-o'.
    project : str, optional
        W&B project containing the run. Default is
        'Plant_Disease_Classification'.
    artifact_index : int, optional
        Position of the artifact within ``run.logged_artifacts()``.
        Default is 1, the position this notebook has always used.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If indexing the run's logged artifacts at `artifact_index` fails
        because no artifact exists at that position.
    """
    api = wandb.Api()
    run_path = f'{entity}/{project}/{run_name}'
    run = api.run(run_path)

    try:
        artifact = run.logged_artifacts()[artifact_index]
    except IndexError as exc:
        # Same exception type as before, but say which run/index was at fault.
        raise IndexError(
            f"Run '{run_path}' has no logged artifact at index {artifact_index}"
        ) from exc

    artifact.download()

def get_dataframe_from_table(table_name: str) -> pd.DataFrame:
    """
    Load a JSON table file into a pandas DataFrame.

    The file must contain a JSON object with a ``"columns"`` entry (the
    column labels) and a ``"data"`` entry (the rows).

    Parameters
    ----------
    table_name : str
        Path of the JSON table file to load.

    Returns
    -------
    pd.DataFrame
        A DataFrame built from the ``"data"`` rows, labelled with the
        ``"columns"`` entries.

    Raises
    ------
    KeyError
        If the JSON object lacks the ``"data"`` or ``"columns"`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which is not UTF-8 everywhere and can corrupt or reject
    # non-ASCII content.
    with open(table_name, encoding="utf-8") as f:
        data = json.load(f)
    return pd.DataFrame(data["data"], columns=data["columns"])

In [None]:
from typing import Union 
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go

def plot_3d_interactive(transformed_data: Union[pd.DataFrame, np.ndarray], target_labels: np.ndarray, title: str = 'Interactive PCA 3D plot') -> None:
    """
    Show an interactive plotly 3D scatter plot with one trace per category.

    The first three columns of `transformed_data` are used as the x, y and z
    coordinates. Every distinct value in `target_labels` gets its own
    ``Scatter3d`` trace, so categories can be toggled from the legend.

    Parameters
    ----------
    transformed_data : Union[pd.DataFrame, np.ndarray]
        Reduced data of shape (n_samples, n_components) with
        n_components >= 3. Other array-likes are converted with
        ``np.asarray``.
    target_labels : np.ndarray
        One label per row of `transformed_data`, shape (n_samples,).
        Other 1-D array-likes (e.g. a pandas Series) are accepted and
        matched to rows by position.
    title : str, optional
        Title of the plot. Default is 'Interactive PCA 3D plot'.

    Returns
    -------
    None
        Displays the plot directly.

    Raises
    ------
    ValueError
        If `transformed_data` is not 2-D with at least three columns, or if
        `target_labels` is not 1-D with one entry per row.
    """
    # Plain arrays make the row selection below purely positional,
    # independent of any pandas index on the inputs.
    points = np.asarray(transformed_data)
    labels = np.asarray(target_labels)

    if points.ndim != 2 or points.shape[1] < 3:
        raise ValueError(
            "transformed_data must be 2-D with at least 3 columns, "
            f"got shape {points.shape}"
        )
    if labels.ndim != 1 or labels.shape[0] != points.shape[0]:
        raise ValueError(
            f"target_labels must be 1-D with {points.shape[0]} entries, "
            f"got shape {labels.shape}"
        )

    categories = np.unique(labels)
    # One value per category, evenly spaced over [0, 1]
    colors = np.linspace(0, 1, len(categories))

    traces = []
    for color_value, category in zip(colors, categories):
        mask = labels == category
        traces.append(
            go.Scatter3d(
                x=points[mask, 0],
                y=points[mask, 1],
                z=points[mask, 2],
                mode='markers',
                marker=dict(
                    size=4,
                    # NOTE(review): scalar color + colorscale is kept from the
                    # original; confirm plotly maps it through 'Jet' as intended.
                    color=color_value,
                    colorscale='Jet',
                    opacity=0.8
                ),
                # Legend entries are text; convert non-string labels explicitly.
                name=str(category)
            )
        )

    layout = go.Layout(
        title=title,
        scene=dict(
            xaxis=dict(title='PC 1'),
            yaxis=dict(title='PC 2'),
            zaxis=dict(title='PC 3')
        )
    )

    fig = go.Figure(data=traces, layout=layout)
    fig.show()

def reduce_dim_and_plot(data: np.ndarray, labels: np.ndarray, algorithm: PCA, title: str) -> None:
    """
    Min-max scale `data`, project it with `algorithm` and show it in 3D.

    Parameters
    ----------
    data : np.ndarray
        Samples to project, shape (n_samples, n_features).
    labels : np.ndarray
        Label of each sample, shape (n_samples,).
    algorithm : PCA
        Estimator whose ``fit_transform`` produces the low-dimensional
        embedding. It is (re)fitted on the scaled data by this call.
    title : str
        Title shown on the plot.

    Returns
    -------
    None
    """
    # A fresh scaler is fitted on every call
    scaled = MinMaxScaler().fit_transform(data)
    embedding = algorithm.fit_transform(scaled)
    plot_3d_interactive(embedding, labels, title)
    
def get_explained_variance(pca: PCA) -> None:
    """
    Print the variance captured by the components of a fitted PCA.

    Three lines are printed: the explained variance of each component, the
    explained variance ratio of each component, and the sum of those ratios.

    Parameters
    ----------
    pca : PCA
        A PCA object that has already been fitted, so that
        ``explained_variance_`` and ``explained_variance_ratio_`` exist.

    Returns
    -------
    None
    """
    eigenvalues = pca.explained_variance_
    # Share of the total variance carried by each component
    ratios = pca.explained_variance_ratio_

    print("Eigenvalues:", eigenvalues)
    print("Explained Variance Ratio:", ratios)
    print("Accumulative explained Variance Ratio:", np.sum(ratios))

# Analyzing the latent space created by the encoders

In [3]:
# Download the logged artifact of each of the four runs, in this order
for run_name in (
    "denoising_autoencoder_50_50_0_721kcrwe",
    "denoising_autoencoder_80_20_0_6o2hmk2i",
    "autoencoder_50_50_0_oasm9olk",
    "autoencoder_80_20_0_mpmp5ljz",
):
    download_table(run_name)

[34m[1mwandb[0m: Downloading large artifact run-denoising_autoencoder_50_50_0_721kcrwe-testLatentSpace:v0, 159.85MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.4
[34m[1mwandb[0m: Downloading large artifact run-denoising_autoencoder_80_20_0_6o2hmk2i-testLatentSpace:v0, 69.55MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:8.0
[34m[1mwandb[0m: Downloading large artifact run-autoencoder_50_50_0_oasm9olk-testLatentSpace:v0, 173.58MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:18.6
[34m[1mwandb[0m: Downloading large artifact run-autoencoder_80_20_0_mpmp5ljz-testLatentSpace:v0, 76.38MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:8.2


In [4]:
# Each downloaded table path is followed by the DataFrame loaded from it.

# Denoising autoencoder runs
run_denoise_50_50_file = "artifacts/run-denoising_autoencoder_50_50_0_721kcrwe-testLatentSpace-v0/test/LatentSpace.table.json"
df_denoise_50_50 = get_dataframe_from_table(run_denoise_50_50_file)

run_denoise_80_20_file = "artifacts/run-denoising_autoencoder_80_20_0_6o2hmk2i-testLatentSpace-v0/test/LatentSpace.table.json"
df_denoise_80_20 = get_dataframe_from_table(run_denoise_80_20_file)

# Plain autoencoder runs
run_50_50_file = "artifacts/run-autoencoder_50_50_0_oasm9olk-testLatentSpace-v0/test/LatentSpace.table.json"
df_50_50 = get_dataframe_from_table(run_50_50_file)

run_80_20_file = "artifacts/run-autoencoder_80_20_0_mpmp5ljz-testLatentSpace-v0/test/LatentSpace.table.json"
df_80_20 = get_dataframe_from_table(run_80_20_file)

In [5]:
# The latent features are stored in columns labelled "0" ... "511"
latent_space_columns = [str(column) for column in range(512)]

# Split every table into its latent vectors and its "target" column
run_50_50_latent_space, run_50_50_targets = (
    df_50_50[latent_space_columns],
    df_50_50["target"],
)
run_80_20_latent_space, run_80_20_targets = (
    df_80_20[latent_space_columns],
    df_80_20["target"],
)
run_denoise_50_50_latent_space, run_denoise_50_50_targets = (
    df_denoise_50_50[latent_space_columns],
    df_denoise_50_50["target"],
)
run_denoise_80_20_latent_space, run_denoise_80_20_targets = (
    df_denoise_80_20[latent_space_columns],
    df_denoise_80_20["target"],
)

In [6]:
from sklearn.manifold import TSNE

# Fixed seed so that a Restart & Run All reproduces the same projections:
# t-SNE is stochastic, and PCA can pick a randomized SVD solver depending on
# the data shape and scikit-learn version.
RANDOM_STATE = 42

# Both reducers project onto three dimensions for the 3D scatter plots
pca = PCA(n_components=3, random_state=RANDOM_STATE)
tsne = TSNE(n_components=3, random_state=RANDOM_STATE)

# Autoencoder 50 50 embeddings

In [7]:
# The name is rebound to the min-max scaled array because the explained-variance
# cell further down fits PCA on it; reduce_dim_and_plot scales again internally.
run_50_50_latent_space = MinMaxScaler().fit_transform(run_50_50_latent_space)
reduce_dim_and_plot(
    data=run_50_50_latent_space,
    labels=run_50_50_targets,
    algorithm=pca,
    title="Autoencoder 50-50 embeddings",
)

# Autoencoder 80 20 embeddings

In [8]:
# The name is rebound to the min-max scaled array because the explained-variance
# cell further down fits PCA on it; reduce_dim_and_plot scales again internally.
run_80_20_latent_space = MinMaxScaler().fit_transform(run_80_20_latent_space)
reduce_dim_and_plot(
    data=run_80_20_latent_space,
    labels=run_80_20_targets,
    algorithm=pca,
    title="Autoencoder 80-20 embeddings",
)

# Denoising Autoencoder 50 50 embeddings

In [9]:
# The name is rebound to the min-max scaled array because the explained-variance
# cell further down fits PCA on it; reduce_dim_and_plot scales again internally.
run_denoise_50_50_latent_space = MinMaxScaler().fit_transform(run_denoise_50_50_latent_space)
reduce_dim_and_plot(
    data=run_denoise_50_50_latent_space,
    labels=run_denoise_50_50_targets,
    algorithm=pca,
    title="Denoising autoencoder 50-50 embeddings",
)

# Denoising Autoencoder 80 20 embeddings

In [10]:
# The name is rebound to the min-max scaled array because the explained-variance
# cell further down fits PCA on it; reduce_dim_and_plot scales again internally.
run_denoise_80_20_latent_space = MinMaxScaler().fit_transform(run_denoise_80_20_latent_space)
reduce_dim_and_plot(
    data=run_denoise_80_20_latent_space,
    labels=run_denoise_80_20_targets,
    algorithm=pca,
    title="Denoising autoencoder 80-20 embeddings",
)

# Explained Variance

In [11]:
# Explained variance, autoencoder 50-50.
# Refits the shared `pca` on the array that the embeddings cell above already
# min-max scaled; `fit` returns the estimator, so it is passed on directly.
get_explained_variance(pca.fit(run_50_50_latent_space))

Eigenvalues: [12.24366278  5.54455099  1.11739264]
Explained Variance Ratio: [0.58694574 0.26579878 0.05356639]
Accumulative explained Variance Ratio: 0.9063109077625026


In [12]:
# Explained variance, autoencoder 80-20.
# Refits the shared `pca` on the array that the embeddings cell above already
# min-max scaled; `fit` returns the estimator, so it is passed on directly.
get_explained_variance(pca.fit(run_80_20_latent_space))

Eigenvalues: [7.6922358  2.79301852 1.87450254]
Explained Variance Ratio: [0.45398985 0.16484181 0.1106317 ]
Accumulative explained Variance Ratio: 0.7294633519096552


In [13]:
# Explained variance, denoising autoencoder 50-50.
# Refits the shared `pca` on the array that the embeddings cell above already
# min-max scaled; `fit` returns the estimator, so it is passed on directly.
get_explained_variance(pca.fit(run_denoise_50_50_latent_space))

Eigenvalues: [1.55102364 1.19938891 0.94388728]
Explained Variance Ratio: [0.18115418 0.14008446 0.11024276]
Accumulative explained Variance Ratio: 0.4314814057771289


In [14]:
# Explained variance, denoising autoencoder 80-20.
# Refits the shared `pca` on the array that the embeddings cell above already
# min-max scaled; `fit` returns the estimator, so it is passed on directly.
get_explained_variance(pca.fit(run_denoise_80_20_latent_space))

Eigenvalues: [15.21178402  6.71103339  1.6453262 ]
Explained Variance Ratio: [0.56244658 0.24813643 0.06083495]
Accumulative explained Variance Ratio: 0.8714179618361891
