In [78]:
import wandb
import pandas as pd
import json

def download_table(run_name, entity='a-b-o', project='Plant_Disease_Classification', artifact_index=1):
    """
    Download one artifact that was logged by a Weights & Biases run.

    Parameters:
    - run_name (str): Last segment of the run path; the run is looked up as
      '<entity>/<project>/<run_name>'.
    - entity (str, optional): W&B entity owning the project. Default is 'a-b-o'.
    - project (str, optional): W&B project containing the run.
      Default is 'Plant_Disease_Classification'.
    - artifact_index (int, optional): Position of the wanted artifact in
      `run.logged_artifacts()`. Default is 1.
      NOTE(review): index 1 is presumably the latent-space table of the run;
      confirm, since the position depends on the order artifacts were logged in.

    Returns:
    - None (the artifact files are downloaded as a side effect).
    """
    api = wandb.Api()

    run = api.run(f'{entity}/{project}/{run_name}')

    # Pick the artifact by position among everything the run logged.
    artifact = run.logged_artifacts()[artifact_index]

    artifact.download()
    
def get_dataframe_from_table(table_name):
    """
    Load a table stored as JSON into a pandas DataFrame.

    The file must hold a JSON object with a "data" entry (the rows) and a
    "columns" entry (the column labels).

    Parameters:
    - table_name (str or path-like): Path of the JSON file to read.

    Returns:
    - DataFrame built from the "data" rows, labelled with "columns".

    Raises:
    - KeyError: if the JSON object has no "data" or no "columns" entry.
    """
    # Read as UTF-8 explicitly: without it open() falls back to the platform
    # default codec, which can mis-decode non-ASCII text in the table.
    with open(table_name, encoding="utf-8") as f:
        data = json.load(f)
    return pd.DataFrame(data["data"], columns=data["columns"])

In [79]:
import numpy as np
import plotly.graph_objs as go
import pandas as pd
import json
from sklearn.preprocessing import MinMaxScaler

def plot_3d_interactive(transformed_data, target_labels, title='Interactive PCA 3D plot'):
    """
    Generate an interactive 3D scatter plot of dimension-reduced data, one trace
    (and one colour) per category.

    Parameters:
    - transformed_data (DataFrame or array): 2-D data with at least three columns;
      columns 0, 1 and 2 are plotted on the x, y and z axes.
    - target_labels (array-like): Category of each row of `transformed_data`
      (pandas Series, numpy array or plain list).
    - title (str, optional): Title of the plot. Default is 'Interactive PCA 3D plot'.

    Returns:
    - None (displays the plot directly).

    Raises:
    - ValueError: if `transformed_data` is not 2-D with at least three columns,
      or if the number of labels differs from the number of rows.
    """
    # Imported locally so the function does not depend on another cell's imports.
    from plotly.colors import sample_colorscale

    # Ensure the transformed_data is a numpy array
    if isinstance(transformed_data, pd.DataFrame):
        transformed_data = transformed_data.values
    transformed_data = np.asarray(transformed_data)

    # A plain list compared with `==` yields a single bool, not a mask, which
    # would silently give empty traces; convert the labels to an array first.
    target_labels = np.asarray(target_labels).ravel()

    if transformed_data.ndim != 2 or transformed_data.shape[1] < 3:
        raise ValueError(
            "transformed_data must be 2-D with at least 3 columns, "
            f"got shape {transformed_data.shape}"
        )
    if target_labels.size != transformed_data.shape[0]:
        raise ValueError(
            f"got {target_labels.size} labels for {transformed_data.shape[0]} data points"
        )

    categories = np.unique(target_labels)

    # marker.color is documented as either a specific colour or an *array* of
    # numbers mapped to the colorscale, so a single number per trace is not a
    # reliable way to pick a colour. Sample one concrete colour per category
    # from the Jet colorscale, evenly spaced over [0, 1].
    positions = np.linspace(0, 1, len(categories)).tolist()
    category_colors = sample_colorscale('Jet', positions)

    # Create one trace per category using Plotly
    traces = []
    for category, category_color in zip(categories, category_colors):
        points = transformed_data[target_labels == category]
        trace = go.Scatter3d(
            x=points[:, 0],
            y=points[:, 1],
            z=points[:, 2],
            mode='markers',
            marker=dict(
                size=4,
                color=category_color,
                opacity=0.8
            ),
            name=str(category)  # legend entries must be strings
        )
        traces.append(trace)

    # Create layout
    layout = go.Layout(
        title=title,
        scene=dict(
            xaxis=dict(title='PC 1'),
            yaxis=dict(title='PC 2'),
            zaxis=dict(title='PC 3')
        )
    )

    # Plot using Plotly
    fig = go.Figure(data=traces, layout=layout)
    fig.show()

def reduce_dim_and_plot(data, labels, algorithm, title):
    """
    Min-max scale `data`, reduce it with `algorithm` and show the result in 3D.

    Parameters:
    - data (DataFrame or array): Samples to scale and reduce.
    - labels (array-like): Category of each sample, forwarded to the plot.
    - algorithm: Object exposing `fit_transform(data)`; it is fitted on the
      scaled data by this call.
    - title (str): Title of the plot.

    Returns:
    - None (the plot is displayed by `plot_3d_interactive`).
    """
    scaled = MinMaxScaler().fit_transform(data)
    embedding = algorithm.fit_transform(scaled)
    plot_3d_interactive(embedding, labels, title)
    
def get_explained_variance(pca):
    """
    Print the explained-variance figures of an already fitted decomposition.

    Parameters:
    - pca: Object exposing `explained_variance_` and `explained_variance_ratio_`.

    Returns:
    - None (three lines are printed: the explained variance, the per-component
      ratio, and the sum of the ratios).
    """
    eigenvalues, ratios = pca.explained_variance_, pca.explained_variance_ratio_

    print("Eigenvalues:", eigenvalues)
    print("Explained Variance Ratio:", ratios)
    # Sum of the per-component ratios = share of variance kept by all components
    print("Accumulative explained Variance Ratio:", sum(ratios))

In [80]:
# Download the logged artifact of each of the four runs compared below
for run_id in (
    "denoising_autoencoder_50_50_0_721kcrwe",
    "denoising_autoencoder_80_20_0_6o2hmk2i",
    "autoencoder_50_50_0_oasm9olk",
    "autoencoder_80_20_0_mpmp5ljz",
):
    download_table(run_id)

[34m[1mwandb[0m: Downloading large artifact run-denoising_autoencoder_50_50_0_721kcrwe-testLatentSpace:v0, 159.85MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5
[34m[1mwandb[0m: Downloading large artifact run-denoising_autoencoder_80_20_0_6o2hmk2i-testLatentSpace:v0, 69.55MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.3
[34m[1mwandb[0m: Downloading large artifact run-autoencoder_50_50_0_oasm9olk-testLatentSpace:v0, 173.58MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5
[34m[1mwandb[0m: Downloading large artifact run-autoencoder_80_20_0_mpmp5ljz-testLatentSpace:v0, 76.38MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.4


In [91]:
# Locations of the downloaded latent-space tables, one per run
run_denoise_50_50_file = "artifacts/run-denoising_autoencoder_50_50_0_721kcrwe-testLatentSpace-v0/test/LatentSpace.table.json"
run_denoise_80_20_file = "artifacts/run-denoising_autoencoder_80_20_0_6o2hmk2i-testLatentSpace-v0/test/LatentSpace.table.json"
run_50_50_file = "artifacts/run-autoencoder_50_50_0_oasm9olk-testLatentSpace-v0/test/LatentSpace.table.json"
run_80_20_file = "artifacts/run-autoencoder_80_20_0_mpmp5ljz-testLatentSpace-v0/test/LatentSpace.table.json"

# Parse every table into its own DataFrame (same order as the paths above)
df_denoise_50_50, df_denoise_80_20, df_50_50, df_80_20 = [
    get_dataframe_from_table(table_file)
    for table_file in (
        run_denoise_50_50_file,
        run_denoise_80_20_file,
        run_50_50_file,
        run_80_20_file,
    )
]

Unnamed: 0,target,0,1,2,3,4,5,6,7,8,...,502,503,504,505,506,507,508,509,510,511
0,Tomato___Septoria_leaf_spot,0.250845,0.000000,2.705881,0.000000,0.722908,0.000000,0.000000,1.324923,0.000000,...,3.778956,0.000000,3.660818,0.000000,2.962283,0.000000,0.000000,0.000000,0.000000,1.831895
1,Cherry_(including_sour)___healthy,1.409838,0.544014,0.000000,0.173394,0.000000,0.386605,1.866201,1.198290,0.903851,...,0.902987,0.000000,0.000000,0.000000,0.718557,0.388653,1.448120,2.000940,0.423041,0.000000
2,Orange___Haunglongbing_(Citrus_greening),0.000000,0.000000,0.000000,0.000000,0.131501,0.000000,0.041495,0.000000,0.000000,...,0.000000,0.000000,0.135381,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.732140
3,Corn_(maize)___healthy,1.316505,0.241264,0.164719,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,1.117594,0.000000,1.199570,0.000000,1.175196,0.000000,0.297437,1.213868,1.027170
4,Corn_(maize)___Common_rust_,0.251158,0.153077,0.000000,0.296112,0.246614,0.000000,0.000000,0.000000,0.000000,...,0.000000,1.632480,0.135410,0.963495,0.000000,0.000000,0.000000,0.000000,0.000000,0.933878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27148,Squash___Powdery_mildew,0.000000,0.665638,0.000000,0.000000,3.197766,0.000000,0.029113,0.000000,0.000000,...,0.000000,0.760130,1.049900,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.632300
27149,Corn_(maize)___healthy,0.789556,0.000000,0.000000,0.229050,0.000000,0.000000,0.071609,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.264972,0.000000,0.588216,0.000000,0.000000,1.120265,0.611614
27150,Tomato___Tomato_mosaic_virus,0.000000,0.000000,0.512662,0.488121,0.000000,2.553916,0.024957,1.383618,0.811411,...,0.756749,0.000000,0.000000,0.000000,0.000000,0.307323,1.624516,0.134020,0.000000,0.000000
27151,Tomato___Spider_mites Two-spotted_spider_mite,0.000000,0.000000,0.109316,0.281333,0.000000,0.818299,0.244346,0.000000,0.454108,...,0.000000,0.000000,0.000000,0.482440,0.241781,0.145939,1.550136,0.654186,0.000000,0.205470


In [82]:
# Labels of the latent-feature columns: the strings "0" through "511"
latent_space_columns = [str(column) for column in range(512)]

# For every run, separate the latent features from the "target" column
run_50_50_latent_space, run_50_50_targets = (
    df_50_50[latent_space_columns],
    df_50_50["target"],
)
run_80_20_latent_space, run_80_20_targets = (
    df_80_20[latent_space_columns],
    df_80_20["target"],
)
run_denoise_50_50_latent_space, run_denoise_50_50_targets = (
    df_denoise_50_50[latent_space_columns],
    df_denoise_50_50["target"],
)
run_denoise_80_20_latent_space, run_denoise_80_20_targets = (
    df_denoise_80_20[latent_space_columns],
    df_denoise_80_20["target"],
)

In [83]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Fixed seed so the projections are reproducible across kernel restarts:
# PCA's randomized/arpack SVD solvers and t-SNE both draw random numbers.
RANDOM_STATE = 42

# Both reducers project onto three components, matching the 3D scatter plots.
pca = PCA(n_components=3, random_state=RANDOM_STATE)
tsne = TSNE(n_components=3, random_state=RANDOM_STATE)

# Autoencoder 50 50 embeddings

In [84]:
# Keep the min-max scaled features under the same name (the explained-variance
# cell further down reads it), then show their 3D PCA projection.
run_50_50_latent_space = MinMaxScaler().fit_transform(run_50_50_latent_space)
reduce_dim_and_plot(
    data=run_50_50_latent_space,
    labels=run_50_50_targets,
    algorithm=pca,
    title="Autoencoder 50-50 embeddings",
)

# Autoencoder 80 20 embeddings

In [85]:
# Keep the min-max scaled features under the same name (the explained-variance
# cell further down reads it), then show their 3D PCA projection.
run_80_20_latent_space = MinMaxScaler().fit_transform(run_80_20_latent_space)
reduce_dim_and_plot(
    data=run_80_20_latent_space,
    labels=run_80_20_targets,
    algorithm=pca,
    title="Autoencoder 80-20 embeddings",
)

# Denoising Autoencoder 50 50 embeddings

In [93]:
# Keep the min-max scaled features under the same name (the explained-variance
# cell further down reads it), then show their 3D PCA projection.
run_denoise_50_50_latent_space = MinMaxScaler().fit_transform(run_denoise_50_50_latent_space)
reduce_dim_and_plot(
    data=run_denoise_50_50_latent_space,
    labels=run_denoise_50_50_targets,
    algorithm=pca,
    title="Denoising autoencoder 50-50 embeddings",
)

# Denoising Autoencoder 80 20 embeddings

In [88]:
# Keep the min-max scaled features under the same name (the explained-variance
# cell further down reads it), then show their 3D PCA projection.
run_denoise_80_20_latent_space = MinMaxScaler().fit_transform(run_denoise_80_20_latent_space)
reduce_dim_and_plot(
    data=run_denoise_80_20_latent_space,
    labels=run_denoise_80_20_targets,
    algorithm=pca,
    title="Denoising autoencoder 80-20 embeddings",
)

# Explained Variance

In [96]:
# Explained variance, autoencoder 50-50.
# PCA.fit returns the fitted estimator itself, so it can be passed straight on.
get_explained_variance(pca.fit(run_50_50_latent_space))

Explained Variance (Eigenvalues): [12.24366278  5.54455099  1.11739264]
Accumulative Explained Variance Ratio: 0.906310907762502


In [None]:
# Explained variance, autoencoder 80-20.
# PCA.fit returns the fitted estimator itself, so it can be passed straight on.
get_explained_variance(pca.fit(run_80_20_latent_space))

In [None]:
# Explained variance, denoising autoencoder 50-50.
# PCA.fit returns the fitted estimator itself, so it can be passed straight on.
get_explained_variance(pca.fit(run_denoise_50_50_latent_space))

In [None]:
# Explained variance, denoising autoencoder 80-20.
# PCA.fit returns the fitted estimator itself, so it can be passed straight on.
get_explained_variance(pca.fit(run_denoise_80_20_latent_space))