In [None]:
import numpy as np
import pandas as pd
import cv2
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

In [None]:
# Read the module metadata, then transpose it and order the rows by index.
DataSolarModules = (
    pd.read_json('InfraredSolarModules/module_metadata.json')
    .transpose()
    .sort_index()
)

# Distinct values found in the `anomaly_class` column.
Classes = DataSolarModules['anomaly_class'].unique()

# Lookup table: class name -> position of that name inside `Classes`.
class_to_number = {name: code for code, name in enumerate(Classes.flatten(), 0)}

In [None]:
def map_to_class(value):
    """Translate an anomaly class name into its class code.

    Parameters
    ----------
    value : hashable
        Class name to look up in the module-level ``class_to_number`` mapping.

    Returns
    -------
    int or str
        The code stored for ``value``, or the string ``'Unknown'`` when
        ``value`` is not a key of ``class_to_number``.
    """
    # Reading a module-level name needs no declaration or bare reference;
    # the lookup below resolves `class_to_number` at call time.
    return class_to_number.get(value, 'Unknown')

# Store the code returned by `map_to_class` for every row's anomaly class.
anomaly_names = DataSolarModules['anomaly_class']
DataSolarModules['class_code'] = anomaly_names.apply(map_to_class)

In [None]:
def read_images_dataframe(dataframe, root="InfraredSolarModules/"):
    """Load the grayscale images listed in ``dataframe`` into one array.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an ``image_filepath`` column. Each entry is appended to
        ``root`` to form the path handed to ``cv2.imread``.
    root : str, optional
        Prefix prepended to every entry of ``image_filepath``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(n_images, 40, 24)`` holding the pixel
        values divided by 255. When ``image_filepath`` has no entries the
        result is an empty 1-D array.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read one of the files.
    ValueError
        If an image does not hold exactly 40 * 24 pixels (raised by
        ``reshape``).
    """
    images = []
    for image_path in dataframe['image_filepath']:
        full_path = root + image_path
        img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising, so
        # report the offending path here rather than failing on `.reshape`.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {full_path}")
        images.append(img.reshape(40, 24).astype("float32") / 255)
    return np.array(images)

def read_labels_dataframe(dataframe):
    """Return the ``class_code`` column of ``dataframe`` as an ``int32`` array.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide a ``class_code`` column.

    Returns
    -------
    numpy.ndarray
        1-D ``int32`` array with one entry per row, in row order.

    Raises
    ------
    ValueError
        If a value in ``class_code`` cannot be converted to an integer.
    """
    labels = np.array(list(dataframe['class_code']))
    # `astype` returns a converted copy and leaves `labels` untouched, so the
    # converted array is what has to be returned.
    return labels.astype("int32")

In [None]:
# Build the two arrays used by every later cell: one class code and one image
# per row of DataSolarModules.
labels = read_labels_dataframe(DataSolarModules)
images = read_images_dataframe(DataSolarModules)

In [None]:
# Perplexity settings swept by the T-SNE cells below, one figure per value
perplexity_values = [
    5,
    10,
    20,
    30,
    50,
]

In [None]:
# T-SNE visualization with K-means cluster labels, one figure per perplexity

# Flatten every image to a single feature vector. This does not depend on the
# perplexity, so it is done once.
flat_images = images.reshape(images.shape[0], -1)

# K-means is fitted on the raw pixel vectors (not on the T-SNE embedding) with
# a fixed seed, so its labels are identical for every perplexity. Fit it once
# here instead of once per loop iteration.
kmeans = KMeans(n_clusters=len(Classes), random_state=42)
cluster_labels = kmeans.fit_predict(flat_images)

for perplexity in perplexity_values:
    # Reduce dimensionality using T-SNE with the current perplexity
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    tsne_result = tsne.fit_transform(flat_images)

    # Create a DataFrame for visualization
    tsne_df = pd.DataFrame(tsne_result, columns=['tsne1', 'tsne2'])
    tsne_df['cluster'] = cluster_labels

    # Plot the T-SNE visualization with cluster labels
    plt.figure(figsize=(10, 8))
    sns.scatterplot(
        x='tsne1', y='tsne2',
        hue='cluster',
        palette='tab10',
        data=tsne_df,
        legend='full',
        alpha=0.8
    )
    plt.title(f'T-SNE Visualization with Cluster Labels (Perplexity={perplexity})')
    plt.xlabel('T-SNE Component 1')
    plt.ylabel('T-SNE Component 2')
    plt.show()

In [None]:
# T-SNE visualization with the true class labels, one figure per perplexity

# Flatten every image to a single feature vector. This does not depend on the
# perplexity, so it is done once.
flat_images = images.reshape(images.shape[0], -1)

# `images` was read row by row from DataSolarModules, so embedding row i
# belongs to metadata row i. Take the class names as a plain array so they are
# attached by position: assigning the Series itself would align on index
# labels against the embedding frame's 0..N-1 index.
class_names = DataSolarModules['anomaly_class'].to_numpy()

for perplexity in perplexity_values:
    # Reduce dimensionality using T-SNE with the current perplexity
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    tsne_result = tsne.fit_transform(flat_images)

    # Create a DataFrame for visualization
    tsne_df = pd.DataFrame(tsne_result, columns=['tsne1', 'tsne2'])
    tsne_df['class'] = class_names

    # Plot the T-SNE visualization with class labels
    plt.figure(figsize=(10, 8))
    sns.scatterplot(
        x='tsne1', y='tsne2',
        hue='class',
        palette='hsv',
        data=tsne_df,
        legend='full',
        alpha=0.8
    )
    plt.title(f'T-SNE Visualization with Class Labels (Perplexity={perplexity})')
    plt.xlabel('T-SNE Component 1')
    plt.ylabel('T-SNE Component 2')
    plt.show()

In [None]:
# Perform K-means clustering for the confusion matrix
kmeans = KMeans(n_clusters=len(Classes), random_state=42)
cluster_labels = kmeans.fit_predict(images.reshape(images.shape[0], -1))

# Class codes come from enumerating `Classes` and K-means was asked for
# len(Classes) clusters, so both label sets range over 0..len(Classes)-1.
# Fixing that set explicitly keeps the matrix shape and the tick labels in
# agreement even if a cluster ends up empty or a class code is absent.
all_codes = list(range(len(Classes)))

# Rows are true class codes, columns are cluster ids
conf_matrix_cluster = confusion_matrix(labels, cluster_labels, labels=all_codes)

# Plot confusion matrix for cluster labels
plt.figure(figsize=(8, 6))
sns.heatmap(
    conf_matrix_cluster,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=all_codes,
    yticklabels=all_codes
)
plt.xlabel('Cluster Label')
plt.ylabel('Class Label')
plt.title('Confusion Matrix: Cluster vs Class Labels')
plt.show()