In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import PIL.Image
import tensorflow as tf

from keras.preprocessing.image import ImageDataGenerator

import keras
from keras.callbacks import EarlyStopping
from keras.models import Sequential, Model
from tensorflow.keras.layers import Input

from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Lambda, Reshape, Conv2DTranspose
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.utils import to_categorical

from keras.models import Model
from keras.losses import binary_crossentropy, mse
from keras import backend as K
from tensorflow.keras import layers, models
from tensorflow.python.framework.ops import disable_eager_execution

from scikeras.wrappers import KerasClassifier

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve, auc, roc_auc_score

from sklearn.manifold import TSNE
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go
import seaborn as sns
from itertools import product
from tqdm import tqdm
from ipywidgets import interact, IntSlider, FloatSlider
from IPython.display import display, clear_output

%matplotlib inline



In [2]:
# Root folder of the known-class training images (one sub-folder per class).
known_path_root = r"C:\MalwareOpenSetLearningDatasets\malex_dataset\train"
# Make the dataset root the working directory as well.
os.chdir(known_path_root)

In [3]:
# Read the images as 64x64 grayscale. batch_size (65000) is larger than the
# number of images reported for this directory, so a single batch is expected
# to hold the whole dataset.
known_batches = ImageDataGenerator().flow_from_directory(
    directory=known_path_root,
    color_mode="grayscale",
    target_size=(64, 64),
    batch_size=65000,
)

# Pull that one batch: image tensor plus the matching one-hot label matrix.
known_imgs, known_labels = next(known_batches)

Found 25619 images belonging to 26 classes.


In [4]:
# Flatten every image into a single feature row.
known_imgs_reshaped = known_imgs.reshape(known_imgs.shape[0], -1)

# First 1000 rows, raw and scaled by 1/255.
known_imgs_reshaped_subset = known_imgs_reshaped[:1000]
known_imgs_reshaped_normalized_subset = known_imgs_reshaped_subset / 255.

# Whole dataset: scale by 1/255 first, then flatten.
known_imgs_fulldata_normalized = (known_imgs / 255.).reshape(known_imgs.shape[0], -1)

# Map class index -> class (folder) name, and one-hot labels -> integer class index.
class_indices_reversed = {
    index: name for name, index in known_batches.class_indices.items()
}
known_labels_indices = known_labels.argmax(axis=1)

In [11]:
# 3D t-SNE scatter of the flattened, normalised images, one trace per class.

# Build the estimator in this cell so it runs on a fresh kernel: no visible
# cell defines `tsne`. Three components are required because columns 0..2 of
# the embedding are read below.
# NOTE(review): if a specific perplexity / iteration budget was used when this
# figure was first produced, set it here.
tsne = TSNE(n_components=3)
known_imgs_3d = tsne.fit_transform(known_imgs_fulldata_normalized)

x = known_imgs_3d[:, 0]
y = known_imgs_3d[:, 1]
z = known_imgs_3d[:, 2]

# Class index -> class name, and the per-sample hover labels.
class_indices_reversed = {v: k for k, v in known_batches.class_indices.items()}
class_names = [class_indices_reversed[i] for i in known_labels_indices]
hover_text = ['Class: {}'.format(name) for name in class_names]

# Number of classes comes from the one-hot label width, not a hard-coded 26.
num_classes = known_labels.shape[1]

# One explicit colour per class, sampled from the Spectral palette. A single
# number passed as marker.color has no shared cmin/cmax across traces to be
# scaled against, so it cannot give consistent per-class colours.
class_colors = sns.color_palette('Spectral', num_classes).as_hex()

# Create a list of trace objects for each class
trace_list = []
for i in range(num_classes):
    idx = np.where(known_labels_indices == i)[0]
    trace_list.append(
        go.Scatter3d(
            x=x[idx],
            y=y[idx],
            z=z[idx],
            mode='markers',
            marker=dict(
                size=5,
                color=class_colors[i],
                opacity=0.8
            ),
            text=[hover_text[j] for j in idx],
            name=class_indices_reversed[i],
            visible=True  # set all traces visible by default
        )
    )

# Dropdown menu: the first entry shows every class (this matches the initial
# state, where all traces are visible); each further entry isolates one class.
buttons = [
    dict(
        label='All classes',
        method='update',
        args=[{'visible': [True] * num_classes}]
    )
]
for i in range(num_classes):
    visible_list = [j == i for j in range(num_classes)]
    buttons.append(
        dict(
            label=class_indices_reversed[i],
            method='update',
            args=[{'visible': visible_list}]
        )
    )

# Single menu definition, using the settings that were actually applied to the
# figure before (the previously unused duplicate definition is dropped).
updatemenus = [
    dict(
        type="dropdown",
        buttons=buttons,
        active=0,
        x=1.05,
        y=0.95,
        yanchor="top"
    )
]

fig = go.Figure(data=trace_list)

fig.update_traces(hovertemplate='(%{x:.2f}, %{y:.2f}, %{z:.2f})<br>%{text}', showlegend=True)
fig.update_layout(
    updatemenus=updatemenus,
    # NOTE(review): this is the layout-level (2D cartesian) y axis; the axes of
    # a 3D plot live under `scene` (e.g. scene=dict(yaxis=dict(range=...))).
    # Kept as-is - confirm whether clipping the 3D y axis was intended.
    yaxis=dict(range=[-30, 30])
)

fig.show()

In [8]:
# 2D t-SNE Visualization

# Function to compute t-SNE embeddings for given perplexity and iterations
def compute_tsne(perplexity, iterations, random_state=None):
    """Embed the full normalised image matrix into 2D with t-SNE.

    Reads the module-level ``known_imgs_fulldata_normalized`` array.

    Parameters
    ----------
    perplexity : float
        Perplexity passed to ``TSNE``.
    iterations : int
        Maximum number of optimisation iterations.
    random_state : int or None, optional
        Seed forwarded to ``TSNE``; pass an int for a repeatable embedding.
        The default (None) keeps the previous unseeded behaviour.

    Returns
    -------
    The array returned by ``TSNE.fit_transform`` (two columns, since
    ``n_components=2``).
    """
    common = dict(n_components=2, perplexity=perplexity, random_state=random_state)
    try:
        # Current scikit-learn name for the iteration budget.
        tsne = TSNE(max_iter=iterations, **common)
    except TypeError:
        # Older scikit-learn releases only accept the previous keyword name.
        tsne = TSNE(n_iter=iterations, **common)
    return tsne.fit_transform(known_imgs_fulldata_normalized)

# Class name of every sample, in sample order.
class_labels = [class_indices_reversed[label_idx] for label_idx in known_labels_indices]

# Hyper-parameter grid: every perplexity is paired with every iteration budget.
perplexity_values = [25, 50, 75, 100, 125]
iterations_values = [1000, 2000, 3000]

combinations = [(p, n) for p in perplexity_values for n in iterations_values]

# Precompute one embedding per grid point, keyed by (perplexity, iterations).
tsne_embeddings_dict = {}
for perplexity, iterations in tqdm(combinations, desc='Computing t-SNE', unit='combination'):
    tsne_embeddings = compute_tsne(perplexity, iterations)
    tsne_embeddings_dict[perplexity, iterations] = tsne_embeddings

# Bounds of the grid along each axis.
min_perplexity, max_perplexity = min(perplexity_values), max(perplexity_values)
min_iterations, max_iterations = min(iterations_values), max(iterations_values)

# Start from a figure that carries only layout settings (no traces yet).
layout = {
    'title': 'MaleX OSR 2D t-SNE Plot',
    'showlegend': True,
    'height': 700,
    # Legend sits just right of the plot area; clicking an entry toggles the others.
    'legend': {
        'x': 1.02,
        'y': 0.5,
        'traceorder': 'normal',
        'bordercolor': 'gray',
        'borderwidth': 1,
        'itemclick': 'toggleothers',
        'itemsizing': 'constant',
    },
}
fig = go.Figure(layout=layout)

# Function to update the t-SNE plot
def update_tsne(perplexity, iterations):
    """Redraw the module-level ``fig`` from one precomputed 2D embedding.

    Looks up ``tsne_embeddings_dict[(perplexity, iterations)]``, replaces all
    traces of ``fig`` with one scatter trace per class found in
    ``class_labels``, and fits both axis ranges to the embedding.

    Parameters
    ----------
    perplexity, iterations
        Key of the embedding to show.

    Raises
    ------
    KeyError
        If no embedding is stored for ``(perplexity, iterations)``.
    """
    tsne_embeddings = tsne_embeddings_dict[(perplexity, iterations)]

    # Convert once; the per-class masks below all compare against this array.
    labels_arr = np.asarray(class_labels)
    class_labels_unique = np.unique(labels_arr)
    color_palette = sns.color_palette('hls', len(class_labels_unique)).as_hex()

    fig.data = []
    for i, class_label in enumerate(class_labels_unique):
        class_label = str(class_label)
        indices = np.where(labels_arr == class_label)[0]

        # The "Unknown" class is drawn in black; every other class takes its
        # palette colour. The test is on this trace's label (comparing the
        # whole label list to a string is never true).
        if class_label == "Unknown":
            marker_color = "black"
        else:
            marker_color = color_palette[i]

        fig.add_trace(
            go.Scatter(
                x=tsne_embeddings[indices, 0],
                y=tsne_embeddings[indices, 1],
                mode='markers',
                marker=dict(size=5, color=marker_color, opacity=0.8),
                text=class_label,
                name=class_label,
                legendgroup=class_label,
                showlegend=True
            )
        )

    # Fit the axes to the extent of the selected embedding.
    fig.update_xaxes(range=[tsne_embeddings[:, 0].min(), tsne_embeddings[:, 0].max()])
    fig.update_yaxes(range=[tsne_embeddings[:, 1].min(), tsne_embeddings[:, 1].max()])

    fig.update_layout(showlegend=True)

# Sliders choosing which precomputed (perplexity, iterations) embedding is shown.
# Both start at the smallest value of their grid.
perplexity_slider = IntSlider(
    description='Perplexity:',
    value=min_perplexity,
    min=min_perplexity,
    max=max_perplexity,
    step=25,
)

iterations_slider = IntSlider(
    description='Iterations:',
    value=min_iterations,
    min=min_iterations,
    max=max_iterations,
    step=1000,
)

# Draw the plot for the sliders' starting values.
update_tsne(perplexity_slider.value, iterations_slider.value)

# Function to handle slider value changes
def on_value_change(change):
    """Slider observer: rebuild the plot for the current slider values.

    ``change`` is the event passed by the widget and is not read; the values
    are taken from the two sliders directly. The previous output is cleared,
    then the figure and both sliders are displayed again.
    """
    clear_output(wait=True)
    update_tsne(perplexity_slider.value, iterations_slider.value)

    # Make every trace visible again and keep the legend on, applied together.
    with fig.batch_update():
        fig.update_traces(visible=True)
        fig.update_layout(showlegend=True)

    display(fig, perplexity_slider, iterations_slider)

# Show the figure followed by the two sliders.
display(fig, perplexity_slider, iterations_slider)

# Redraw whenever either slider's value changes.
perplexity_slider.observe(on_value_change, names='value')
iterations_slider.observe(on_value_change, names='value')

IntSlider(value=125, description='Perplexity:', max=125, min=25, step=25)

IntSlider(value=3000, description='Iterations:', max=3000, min=1000, step=1000)