In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch as t

from sklearn.manifold import TSNE
import plotly.graph_objects as go


In [3]:
# Read the train and test splits from the local data/ directory.
train_df, test_df = map(pd.read_csv, ("data/train.csv", "data/test.csv"))

Preprocess train dataset

In [4]:
# Columns that are dropped before modelling (treated as non-sensor metadata).
not_sensors = [
    'physical_part_type',
    'message_timestamp',
    'weekday',
    'shift',
    'physical_part_id',
]
# Every remaining column of the raw training frame, in its original order.
# NOTE(review): "label" is not listed in `not_sensors`, so it is included
# here as well -- confirm that is intended before using `sensors` as a
# feature list.
sensors = [name for name in train_df.columns if name not in not_sensors]

In [5]:
# Remove columns that contain no observations at all; there is nothing to
# impute them from.
train_df = train_df.dropna(axis=1, how='all')

# Drop the non-sensor metadata columns. `errors="ignore"` keeps this cell
# re-runnable: `train_df` is reassigned in place, so on a second execution
# these columns are already gone and a plain drop would raise KeyError. It
# also tolerates a metadata column that was entirely NaN and was therefore
# already removed by the dropna above.
train_df = train_df.drop(columns=not_sensors, errors="ignore")

# Fill the remaining gaps with each column's mean.
# NOTE(review): "label" is still a column at this point, so a missing label
# would also be mean-filled -- confirm labels are never NaN.
train_df = train_df.fillna(train_df.mean())

In [6]:
# Sanity check: imputation must have left no missing values in any column.
assert train_df.notna().all().all()

In [10]:
# Split the rows by class so each class can be downsampled independently.
pos_label = train_df[train_df["label"] == 1]
neg_label = train_df[train_df["label"] == 0]

# Size of the smaller class; each class contributes exactly this many rows.
min_count = min(len(pos_label), len(neg_label))

# Keep only `min_count` positive rows for visualization.
pos_label_samples = pos_label.sample(min_count, random_state=42)

# Downsample the negatives too when they are the larger class; otherwise the
# frame would stay unbalanced. When negatives are already the smaller class
# they are kept untouched, in their original order.
if len(neg_label) > min_count:
    neg_label_samples = neg_label.sample(min_count, random_state=42)
else:
    neg_label_samples = neg_label

# Positives first, then negatives: the plotting cells split the embedding at
# row `min_count` and rely on this order.
balanced_train_df = pd.concat([pos_label_samples, neg_label_samples])
assert len(balanced_train_df) == 2 * min_count, "classes are not balanced"

balanced_train_df.shape[0], balanced_train_df.index

(4094,
 Index([35892, 34229, 26743, 28719, 23717, 15888, 23387, 16729, 27642, 18617,
        ...
        40155, 40159, 40171, 40189, 40203, 40209, 40241, 40269, 40291, 40304],
       dtype='int64', length=4094))

Fit the TSNE model on training data for 2D visualization

In [19]:
# Target dimensionality of the embedding: 2 for a flat scatter plot.
n_components = 2

# Feature matrix: every column of the balanced frame except the class label.
features = balanced_train_df.drop(columns="label").values

# Fit t-SNE with a fixed seed and project the features in one step.
tsne = TSNE(random_state=42, n_components=n_components)
train_embedding = tsne.fit_transform(features)

Plot the resulting class representation

In [21]:
# The embedding rows follow the order of `balanced_train_df`: the first
# `min_count` rows are the label == 1 samples, the rest are label == 0.
encoded_pos_features, encoded_neg_features = (
    train_embedding[:min_count],
    train_embedding[min_count:],
)

# One scatter trace per class, each with its own colour and legend entry.
fig = go.Figure(data=[
    go.Scatter(
        x=encoded_pos_features[:, 0],
        y=encoded_pos_features[:, 1],
        mode='markers',
        name='OK',
        marker={'color': 'blue'},
    ),
    go.Scatter(
        x=encoded_neg_features[:, 0],
        y=encoded_neg_features[:, 1],
        mode='markers',
        name='NOK',
        marker={'color': 'red'},
    ),
])

# Titles, axis labels and legend heading.
fig.update_layout(
    title='2D Scatter Plot with Two Groups',
    xaxis_title='Feature 1',
    yaxis_title='Feature 2',
    legend_title='Groups',
    template='plotly',
)

# Render the interactive figure.
fig.show()

Fit the t-SNE model again for 3D visualization

In [22]:
# Re-fit t-SNE on the same feature matrix, this time projecting to 3D.
n_components = 3
tsne = TSNE(random_state=42, n_components=n_components)
train_embedding_3D = tsne.fit_transform(features)

In [24]:
# Split the 3D embedding at row `min_count`: rows before it go to the
# positive group, rows from it onward to the negative group.
encoded_pos_features, encoded_neg_features = (
    train_embedding_3D[:min_count],
    train_embedding_3D[min_count:],
)

In [26]:
# One 3D scatter trace per class, each with its own colour and legend entry.
fig = go.Figure(data=[
    go.Scatter3d(
        x=encoded_pos_features[:, 0],
        y=encoded_pos_features[:, 1],
        z=encoded_pos_features[:, 2],
        mode='markers',
        marker={'size': 5, 'color': 'blue'},
        name='OK',
    ),
    go.Scatter3d(
        x=encoded_neg_features[:, 0],
        y=encoded_neg_features[:, 1],
        z=encoded_neg_features[:, 2],
        mode='markers',
        marker={'size': 5, 'color': 'red'},
        name='NOK',
    ),
])

# Figure title and the labels of the three scene axes.
fig.update_layout(
    title="Interactive 3D Scatter Plot with Plotly",
    scene={
        'xaxis_title': 'X-axis',
        'yaxis_title': 'Y-axis',
        'zaxis_title': 'Z-axis',
    },
)

# Render the interactive figure.
fig.show()