In [None]:
import os

# GPU / backend environment switches, applied in one place.
# NOTE(review): these only matter to libraries that read them at import time;
# no TensorFlow import is visible in this notebook, so TF_ENABLE_ONEDNN_OPTS is
# presumably meant for code loaded elsewhere -- confirm.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_ENABLE_ONEDNN_OPTS": "0",
})

In [None]:
import numpy as np
import os
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import plotly.graph_objects as go

## Loading data

In [None]:
def load_data(split, group, base_dir="../embeddings/gray/", samples=("CuNi1", "CuNi2", "CuNi3")):
    """Load the saved embeddings and labels of every sample for one split/group.

    For each name in ``samples`` the files ``<base_dir>/<split>_<group>/<name>/Embeddings.npy``
    and ``.../Labels.npy`` are read with ``np.load`` and concatenated along axis 0
    in the order given by ``samples``.

    Parameters
    ----------
    split : str
        Split name used in the directory name (the notebook uses "train" / "test").
    group : str
        Group name used in the directory name (the notebook uses 'dry').
    base_dir : str, optional
        Root directory holding the ``<split>_<group>`` folders.
    samples : sequence of str, optional
        Sub-directory names to load, in concatenation order.

    Returns
    -------
    pandas.DataFrame
        Columns ``'features'`` (one embedding row per cell) and ``'label'``.

    Raises
    ------
    ValueError
        If a sample has a different number of embeddings and labels.
    """
    # The trailing "" keeps a trailing separator so the message matches the directory.
    path = os.path.join(base_dir, split + "_" + group, "")
    print("reading from: ", path)

    loaded = []
    for sample in samples:
        sample_dat = np.load(os.path.join(path, sample, "Embeddings.npy"))
        sample_lab = np.load(os.path.join(path, sample, "Labels.npy"))
        loaded.append((sample, sample_dat, sample_lab))

    for sample, sample_dat, sample_lab in loaded:
        print("{} dim: {}, amount of labels: {}".format(sample, sample_dat.shape, sample_lab.shape))

    # Check per sample: after concatenation a surplus in one sample can cancel a
    # deficit in another, which would silently shift labels onto the wrong rows.
    for sample, sample_dat, sample_lab in loaded:
        if len(sample_dat) != len(sample_lab):
            raise ValueError(
                "{}: {} embeddings but {} labels".format(sample, len(sample_dat), len(sample_lab))
            )

    features = np.concatenate([sample_dat for _, sample_dat, _ in loaded], axis=0)
    labels = np.concatenate([sample_lab for _, _, sample_lab in loaded], axis=0)

    df = pd.DataFrame({'features': list(features), 'label': labels}, columns=['features', 'label'])

    return df

In [None]:
group = 'dry'

# --- train split: load, then show how many rows each label has ---
split = "train"
train_df = load_data(split, group)
print("train_df info:")
train_label_counts = train_df.groupby(['label']).count()
print(train_label_counts)

print("=====================")

# --- test split: same summary ---
split = "test"
test_df = load_data(split, group)
print("test_df info:")
test_label_counts = test_df.groupby(['label']).count()
print(test_label_counts)

In [None]:
def get_features(df):
    """Split a frame built by ``load_data`` into feature and label arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``'features'`` column (one array-like per row) and a
        ``'label'`` column.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``features`` built from the per-row entries stacked in row order, and
        ``labels`` in the same order.
    """
    # Take whole columns positionally instead of looping over df.loc[i]: the
    # loop only worked when the index was exactly 0..n-1 and was slow per row.
    features = np.array(df['features'].tolist())
    labels = np.array(df['label'].tolist())

    return features, labels

# <font color='red'>**Visualization methods**</font>
## PCA

In [None]:
# Train split: pull the arrays out of the frame and report shape and value range.
train_features, train_labels = get_features(train_df)
min_features, max_features = (
    str(round(bound, 2)) for bound in (train_features.min(), train_features.max())
)
print("train features shape: {}, min and max values: {}, {}".format(train_features.shape, min_features, max_features))

# Test split: same report (min_features / max_features are reused on purpose).
test_features, test_labels = get_features(test_df)
min_features, max_features = (
    str(round(bound, 2)) for bound in (test_features.min(), test_features.max())
)
print("test features shape: {}, min and max values: {}, {}".format(test_features.shape, min_features, max_features))

**Getting components for train split**

In [None]:
# Fit a seeded 2-component PCA on the train features and keep the projection.
pca = PCA(
    n_components=2,
    random_state=69,
)
pca_result = pca.fit_transform(train_features)

In [None]:
# Store the two train components as columns of the train frame.
# (A 'pca-three' column would need the PCA to be built with n_components=3.)
train_df['pca-one'], train_df['pca-two'] = pca_result[:, 0], pca_result[:, 1]

explained_ratio = pca.explained_variance_ratio_
print('Explained variation per principal component: {}'.format(explained_ratio))

**Transform the test split**

In [None]:
# Project the test features with the already-fitted PCA (no refit here);
# pca_result is rebound to the test projection.
pca_result = pca.transform(
    test_features
)

In [None]:
# Store the two test components as columns of the test frame.
# (A 'pca-three' column would need the PCA to be built with n_components=3.)
test_df['pca-one'], test_df['pca-two'] = pca_result[:, 0], pca_result[:, 1]

# `pca` was only fitted on the train features; transform() does not change
# explained_variance_ratio_, so say explicitly that these are the train-fit
# values rather than something measured on the test split.
print('Explained variation per principal component (from the train fit): {}'.format(pca.explained_variance_ratio_))

**Plotting**

In [None]:
# One fixed colour per sample label.
color_discrete_map = {
    'CuNi1': 'rgb(255,0,0)',
    'CuNi2': 'rgb(0,255,0)',
    'CuNi3': 'rgb(0,0,255)',
}

# Test split in PCA space, coloured by label.
fig = px.scatter(
    test_df,
    x="pca-one",
    y="pca-two",
    color='label',
    color_discrete_map=color_discrete_map,
    hover_name="label",
    hover_data=["label"],
    opacity=0.6,
)

# Larger markers with a dark outline, applied to marker-mode traces only.
marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

fig.show()

**Test and train splits in the same graph**

In [None]:
# Saturated colours for train points, lighter variants for test points.
color_discrete_map_train = {'CuNi1': 'rgb(255,0,0)', 'CuNi2': 'rgb(0,255,0)', 'CuNi3': 'rgb(0,0,255)'}
color_discrete_map_test = {'CuNi1': 'rgb(255,11,215)', 'CuNi2': 'rgb(153,255,51)', 'CuNi3': 'rgb(51,153,255)'}

# Per-point colour looked up from each row's label.
cols_train = train_df['label'].map(color_discrete_map_train)
cols_test = test_df['label'].map(color_discrete_map_test)

fig = go.Figure()

# Name the traces so the legend reads "train"/"test" instead of "trace 0"/"trace 1",
# and pass the label as hover text so each point can be identified.
fig.add_trace(go.Scatter(x = train_df["pca-one"],
                         y = train_df["pca-two"],
                         mode = 'markers',
                         name = 'train',
                         text = train_df['label'],
                         marker=dict(size=5, color = cols_train)
                         ))

fig.add_trace(go.Scatter(x = test_df["pca-one"],
                         y = test_df["pca-two"],
                         mode = 'markers',
                         name = 'test',
                         text = test_df['label'],
                         marker=dict(size=5, color = cols_test)
                        ))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(title="PCA: train and test splits",
                  xaxis_title="pca-one",
                  yaxis_title="pca-two")

fig.show()

## UMAP

In [None]:
import umap.umap_ as umap

In [None]:
# UMAP is stochastic; fix the seed (same value as the PCA above) so a
# Restart & Run All reproduces the same embedding.
reducer = umap.UMAP(random_state=69)

**Getting representation for train split**

In [None]:
# Fit the reducer on the train features; the cell displays the embedding's shape.
embedding = reducer.fit_transform(
    train_features
)
embedding.shape

In [None]:
# Store the two embedding coordinates as plotting columns on the train frame.
train_df['x'], train_df['y'] = embedding[:, 0], embedding[:, 1]

**Getting representation for test split**

In [None]:
# Map the test features with the reducer fitted on train (no refit);
# `embedding` is rebound to the test result and its shape is displayed.
embedding = reducer.transform(
    test_features
)
embedding.shape

In [None]:
# Store the two embedding coordinates as plotting columns on the test frame.
test_df['x'], test_df['y'] = embedding[:, 0], embedding[:, 1]

In [None]:
# One fixed colour per sample label.
color_discrete_map = {
    'CuNi1': 'rgb(255,0,0)',
    'CuNi2': 'rgb(0,255,0)',
    'CuNi3': 'rgb(0,0,255)',
}

# Test split using the current 'x'/'y' columns, coloured by label.
fig = px.scatter(
    test_df,
    x="x",
    y="y",
    color='label',
    color_discrete_map=color_discrete_map,
    hover_name="label",
    hover_data=["label"],
    opacity=0.5,
)

# Larger markers with a dark outline, applied to marker-mode traces only.
marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

fig.show()

**Setting train and test splits in the same plot**

In [None]:
# Saturated colours for train points, lighter variants for test points.
color_discrete_map_train = {'CuNi1': 'rgb(255,0,0)', 'CuNi2': 'rgb(0,255,0)', 'CuNi3': 'rgb(0,0,255)'}
color_discrete_map_test = {'CuNi1': 'rgb(255,11,215)', 'CuNi2': 'rgb(153,255,51)', 'CuNi3': 'rgb(51,153,255)'}

# Per-point colour looked up from each row's label.
cols_train = train_df['label'].map(color_discrete_map_train)
cols_test = test_df['label'].map(color_discrete_map_test)

fig = go.Figure()

# Name the traces so the legend reads "train"/"test" instead of "trace 0"/"trace 1",
# and pass the label as hover text so each point can be identified.
fig.add_trace(go.Scatter(x = train_df["x"],
                         y = train_df["y"],
                         mode = 'markers',
                         name = 'train',
                         text = train_df['label'],
                         marker=dict(size=4, color = cols_train)
                         ))

fig.add_trace(go.Scatter(x = test_df["x"],
                         y = test_df["y"],
                         mode = 'markers',
                         name = 'test',
                         text = test_df['label'],
                         marker=dict(size=4, color = cols_test)
                        ))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(title="UMAP: train and test splits",
                  xaxis_title="x",
                  yaxis_title="y")

fig.show()

## t-SNE 2D

In [None]:
from sklearn.manifold import TSNE

Representation for **train** split

In [None]:
# t-SNE is stochastic; fix the seed (same value as the PCA above) so a
# Restart & Run All reproduces the same map.
tsne = TSNE(n_components = 2, init = 'pca', random_state = 69)
P1_tsne = tsne.fit_transform(train_features)
P1_tsne.shape

In [None]:
# Split the 2-D t-SNE result into its first and second coordinate.
l1, l2 = P1_tsne[:, 0], P1_tsne[:, 1]

In [None]:
# Store the coordinates as the train frame's 'x'/'y' plotting columns
# (any values already in those columns are replaced).
train_df['x'], train_df['y'] = l1, l2

Representation for **test** split

In [None]:
# TSNE has no `transform`, so test points cannot be projected into a map that
# was fitted on train. Calling fit_transform on the test split alone produces
# an unrelated map, and drawing it on the same axes as the train map is
# meaningless. Fit both splits jointly and slice the result instead.
n_train = len(train_features)
joint_tsne = tsne.fit_transform(np.concatenate((train_features, test_features), axis=0))

# Replace the train coordinates with the ones from the joint map so the
# combined train/test plot uses a single coordinate system.
train_df['x'] = joint_tsne[:n_train, 0]
train_df['y'] = joint_tsne[:n_train, 1]

# Test rows of the joint map; the following cells read P1_tsne.
P1_tsne = joint_tsne[n_train:]
P1_tsne.shape

In [None]:
# Split the 2-D t-SNE result into its first and second coordinate.
l1, l2 = P1_tsne[:, 0], P1_tsne[:, 1]

In [None]:
# Store the coordinates as the test frame's 'x'/'y' plotting columns
# (any values already in those columns are replaced).
test_df['x'], test_df['y'] = l1, l2

**Plotting**

In [None]:
# One fixed colour per sample label.
color_discrete_map = {
    'CuNi1': 'rgb(255,0,0)',
    'CuNi2': 'rgb(0,255,0)',
    'CuNi3': 'rgb(0,0,255)',
}

# Test split using the current 'x'/'y' columns, coloured by label.
fig = px.scatter(
    test_df,
    x="x",
    y="y",
    color='label',
    color_discrete_map=color_discrete_map,
    hover_name="label",
    hover_data=["label"],
    opacity=0.5,
)

# Larger markers with a dark outline, applied to marker-mode traces only.
marker_style = dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

fig.show()

Setting plot for **both splits**

In [None]:
# Saturated colours for train points, lighter variants for test points.
color_discrete_map_train = {'CuNi1': 'rgb(255,0,0)', 'CuNi2': 'rgb(0,255,0)', 'CuNi3': 'rgb(0,0,255)'}
color_discrete_map_test = {'CuNi1': 'rgb(255,11,215)', 'CuNi2': 'rgb(153,255,51)', 'CuNi3': 'rgb(51,153,255)'}

# Per-point colour looked up from each row's label.
cols_train = train_df['label'].map(color_discrete_map_train)
cols_test = test_df['label'].map(color_discrete_map_test)

fig = go.Figure()

# Name the traces so the legend reads "train"/"test" instead of "trace 0"/"trace 1",
# and pass the label as hover text so each point can be identified.
fig.add_trace(go.Scatter(x = train_df["x"],
                         y = train_df["y"],
                         mode = 'markers',
                         name = 'train',
                         text = train_df['label'],
                         marker=dict(size=5, color = cols_train)
                         ))

fig.add_trace(go.Scatter(x = test_df["x"],
                         y = test_df["y"],
                         mode = 'markers',
                         name = 'test',
                         text = test_df['label'],
                         marker=dict(size=5, color = cols_test)
                        ))

# Title and axis labels so the figure can be read on its own.
fig.update_layout(title="t-SNE: train and test splits",
                  xaxis_title="x",
                  yaxis_title="y")

fig.show()