In [None]:
import os
# Environment flags for the GPU / TensorFlow runtime. They are written here,
# in the first cell, so they are already in place when TensorFlow is imported
# in the following cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
    'TF_ENABLE_ONEDNN_OPTS': '0',
})

# Import useful libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# <font color='red'>**Loading trained networks**</font>

In [None]:
# Notebook-level switches read by the cells below.
group = "Humeda"
experiment = "synthetic"

# Only the exact value "Seca" selects the Seca checkpoint; every other value
# of `group` falls through to the Humeda checkpoint.
use_seca = group == "Seca"
print("reading network for Seca" if use_seca else "reading network for Humeda")
path_model = (
    "../models/clasification/Vgg16SecaFinal.h5"
    if use_seca
    else "../models/clasification/Vgg16HumedaFinalV2.h5"
)

# compile=False: the network is loaded without compiling it.
model = tf.keras.models.load_model(path_model, compile=False)
model.summary()

In [None]:
# Build a sub-model that maps the network input to the output of the layer
# named 'fc2'; its predictions are used as embeddings further down.
l1 = model.get_layer(name='fc2')
emb = Model(inputs=model.input, outputs=l1.output)
emb.summary()

# Getting and saving the embeddings (original samples)

In [None]:
col_names = ['path', 'label']

# Select the CSV for the configured (group, experiment) pair.
# The branches form ONE if/elif/else chain so that exactly one of them runs.
# With independent `if` statements the final `else` is attached only to the
# last test, so a matching Humeda branch would be overwritten straight away
# by the "Seca original" fallback.
# NOTE(review): any combination not listed explicitly (for example
# experiment == "synthetic") still falls back to the Seca original CSV, as
# before - confirm that this is the intended default.
if group == "Humeda" and experiment == "augmented":
    print("Humeda augmented")
    df = pd.read_csv("../data/rgb/classification/augmented/Humeda.csv", header=None, names=col_names)
elif group == "Humeda" and experiment == "original":
    print("Humeda original")
    df = pd.read_csv("../data/rgb/classification/original/Humeda.csv", header=None, names=col_names)
elif group == "Seca" and experiment == "augmented":
    print("Seca augmented")
    df = pd.read_csv("../data/rgb/classification/augmented/Seca.csv", header=None, names=col_names)
else:
    print("Seca original")
    df = pd.read_csv("../data/rgb/classification/original/Seca.csv", header=None, names=col_names)

# Making the train and test splits (80/20); the fixed random_state keeps the
# split identical between runs.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=14)

# Per-class sample counts of each split.
print("======= TRAIN =======")
print(train_df.groupby('label').count())
print("======= TEST =======")
print(test_df.groupby('label').count())

# Getting and saving the embeddings (synthetic samples)

In [None]:
col_names = ['path', 'label']

# Only the pair (group "Seca", experiment "synthetic") takes the first option;
# every other configuration takes the "Humeda synthetic" one.
# NOTE(review): group "Seca" reads train_wet.csv and the fallback reads
# train_dry.csv - confirm that this pairing is intended.
is_seca_synthetic = (group == "Seca") and (experiment == "synthetic")
synthetic_tag = "Seca synthetic" if is_seca_synthetic else "Humeda synthetic"
synthetic_csv = (
    "../imgs_results/rgb/train_wet.csv"
    if is_seca_synthetic
    else "../imgs_results/rgb/train_dry.csv"
)
print(synthetic_tag)
df = pd.read_csv(synthetic_csv, header=None, names=col_names)

# Per-class sample counts of the loaded CSV.
print(df.groupby('label').count())

In [None]:
# Compute one embedding per image listed in `df`.
# For the synthetic experiment `df` is used as loaded by the previous cell;
# for any other experiment the held-out test split is embedded instead.
if experiment != "synthetic":
    df = test_df

label, pred = [], []

for i in range(len(df)):
    row = df.iloc[i]
    # Load the image at 224x224, scale pixel values by 1/255 and add a
    # leading batch axis before running it through the embedding model.
    img = tf.keras.preprocessing.image.load_img(row['path'], target_size=(224, 224))
    img = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    img = tf.expand_dims(img, axis=0)
    emb_test = emb.predict(img)
    # extend() appends each row of the prediction batch as its own entry.
    pred.extend(emb_test)
    label.append(row['label'])

pred = np.array(pred)
print("dimension of predic: ", pred.shape)

label = np.array(label)
print("dimension of label: ", label.shape)

In [None]:
# Display the current value of `group`; the next cell uses it to choose the
# output files for the embeddings.
group

In [None]:
# Saving the numpy arrays: embeddings and labels go to a pair of .npy files
# chosen by `group` ("Humeda" versus everything else).
# NOTE(review): both targets live under synthetic/.../train regardless of the
# value of `experiment` - confirm before running with a non-synthetic setup.
if group == "Humeda":
    save_msg = "saving the Humeda data"
    emb_path = "../embeddings/rgb/synthetic/Humedas/train/embHumedaTrain.npy"
    label_path = "../embeddings/rgb/synthetic/Humedas/train/labelHumedaTrain.npy"
else:
    save_msg = "saving the Seca data"
    emb_path = "../embeddings/rgb/synthetic/Secas/train/embSecaTrain.npy"
    label_path = "../embeddings/rgb/synthetic/Secas/train/labelSecaTrain.npy"

print(save_msg)
np.save(emb_path, pred)
np.save(label_path, label)

In [None]:
# Building the dataframe: one row per sample, pairing its class label with
# its embedding vector.
records = list(zip(label, pred))
df = pd.DataFrame(records, columns=['clase', 'predicción'])
# Number of samples per class.
df.groupby('clase').count()

# Dimensionality reduction using t-SNE

In [None]:
# Project the embeddings to 2-D with t-SNE (PCA initialisation).
# t-SNE is stochastic: without a fixed random_state the coordinates - and all
# centroids and distances computed from them further down - change on every
# run. The seed reuses the value already used for the train/test split.
tsne = TSNE(n_components=2, init='pca', random_state=14)
P1_tsne = tsne.fit_transform(pred)
print(P1_tsne.shape)

In [None]:
# Attach the two t-SNE coordinates to the per-sample frame as 'x' and 'y'.
# Dedicated names are used instead of `l1`/`l2` because `l1` already holds the
# 'fc2' layer used to build the embedding model; overwriting it would break a
# re-run of that cell.
tsne_x = P1_tsne[:, 0]
tsne_y = P1_tsne[:, 1]

# errors='ignore' keeps the cell re-runnable: on a second execution the
# 'predicción' column is already gone and a plain drop would raise KeyError.
df = df.drop(columns='predicción', errors='ignore')
df['x'] = tsne_x
df['y'] = tsne_y

In [None]:
# Numeric code for each class name. np.select leaves 0 (its default) on rows
# whose 'clase' matches none of the listed names.
class_codes = {'CuNi1': 1, 'CuNi2': 2, 'CuNi3': 3}

conditions = [df['clase'] == class_name for class_name in class_codes]
values = list(class_codes.values())

df['labels'] = np.select(conditions, values)

In [None]:
# Display the per-sample frame built by the cells above (class name, 2-D
# coordinates and numeric label) for visual inspection.
df

# Center of mass and distance between classes

In [None]:
# For every class compute
#   * its center of mass (mean x / mean y of its points), and
#   * its intra-class spread: the mean Euclidean distance from each of its
#     points to that center of mass.
# An empty class yields NaN for all of these values.
clases = ['CuNi1', 'CuNi2', 'CuNi3']
full_x_com = []
full_y_com = []
intra_class_dis = []
for clase in clases:
    df_clase = df[df['clase'] == clase]
    # center of mass on the x and y axes
    x_com = df_clase['x'].mean()
    y_com = df_clase['y'].mean()
    full_x_com.append(x_com)
    full_y_com.append(y_com)
    # distance of every sample of the class to the class center, averaged
    spread = np.sqrt(
        np.power(df_clase['x'] - x_com, 2) + np.power(df_clase['y'] - y_com, 2)
    ).mean()
    intra_class_dis.append(spread)

# ====== inter-class distances: between the centers of two different classes ======
cuni1_cuni2_dis = np.sqrt(np.power(full_x_com[0]-full_x_com[1],2)+ np.power(full_y_com[0]-full_y_com[1],2))
cuni1_cuni3_dis = np.sqrt(np.power(full_x_com[0]-full_x_com[2],2)+ np.power(full_y_com[0]-full_y_com[2],2))
cuni2_cuni3_dis = np.sqrt(np.power(full_x_com[1]-full_x_com[2],2)+ np.power(full_y_com[1]-full_y_com[2],2))

# ====== intra-class distances: spread of each class around its own center ======
# The per-class means are the centers of mass computed above; the names are
# kept so that anything referring to them keeps working.
x_cuni1_mean, x_cuni2_mean, x_cuni3_mean = full_x_com
y_cuni1_mean, y_cuni2_mean, y_cuni3_mean = full_y_com
# Previously these three values measured the distance between the center of
# mass and the mean of the same class - the same point - so they were always
# 0. They now hold the mean sample-to-center distance of each class.
cuni1_dis, cuni2_dis, cuni3_dis = intra_class_dis

In [None]:
# Report the centers of mass and the distances derived from them.
print("============ ABOUT CENTER OF MASS ==============")
print('el centro de masa para CuNi1 es: x {} and y {}'.format(full_x_com[0], full_y_com[0]))
print("el centro de masa para CuNi2 es: x {} and y {}".format(full_x_com[1], full_y_com[1]))
print("el centro de masa para CuNi3 es: x {} and y {}".format(full_x_com[2], full_y_com[2]))
# Distances between two DIFFERENT classes are inter-class distances; the two
# section headers were swapped.
print("============ ABOUT INTER CLASS DISTANCES ==============")
print("la distancia CuNi1-CuNi2 es: {}".format(cuni1_cuni2_dis))
print("la distancia CuNi1-CuNi3 es: {}".format(cuni1_cuni3_dis))
print("la distancia CuNi2-CuNi3 es: {}".format(cuni2_cuni3_dis))
# Values measured within a single class are intra-class distances.
print("============ ABOUT INTRA CLASS DISTANCES ==============")
print("la distancia CuNi1-CuNi1 es: {}".format(cuni1_dis))
print("la distancia CuNi2-CuNi2 es: {}".format(cuni2_dis))
print("la distancia CuNi3-CuNi3 es: {}".format(cuni3_dis))

In [None]:
# Scatter plot of the 2-D points, one colour per class, with the three class
# centers of mass drawn on top as large markers.
fig, ax = plt.subplots()

colors = {'CuNi1':'red', 'CuNi2':'green', 'CuNi3':'blue'}

grouped = df.groupby('clase')
# The loop variable must not be called `group`: that name holds the
# notebook-level configuration string ("Humeda"/"Seca"), and rebinding it to
# a DataFrame here would break any earlier cell that is re-run afterwards.
for key, class_points in grouped:
    class_points.plot(ax=ax, kind='scatter', x='x', y='y', label=key, color=colors[key])

# Centers of mass, in the order CuNi1, CuNi2, CuNi3.
ax.scatter([full_x_com[0]], [full_y_com[0]], color='black', s=250)
ax.scatter([full_x_com[1]], [full_y_com[1]], color='yellow', s=250)
ax.scatter([full_x_com[2]], [full_y_com[2]], color='orange', s=250)
plt.show()