In [None]:
import pandas as pd
from sklearn.manifold import TSNE
import numpy as np

"""
This notebook explores whether the CLIP embeddings generated from user-taken photos and from the digital card images are of good
quality and whether the two can really be compared. As the t-SNE plot shows, most of the cards were correctly clustered with their counterparts.
"""

In [2]:
# Load the two pickled DataFrames used throughout the notebook: the reference
# ("truth") set and the user-captured set.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
truth_df, cap_df = (
    pd.read_pickle(pickle_name)
    for pickle_name in ("SV10_truth.pkl", "SV10_captured.pkl")
)

In [3]:
# Keep only the truth rows whose id also appears in the captured set, discard
# the columns that are not needed for the comparison, and label every
# remaining row as coming from the "truth" source.
truth_cross_df = (
    truth_df.loc[truth_df["id"].isin(cap_df["id"])]
    .drop(columns=["supertype", "image", "name"])
    .assign(**{"class": "truth"})
)
print(truth_cross_df.shape)
truth_cross_df.head(3)

(21, 3)


Unnamed: 0,id,emb,class
16,sv10-17,"[1.2504067, 0.04769124, 0.08306568, 0.62093115...",truth
21,sv10-22,"[1.3897394, 0.28314722, 0.33581403, -0.1016198...",truth
23,sv10-24,"[1.1826667, -0.85501885, -0.39982924, 0.176627...",truth


In [4]:
# Prepare the captured-photo frame for merging with the truth frame:
# remove the "path" column and tag every row with class "cap".
#
# errors="ignore" makes this cell safe to re-run: once "path" has been dropped,
# a second execution would otherwise raise KeyError because the cell rebinds
# `cap_df` to its own output.
cap_df = (
    cap_df
    .drop(columns=["path"], errors="ignore")
    .assign(**{"class": "cap"})
)
print(cap_df.shape)
cap_df.head(3)

(49, 3)


Unnamed: 0,id,emb,class
0,sv10-103,"[0.95359534, -0.2556382, 0.24236733, -0.079072...",cap
1,sv10-103,"[0.68849266, -0.499305, 0.5636944, -0.10402526...",cap
2,sv10-116,"[0.6393946, -0.010814317, 0.77857274, 0.084503...",cap


In [5]:
# Stack captured rows on top of the matching truth rows (row-wise concat),
# renumbering the index from 0 so it stays unique.
data = pd.concat((cap_df, truth_cross_df), ignore_index=True)
data.shape

(70, 3)

In [6]:
# Project the embeddings with t-SNE (default number of output components).
# t-SNE is stochastic, so random_state is fixed to make the layout -- and
# therefore the plot below -- reproducible across kernel restarts.
m = TSNE(learning_rate=10, random_state=42)

# The "emb" column holds one embedding per row; stack them into a single
# array with one row per DataFrame row so scikit-learn can consume it.
flat_data = np.stack(data["emb"].values)

tsne_features = m.fit_transform(flat_data)

In [7]:
# Attach the first and second t-SNE coordinates to the frame as plot axes.
data["x"], data["y"] = tsne_features[:, 0], tsne_features[:, 1]

In [8]:
# Preview the first three rows, now including the x / y coordinates.
data.iloc[:3]

Unnamed: 0,id,emb,class,x,y
0,sv10-103,"[0.95359534, -0.2556382, 0.24236733, -0.079072...",cap,-2.042312,-1.010459
1,sv10-103,"[0.68849266, -0.499305, 0.5636944, -0.10402526...",cap,-2.527997,-4.091929
2,sv10-116,"[0.6393946, -0.010814317, 0.77857274, 0.084503...",cap,-3.707477,-1.53638


In [9]:
import plotly.express as px

# One marker per embedding: colour encodes the card id, marker symbol encodes
# the source ("cap" vs "truth"). Matching cap/truth markers of the same colour
# landing close together indicates the embeddings are comparable.
# The title distinguishes this figure from the all-truth plot further down.
fig = px.scatter(
    data,
    x="x",
    y="y",
    color="id",
    symbol="class",
    title="t-SNE of CLIP embeddings: captured photos vs. matching truth cards",
)
fig.show()

In [None]:
# Plot vs all truth: build the full truth frame (no filtering by captured ids),
# without the unused columns and tagged with class "truth".
# NOTE(review): the name `all` shadows Python's builtin all(); it is kept
# because the following cell reads this variable by that name.
all = (
    truth_df
    .drop(columns=["supertype", "image", "name"])
    .assign(**{"class": "truth"})
)
all.shape

In [None]:
# Repeat the projection, this time comparing the captured rows against every
# truth row rather than only the ones with a matching id.
data2 = pd.concat([cap_df, all], axis=0, ignore_index=True)

# t-SNE is stochastic; pin random_state so the layout is reproducible
# across kernel restarts.
m = TSNE(learning_rate=500, random_state=42)

# Stack the per-row embeddings into one array (one row per DataFrame row).
flat_data2 = np.stack(data2["emb"].values)

tsne_features2 = m.fit_transform(flat_data2)

data2["x"] = tsne_features2[:, 0]
data2["y"] = tsne_features2[:, 1]

# Colour = card id, symbol = source ("cap" vs "truth"); the title tells this
# figure apart from the matching-ids-only plot.
fig = px.scatter(
    data2,
    x="x",
    y="y",
    color="id",
    symbol="class",
    title="t-SNE of CLIP embeddings: captured photos vs. all truth cards",
)
fig.show()