<a href="https://colab.research.google.com/github/Aktuarius/MSBD-5008-Project/blob/main/5008_Project_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# Install required packages
!pip install dgl

# Load necessary packages
import dgl
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score



In [3]:
# Load dataset

# https://paperswithcode.com/dataset/amazon-fraud

# Load Amazon Fraud Dataset
dataset = dgl.data.FraudAmazonDataset()

Downloading /root/.dgl/amazon.zip from https://data.dgl.ai/dataset/FraudAmazon.zip...


/root/.dgl/amazon.zip:   0%|          | 0.00/26.1M [00:00<?, ?B/s]

Extracting file to /root/.dgl/amazon_1e446145
Done saving data into cached files.


In [4]:
print(f"Number of categories: {dataset.num_classes}")

Number of categories: 2


In [5]:
g = dataset[0]

In [6]:
print(g)

Graph(num_nodes={'user': 11944},
      num_edges={('user', 'net_upu', 'user'): 351216, ('user', 'net_usu', 'user'): 7132958, ('user', 'net_uvu', 'user'): 2073474},
      metagraph=[('user', 'user', 'net_upu'), ('user', 'user', 'net_usu'), ('user', 'user', 'net_uvu')])


In [7]:
print(g.ndata)

{'feature': tensor([[ 1.0000, 26.0000,  0.0000,  ...,  1.0000, 13.0000,  1.0000],
        [ 4.0000, 17.0000,  0.0000,  ...,  0.0000, 45.0000,  1.0000],
        [ 2.0000, 15.0000,  0.0000,  ...,  1.0000, 24.5000,  1.0000],
        ...,
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 15.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 88.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 31.0000,  1.0000]]), 'label': tensor([0, 0, 0,  ..., 0, 0, 0]), 'train_mask': tensor([False, False, False,  ..., False,  True,  True]), 'val_mask': tensor([False, False, False,  ..., False, False, False]), 'test_mask': tensor([False, False, False,  ...,  True, False, False])}


In [8]:
# See about querying the graphs separately
g_upu = g.edge_type_subgraph(['net_upu'])
g_usu = g.edge_type_subgraph(['net_usu'])
g_uvu = g.edge_type_subgraph(['net_uvu'])

In [9]:
print(g_upu)
print(g_usu)
print(g_uvu)

Graph(num_nodes=11944, num_edges=351216,
      ndata_schemes={'feature': Scheme(shape=(25,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})
Graph(num_nodes=11944, num_edges=7132958,
      ndata_schemes={'feature': Scheme(shape=(25,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})
Graph(num_nodes=11944, num_edges=2073474,
      ndata_schemes={'feature': Scheme(shape=(25,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})


In [10]:
print(g_upu.ndata)
print(g_usu.ndata)
print(g_uvu.ndata)

{'feature': tensor([[ 1.0000, 26.0000,  0.0000,  ...,  1.0000, 13.0000,  1.0000],
        [ 4.0000, 17.0000,  0.0000,  ...,  0.0000, 45.0000,  1.0000],
        [ 2.0000, 15.0000,  0.0000,  ...,  1.0000, 24.5000,  1.0000],
        ...,
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 15.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 88.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 31.0000,  1.0000]]), 'label': tensor([0, 0, 0,  ..., 0, 0, 0]), 'train_mask': tensor([False, False, False,  ..., False,  True,  True]), 'val_mask': tensor([False, False, False,  ..., False, False, False]), 'test_mask': tensor([False, False, False,  ...,  True, False, False])}
{'feature': tensor([[ 1.0000, 26.0000,  0.0000,  ...,  1.0000, 13.0000,  1.0000],
        [ 4.0000, 17.0000,  0.0000,  ...,  0.0000, 45.0000,  1.0000],
        [ 2.0000, 15.0000,  0.0000,  ...,  1.0000, 24.5000,  1.0000],
        ...,
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 15.0000,  1

In [11]:
print("Node features")
print(g_upu.ndata)
print("Edge features")
print(g_upu.edata)

Node features
{'feature': tensor([[ 1.0000, 26.0000,  0.0000,  ...,  1.0000, 13.0000,  1.0000],
        [ 4.0000, 17.0000,  0.0000,  ...,  0.0000, 45.0000,  1.0000],
        [ 2.0000, 15.0000,  0.0000,  ...,  1.0000, 24.5000,  1.0000],
        ...,
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 15.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 88.0000,  1.0000],
        [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 31.0000,  1.0000]]), 'label': tensor([0, 0, 0,  ..., 0, 0, 0]), 'train_mask': tensor([False, False, False,  ..., False,  True,  True]), 'val_mask': tensor([False, False, False,  ..., False, False, False]), 'test_mask': tensor([False, False, False,  ...,  True, False, False])}
Edge features
{}


In [12]:
# Need to label

In [13]:
features = g_upu.ndata["feature"]
labels = g_upu.ndata["label"]
train_mask = g_upu.ndata["train_mask"]
val_mask = g_upu.ndata["val_mask"]
test_mask = g_upu.ndata["test_mask"]

In [14]:
# Define NN
from dgl.nn import SAGEConv

class GraphSAGE(nn.Module):
  def __init__(self, in_feats, h_feats):
    super(GraphSAGE, self).__init__()
    self.conv1 = SAGEConv(in_feats, h_feats, "mean")
    self.conv2 = SAGEConv(h_feats, h_feats, "mean")

  def forward(self, g, in_feat):
    h = self.conv1(g, in_feat)
    h = F.relu(h)
    h = self.conv2(g, h)
    return h

In [15]:
# Transdctive NN

def train(g, model):
  optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
  best_val_acc = 0
  best_test_acc = 0

  for e in range(500):
    # Forward
    logits = model(g, features)

    # Compute prediction
    pred = logits.argmax(1)

    # Compute loss
    # Note that you should only compute the losses of the nodes in the training set.
    loss = F.cross_entropy(logits[train_mask], labels[train_mask])

    # Compute accuracy on training/validation/test
    train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
    val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
    test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

    # Save the best validation accuracy and the corresponding test accuracy
    if best_val_acc < val_acc:
      best_val_acc = val_acc
      best_test_acc = test_acc

    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % 5 ==0:
      print(
          f"In epoch {e}, loss: {loss:.3f}, val acc: {val_acc:.3f} (best {best_val_acc:.3f}), test acc: {test_acc:.3f}"
      )
  return pred

In [16]:
# Training Logic on UPU graph

features = g_upu.ndata["feature"]
labels = g_upu.ndata["label"]
train_mask = g_upu.ndata["train_mask"]
val_mask = g_upu.ndata["val_mask"]
test_mask = g_upu.ndata["test_mask"]

# Train initialization (on CPU)
model = GraphSAGE(g_upu.ndata['feature'].shape[1], 16) # , dataset.num_classes
pred_upu = train(g_upu, model)

In epoch 0, loss: 908.910, val acc: 0.000 (best 0.000), test acc: 0.002
In epoch 5, loss: 293.923, val acc: 0.079 (best 0.079), test acc: 0.081
In epoch 10, loss: 45.390, val acc: 0.824 (best 0.824), test acc: 0.802
In epoch 15, loss: 23.427, val acc: 0.924 (best 0.924), test acc: 0.898
In epoch 20, loss: 9.950, val acc: 0.915 (best 0.933), test acc: 0.887
In epoch 25, loss: 8.903, val acc: 0.844 (best 0.933), test acc: 0.837
In epoch 30, loss: 8.754, val acc: 0.941 (best 0.941), test acc: 0.913
In epoch 35, loss: 5.476, val acc: 0.869 (best 0.941), test acc: 0.861
In epoch 40, loss: 4.804, val acc: 0.939 (best 0.941), test acc: 0.914
In epoch 45, loss: 3.612, val acc: 0.914 (best 0.941), test acc: 0.890
In epoch 50, loss: 3.075, val acc: 0.947 (best 0.947), test acc: 0.930
In epoch 55, loss: 2.550, val acc: 0.924 (best 0.949), test acc: 0.919
In epoch 60, loss: 1.715, val acc: 0.943 (best 0.952), test acc: 0.942
In epoch 65, loss: 1.664, val acc: 0.956 (best 0.956), test acc: 0.946
In

In [17]:
print(pred_upu)

tensor([0, 0, 0,  ..., 0, 0, 0])


In [18]:
# Training Logic on USU graph

features = g_usu.ndata["feature"]
labels = g_usu.ndata["label"]
train_mask = g_usu.ndata["train_mask"]
val_mask = g_usu.ndata["val_mask"]
test_mask = g_usu.ndata["test_mask"]

# Train initialization (on CPU)
model = GraphSAGE(g_usu.ndata['feature'].shape[1], 16) # , dataset.num_classes
pred_usu = train(g_usu, model)

In epoch 0, loss: 1338.938, val acc: 0.000 (best 0.000), test acc: 0.000
In epoch 5, loss: 653.283, val acc: 0.000 (best 0.000), test acc: 0.001
In epoch 10, loss: 144.411, val acc: 0.037 (best 0.037), test acc: 0.042
In epoch 15, loss: 10.001, val acc: 0.870 (best 0.870), test acc: 0.832
In epoch 20, loss: 11.030, val acc: 0.910 (best 0.917), test acc: 0.882
In epoch 25, loss: 6.404, val acc: 0.905 (best 0.917), test acc: 0.871
In epoch 30, loss: 5.217, val acc: 0.910 (best 0.917), test acc: 0.878
In epoch 35, loss: 4.601, val acc: 0.907 (best 0.917), test acc: 0.875
In epoch 40, loss: 2.964, val acc: 0.835 (best 0.921), test acc: 0.821
In epoch 45, loss: 2.124, val acc: 0.950 (best 0.950), test acc: 0.935
In epoch 50, loss: 1.684, val acc: 0.912 (best 0.952), test acc: 0.903
In epoch 55, loss: 1.346, val acc: 0.951 (best 0.954), test acc: 0.946
In epoch 60, loss: 1.092, val acc: 0.921 (best 0.954), test acc: 0.921
In epoch 65, loss: 1.085, val acc: 0.949 (best 0.954), test acc: 0.944

In [19]:
# Training Logic on UVU graph

features = g_uvu.ndata["feature"]
labels = g_uvu.ndata["label"]
train_mask = g_uvu.ndata["train_mask"]
val_mask = g_uvu.ndata["val_mask"]
test_mask = g_uvu.ndata["test_mask"]

# Train initialization (on CPU)
model = GraphSAGE(g_uvu.ndata['feature'].shape[1], 16) # , dataset.num_classes
pred_uvu = train(g_uvu, model)

In epoch 0, loss: 1021.655, val acc: 0.000 (best 0.000), test acc: 0.000
In epoch 5, loss: 402.143, val acc: 0.013 (best 0.013), test acc: 0.015
In epoch 10, loss: 62.416, val acc: 0.411 (best 0.411), test acc: 0.407
In epoch 15, loss: 30.713, val acc: 0.921 (best 0.921), test acc: 0.894
In epoch 20, loss: 38.443, val acc: 0.922 (best 0.922), test acc: 0.895
In epoch 25, loss: 36.081, val acc: 0.921 (best 0.924), test acc: 0.896
In epoch 30, loss: 27.789, val acc: 0.919 (best 0.924), test acc: 0.895
In epoch 35, loss: 15.446, val acc: 0.919 (best 0.924), test acc: 0.896
In epoch 40, loss: 12.480, val acc: 0.581 (best 0.924), test acc: 0.574
In epoch 45, loss: 5.575, val acc: 0.917 (best 0.924), test acc: 0.901
In epoch 50, loss: 4.532, val acc: 0.813 (best 0.924), test acc: 0.778
In epoch 55, loss: 2.635, val acc: 0.906 (best 0.924), test acc: 0.900
In epoch 60, loss: 2.007, val acc: 0.933 (best 0.933), test acc: 0.929
In epoch 65, loss: 1.672, val acc: 0.922 (best 0.946), test acc: 0.

In [20]:
print(pred_usu)
print(pred_uvu)
print(pred_upu)

tensor([0, 0, 0,  ..., 0, 0, 0])
tensor([0, 0, 0,  ..., 0, 0, 0])
tensor([0, 0, 0,  ..., 0, 0, 0])


In [21]:
print(torch.sum(pred_usu))
print(torch.sum(pred_uvu))
print(torch.sum(pred_upu))

tensor(860)
tensor(761)
tensor(851)


In [22]:
# Create Ensemble Predictor (insensitive)

pred_ensemble_insens = (pred_usu + pred_uvu + pred_upu)/3

# Define a positive case if 2/3 predictors individual models flag as fraudulent
pred_ensemble_insens = torch.round(pred_ensemble_insens)
print(torch.sum(pred_ensemble_insens))

tensor(764.)


In [23]:
# Create Ensemble Predictor (sensitive)

pred_ensemble_sens = (pred_usu + pred_uvu + pred_upu)

# Define a positive case if 1/3 predictors individual models flag as fraudulent
pred_ensemble_sens = pred_ensemble_sens.apply_(lambda x: 0 if x == 0 else 1)
print(torch.sum(pred_ensemble_sens))

tensor(1050)


In [24]:
print(pred_usu.shape)
print(pred_uvu.shape)
print(pred_upu.shape)
print(pred_ensemble_insens.shape)
print(pred_ensemble_sens.shape)
print(torch.sum(pred_usu))
print(torch.sum(pred_uvu))
print(torch.sum(pred_upu))
print(torch.sum(pred_ensemble_insens))
print(torch.sum(pred_ensemble_sens))

torch.Size([11944])
torch.Size([11944])
torch.Size([11944])
torch.Size([11944])
torch.Size([11944])
tensor(860)
tensor(761)
tensor(851)
tensor(764.)
tensor(1050)


In [34]:
labels = g_upu.ndata["label"]
val_acc = (pred_upu[val_mask] == labels[val_mask]).float().mean()
test_acc = (pred_upu[test_mask] == labels[test_mask]).float().mean()
print(val_acc)
print(test_acc)

y_pred = pred_upu[val_mask]
y_test =  labels[val_mask]

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Calculate AUC
AUC = roc_auc_score(y_test, y_pred)
print("AUC:", AUC)

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

# Calculate precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

tensor(0.9722)
tensor(0.9618)
Accuracy: 0.9721900347624566
AUC: 0.893107769423559
F1-Score: 0.8125
Precision: 0.8253968253968254


In [35]:
labels = g_usu.ndata["label"]
val_acc = (pred_usu[val_mask] == labels[val_mask]).float().mean()
test_acc = (pred_usu[test_mask] == labels[test_mask]).float().mean()
print(val_acc)
print(test_acc)

y_pred = pred_usu[val_mask]
y_test =  labels[val_mask]

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Calculate AUC
AUC = roc_auc_score(y_test, y_pred)
print("AUC:", AUC)

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

# Calculate precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

tensor(0.9618)
tensor(0.9601)
Accuracy: 0.9617612977983777
AUC: 0.8592057065741276
F1-Score: 0.7441860465116279
Precision: 0.75


In [36]:
labels = g_uvu.ndata["label"]
val_acc = (pred_uvu[val_mask] == labels[val_mask]).float().mean()
test_acc = (pred_uvu[test_mask] == labels[test_mask]).float().mean()
print(val_acc)
print(test_acc)

y_pred = pred_uvu[val_mask]
y_test =  labels[val_mask]

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Calculate AUC
AUC = roc_auc_score(y_test, y_pred)
print("AUC:", AUC)

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

# Calculate precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

tensor(0.9733)
tensor(0.9647)
Accuracy: 0.9733487833140209
AUC: 0.8796028532870638
F1-Score: 0.8130081300813008
Precision: 0.8620689655172413


In [37]:
labels = g_upu.ndata["label"]
val_acc = (pred_ensemble_sens[val_mask] == labels[val_mask]).float().mean()
test_acc = (pred_ensemble_sens[test_mask] == labels[test_mask]).float().mean()
print(val_acc)
print(test_acc)

y_pred = pred_ensemble_sens[val_mask]
y_test =  labels[val_mask]

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Calculate AUC
AUC = roc_auc_score(y_test, y_pred)
print("AUC:", AUC)

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

# Calculate precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

tensor(0.9594)
tensor(0.9520)
Accuracy: 0.9594438006952491
AUC: 0.9003470213996531
F1-Score: 0.7552447552447553
Precision: 0.6923076923076923


In [38]:
labels = g_upu.ndata["label"]
val_acc = (pred_ensemble_insens[val_mask] == labels[val_mask]).float().mean()
test_acc = (pred_ensemble_insens[test_mask] == labels[test_mask]).float().mean()

print(
          f"val acc: {val_acc:.3f}, test acc: {test_acc:.3f}"
      )

y_pred = pred_ensemble_insens[val_mask]
y_test =  labels[val_mask]

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Calculate AUC
AUC = roc_auc_score(y_test, y_pred)
print("AUC:", AUC)

# Calculate F1-score
f1 = f1_score(y_test, y_pred)
print("F1-Score:", f1)

# Calculate precision
precision = precision_score(y_test, y_pred)
print("Precision:", precision)

val acc: 0.972, test acc: 0.969
Accuracy: 0.9721900347624566
AUC: 0.8789762868710238
F1-Score: 0.8064516129032259
Precision: 0.847457627118644


In [None]:
print(val_acc)

In [None]:
# Visualization with TSNE

# Import necessary libraries for TSNE
import sklearn
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [None]:
# Create dimension-reduction

# For ground truth and ensemble predictors, use USU TSNE dimensionality reduction

# Based on https://builtin.com/data-science/tsne-python
tsne_data = model(g_usu, features).detach().numpy()

tsne = TSNE(n_components=2, verbose = 1)
tsne_results = tsne.fit_transform(model(g_usu, features).detach().numpy())

In [None]:
# Plot Ground Truth

# Dimensionality based on USU

labels = g_usu.ndata["label"]
predicted = labels.detach().numpy()

tsne_2d_one = tsne_results[:,0]
tsne_2d_two = tsne_results[:,1]

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)

In [None]:
# Plot Insensitive Prediction
predicted = pred_ensemble_insens.detach().numpy()

tsne_2d_one = tsne_results[:,0]
tsne_2d_two = tsne_results[:,1]

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)

In [None]:
# Plot Sensitive Prediction
predicted = pred_ensemble_sens.detach().numpy()

tsne_2d_one = tsne_results[:,0]
tsne_2d_two = tsne_results[:,1]

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)

In [None]:
# Plot USU Prediction

# Use USU Data Reduction
predicted = pred_usu.detach().numpy()

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)

In [None]:
# Plot UPU Prediction

# Create dimension-reduction

# Based on https://builtin.com/data-science/tsne-python
tsne_data = model(g_upu, features).detach().numpy()

tsne = TSNE(n_components=2, verbose = 1)
tsne_results = tsne.fit_transform(model(g_upu, features).detach().numpy())

predicted = pred_upu.detach().numpy()

tsne_2d_one = tsne_results[:,0]
tsne_2d_two = tsne_results[:,1]

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)

In [None]:
# Plot UVU Prediction

# Create dimension-reduction

# Based on https://builtin.com/data-science/tsne-python
tsne_data = model(g_uvu, features).detach().numpy()

tsne = TSNE(n_components=2, verbose = 1)
tsne_results = tsne.fit_transform(model(g_uvu, features).detach().numpy())

predicted = pred_uvu.detach().numpy()

tsne_2d_one = tsne_results[:,0]
tsne_2d_two = tsne_results[:,1]

df = pd.DataFrame({'x':tsne_2d_one, 'y':tsne_2d_two, 'pred':predicted})

# Show the df
# df

# Plot labelling using TSNE
plt.figure(figsize=(16,10))
sns.scatterplot(
    x="x", y="y",
    hue="pred",
    palette=sns.color_palette("bright", 2),
    data=df,
    legend="full",
    alpha=0.3
)