# Installation

In [2]:
!pip3 install tensorflow_text>=2.0.0rc0
!pip install torch_geometric
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip3 install sentence-transformers
!pip3 install transformers

Looking in indexes: https://download.pytorch.org/whl/cu118


# Import packages

In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader
from sentence_transformers import SentenceTransformer
from transformers import BertModel, BertTokenizer

# Import Dataset

In [45]:
cyberbully_df = pd.read_csv("cyberbully.csv")
cyberbully_df.head(3)

Unnamed: 0,Img_Name,Img_Text,Img_Text_Label,Img_Label,Text_Label,Sentiment,Emotion,Sarcasm,Harmful_Score,Target,Unnamed: 10,Unnamed: 11
0,0.jpg,Shivam @shivamishraa Girls be named naina and ...,Bully,Nonbully,Bully,Negative,Disgust,Yes,Partially-Harmful,Individual,,
1,1.jpg,Aaloo ke paranthe is the best breakfast Omelet...,Nonbully,Nonbully,Nonbully,Neutral,Other,No,Harmless,,,
2,2.jpg,For Boyfriend For Bestfriend DESI ADUKT TROLLS,Bully,Bully,Nonbully,Negative,Ridicule,No,Partially-Harmful,Society,,


# Embedding the sentence with sBERT

In [None]:
from transformers import AutoModel, AutoTokenizer
model_name = "paraphrase-MiniLM-L6-v2"
model = SentenceTransformer(model_name)

In [46]:
i = 0
embedded_sentence = []
for sentence in cyberbully_df["Img_Text"]:
    try:
        embeddings = model.encode(sentence, convert_to_tensor=True)
        embedded_sentence.append(embeddings)
    except TypeError:
        print(i)
        print(sentence)
    finally:
        i += 1

In [47]:
len(embedded_sentence)

5862

# Additional node features

In [48]:
additional_node_features = cyberbully_df[["Sentiment", "Emotion", "Sarcasm"]]
additional_node_features.head(3)

Unnamed: 0,Sentiment,Emotion,Sarcasm
0,Negative,Disgust,Yes
1,Neutral,Other,No
2,Negative,Ridicule,No


In [49]:
sentiment_values = cyberbully_df["Sentiment"].tolist()
emotion_values = cyberbully_df["Emotion"].tolist()
sarcasm_values = cyberbully_df["Sarcasm"].tolist()

unique_values_sentiment = list(set(sentiment_values))
unique_values_emotion = list(set(emotion_values))
unique_values_sarcasm = list(set(sarcasm_values))

mapping_sentiment = {value: index for index, value in enumerate(unique_values_sentiment)}
mapping_emotion = {value: index for index, value in enumerate(unique_values_emotion)}
mapping_sarcasm = {value: index for index, value in enumerate(unique_values_sarcasm)}

numerical_indices_sentiment = [mapping_sentiment[value] for value in sentiment_values]
numerical_indices_emotion = [mapping_emotion[value] for value in emotion_values]
numerical_indices_sarcasm = [mapping_sarcasm[value] for value in sarcasm_values]

num_classes_sentiment = len(unique_values_sentiment)
num_classes_emotion = len(unique_values_emotion)
num_classes_sarcasm = len(unique_values_sarcasm)

one_hot_encoding_sentiment = torch.nn.functional.one_hot(torch.tensor(numerical_indices_sentiment), num_classes_sentiment)
one_hot_encoding_emotion = torch.nn.functional.one_hot(torch.tensor(numerical_indices_emotion), num_classes_emotion)
one_hot_encoding_sarcasm = torch.nn.functional.one_hot(torch.tensor(numerical_indices_sarcasm), num_classes_sarcasm)

one_hot_encoding_combined = torch.cat([one_hot_encoding_sentiment, one_hot_encoding_emotion, one_hot_encoding_sarcasm], dim=1)
one_hot_encoding_combined

tensor([[1, 0, 0,  ..., 0, 1, 0],
        [0, 0, 1,  ..., 0, 0, 1],
        [1, 0, 0,  ..., 0, 0, 1],
        ...,
        [1, 0, 0,  ..., 0, 1, 0],
        [0, 0, 0,  ..., 0, 0, 1],
        [1, 0, 0,  ..., 0, 1, 0]])

In [50]:
len(one_hot_encoding_combined)

5862

In [51]:
type(one_hot_encoding_combined)

torch.Tensor

In [52]:
stack_embedded_sentence = torch.stack(embedded_sentence, dim=0)
# stack_add_node_features = torch.stack(one_hot_encoding_combined, dim = 0)
node_features = torch.cat([stack_embedded_sentence.to("cuda"), one_hot_encoding_combined.to("cuda")], dim=1)

In [53]:
node_features

tensor([[ 0.0581,  0.0897, -0.1928,  ...,  0.0000,  1.0000,  0.0000],
        [-0.1879, -0.1056,  0.0554,  ...,  0.0000,  0.0000,  1.0000],
        [-0.5000,  0.3423,  0.3584,  ...,  0.0000,  0.0000,  1.0000],
        ...,
        [ 0.0073, -0.6288,  0.0517,  ...,  0.0000,  1.0000,  0.0000],
        [-0.3019,  0.3704,  0.1639,  ...,  0.0000,  0.0000,  1.0000],
        [ 0.0196,  1.2078,  0.0869,  ...,  0.0000,  1.0000,  0.0000]],
       device='cuda:0')

In [54]:
node_features[0]

tensor([ 5.8056e-02,  8.9747e-02, -1.9282e-01,  3.3935e-01, -2.7581e-01,
         6.2932e-02,  5.8596e-01, -1.7504e-01,  1.2247e-01,  1.4029e-01,
         3.9560e-01, -2.7802e-01, -4.7769e-02, -1.0318e-01,  3.3026e-02,
        -1.2242e-01, -3.9688e-02, -7.6766e-03,  1.8037e-01,  2.4633e-01,
        -1.5553e-01,  1.6732e-02, -9.9984e-02, -2.8914e-01,  3.7149e-01,
        -1.3666e-01,  1.4943e-01, -4.3571e-02, -1.9676e-02, -1.3193e-01,
        -6.7504e-02, -1.5992e-01, -1.4398e-01,  1.7429e-01, -1.4909e-01,
         1.0948e-02, -4.8534e-01,  1.9738e-01, -2.7077e-01, -2.3467e-01,
        -1.2652e-02,  7.9679e-02, -2.6009e-01,  9.4327e-03,  1.0099e-01,
         9.4603e-02, -5.8038e-02,  1.9497e-01,  1.2454e-01, -1.1513e-01,
        -9.7945e-01, -3.5390e-01, -2.5893e-01,  2.0604e-02,  1.9451e-01,
        -2.9885e-01,  3.0907e-02, -3.6232e-02,  3.9445e-01, -4.8413e-02,
        -7.7715e-02, -1.1762e-01,  1.7044e-01, -2.9932e-01,  2.9068e-01,
        -1.9984e-01,  8.1213e-02, -4.3866e-01,  4.2

# Cosine similarity to get edge index