In [2]:
import torch
import torch.nn as nn
from transformers import RobertaModel, RobertaTokenizer
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.nn.functional import normalize
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.optim.lr_scheduler import ExponentialLR
from sklearn.model_selection import train_test_split
import os

----------------------------------------------------------
The next cell works around the error `Torch compile: libcuda.so cannot found`, which is raised by
```python
torch.compile(robertaModel, backend="inductor")
```
----------------------------------------------------------

In [3]:
!export LC_ALL="en_US.UTF-8"
!export LD_LIBRARY_PATH="/usr/lib64-nvidia"
!export LIBRARY_PATH="/usr/local/cuda/lib64/stubs"
!ldconfig /usr/lib64-nvidia

/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link

/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link



In [39]:
# Select the compute device: the first CUDA GPU when one is visible, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


#### Social Media Modality

In [5]:
class SocialMedia(nn.Module):
    """CNN classifier over RoBERTa token embeddings.

    Three parallel 1-D convolutions (kernel sizes 5, 4 and 3) are applied to
    the token embeddings, max-pooled, concatenated along the sequence axis,
    passed through two further convolution/pooling stages and finally through
    two linear layers followed by a sigmoid.

    When ``independent`` is true the module tokenizes raw sentences itself and
    caches the resulting RoBERTa embeddings on disk; otherwise ``forward``
    expects pre-computed embeddings.

    Args:
        device: Device every sub-module is moved to; also used for the
            tensors created inside ``forward``.
        roberta_model_path: Name or path handed to ``from_pretrained``.
        num_classes: Width of the final linear layer.
        inductor: Wrap RoBERTa with ``torch.compile(backend="inductor")``.
        independent: See above.
        embedding_dir: Directory for cached embeddings (created if missing).
    """

    def __init__(
        self,
        device,
        roberta_model_path='roberta-base',
        num_classes=1,
        inductor=True,
        independent=True,
        embedding_dir='/content/drive/Shareddrives/test/FYP/socialmedia/embedding_files'
        ):
        super(SocialMedia, self).__init__()

        # Keep the target device on the instance so forward() does not rely on
        # a notebook-level global named `device`.
        self.device = device
        self.independent = independent
        self.embedding_dir = embedding_dir
        os.makedirs(self.embedding_dir, exist_ok=True)

        # Load pre-trained RoBERTa model
        self.roberta = RobertaModel.from_pretrained(roberta_model_path).to(device=device)
        if (inductor):
          self.roberta = torch.compile(self.roberta, backend="inductor")
        self.tokenizer = RobertaTokenizer.from_pretrained(roberta_model_path)

        # CNN
        self.conv1d_p1 = nn.Conv1d(in_channels=768, out_channels=128, kernel_size=5).to(device=device)
        self.conv1d_p2 = nn.Conv1d(in_channels=768, out_channels=128, kernel_size=4).to(device=device)
        self.conv1d_p3 = nn.Conv1d(in_channels=768, out_channels=128, kernel_size=3).to(device=device)
        self.conv1d_s1 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=5).to(device=device)
        self.conv1d_s2 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=5).to(device=device)

        # Pooling
        self.max_pool_p1 = nn.MaxPool1d(kernel_size=5).to(device=device)
        self.max_pool_p2 = nn.MaxPool1d(kernel_size=5).to(device=device)
        self.max_pool_p3 = nn.MaxPool1d(kernel_size=5).to(device=device)
        self.max_pool_s1 = nn.MaxPool1d(kernel_size=5).to(device=device)
        self.max_pool_s2 = nn.MaxPool1d(kernel_size=10).to(device=device)

        # Fully connected layers
        self.linear1 = nn.Linear(640, 128).to(device=device)
        self.linear2 = nn.Linear(128, num_classes).to(device=device)
        self.sigmoid = nn.Sigmoid().to(device=device)

    def _encode(self, x):
        """Tokenize ``x`` and return RoBERTa's last hidden state (no gradients)."""
        tokenized_sentences = self.tokenizer(
            x, truncation=True, padding='max_length', return_tensors='pt'
        ).to(device=self.device)

        with torch.no_grad():
            model_output = self.roberta(**tokenized_sentences)

        return model_output.last_hidden_state

    def _load_or_compute_embeddings(self, x, iteration, train, mode):
        """Return embeddings for ``x``, using the on-disk cache while training.

        The cache file is ``<embedding_dir>/<mode>_<iteration>.pth``. It is only
        read when ``train`` is true. If it is missing, unreadable, or ``train``
        is false, the embeddings are recomputed and the file is (re)written.
        """
        embedding_path = os.path.join(self.embedding_dir, f'{mode}_{iteration}.pth')

        if train and os.path.exists(embedding_path):
            try:
                return torch.load(embedding_path).to(device=self.device)
            except Exception as e:
                # A corrupt cache entry must not leave `embeddings` undefined:
                # report it and fall through to recomputation.
                print('Error loading embeddings:', e)

        embeddings = self._encode(x)
        torch.save(embeddings.cpu(), embedding_path)
        return embeddings

    def forward(self, x, iteration, train, mode):
        """Classify a batch.

        Args:
            x: Raw sentences when ``self.independent`` is true, otherwise a
                tensor of token embeddings whose last dimension is 768 (the
                convolutions' ``in_channels``). The sequence length must
                flatten to the 640 features ``linear1`` expects.
            iteration: Cache key component (independent mode only).
            train: Whether cached embeddings may be reused (independent mode only).
            mode: Cache key prefix (independent mode only).

        Returns:
            Tuple ``(output, output_l1)``: the sigmoid output of ``linear2``
            and the ReLU-activated 128-dimensional output of ``linear1``.
        """
        if self.independent:
            embeddings = self._load_or_compute_embeddings(x, iteration, train, mode)
        else:
            embeddings = x

        # Conv1d wants (batch, channels, length); permute once and share it.
        channels_first = embeddings.permute(0, 2, 1)

        output_p1 = self.max_pool_p1(F.relu(self.conv1d_p1(channels_first)))
        output_p2 = self.max_pool_p2(F.relu(self.conv1d_p2(channels_first)))
        output_p3 = self.max_pool_p3(F.relu(self.conv1d_p3(channels_first)))
        # The three branches are joined along the length axis, not the channel axis.
        output_s = torch.cat((output_p1, output_p2, output_p3), dim=2)
        output_s1 = self.max_pool_s1(F.relu(self.conv1d_s1(output_s)))
        output_s2 = self.max_pool_s2(F.relu(self.conv1d_s2(output_s1)))
        output_s2 = output_s2.permute(0, 2, 1)
        output_f = output_s2.reshape(output_s2.size(0), -1)
        output_l1 = torch.relu(self.linear1(output_f))
        output_l2 = self.linear2(output_l1)
        output = self.sigmoid(output_l2)

        return output, output_l1

In [6]:
class TFN(nn.Module):
    """Tensor-fusion network over two pre-trained ``SocialMedia`` heads.

    For one dapp, the posts returned by ``fetchFromReddit`` are embedded with
    RoBERTa and fed to a sentiment model and a fake-news model. The outer
    product of their 128-dimensional hidden features is flattened and passed
    through two ReLU-activated linear layers.

    Args:
        device: Device for all sub-modules and for tensors built in ``forward``.
        inductor: Wrap RoBERTa with ``torch.compile(backend="inductor")``.
        roberta_model_path: Name or path handed to ``from_pretrained``.
        socialmedia_out: Width of each head's hidden feature vector.
        post_fusion_dim: Width of both post-fusion linear layers.
        post_fusion_dropout: Dropout probability applied to the fused tensor.
        embedding_dir: Directory for cached embeddings (created if missing).
        sentiment_checkpoint: State-dict file for the sentiment head.
        fakenews_checkpoint: State-dict file for the fake-news head.
    """

    def __init__(
        self,
        device,
        inductor=True,
        roberta_model_path='roberta-base',
        socialmedia_out=128,
        post_fusion_dim=256,
        post_fusion_dropout=0,
        embedding_dir='/content/drive/Shareddrives/test/FYP/socialmedia/embedding_files',
        sentiment_checkpoint='/content/drive/Shareddrives/test/FYP/sentiment/sentiment-usairline.pth',
        fakenews_checkpoint='/content/drive/Shareddrives/test/FYP/fake-news/fakenews.pth'
        ):
        super(TFN, self).__init__()

        # Keep the target device on the instance so forward() does not rely on
        # a notebook-level global named `device`.
        self.device = device
        self.embedding_dir = embedding_dir
        os.makedirs(self.embedding_dir, exist_ok=True)

        self.socialmedia_out = socialmedia_out
        self.post_fusion_dim = post_fusion_dim
        self.post_fusion_dropout = nn.Dropout(p=post_fusion_dropout).to(device=device)

        # Load pre-trained RoBERTa model
        self.roberta = RobertaModel.from_pretrained(roberta_model_path).to(device=device)
        if (inductor):
          self.roberta = torch.compile(self.roberta, backend="inductor")
        self.tokenizer = RobertaTokenizer.from_pretrained(roberta_model_path)

        # Load the pre-trained heads. `map_location` lets a checkpoint saved on
        # one device type be restored on another (e.g. GPU-trained -> CPU run).
        # `embedding_dir` is forwarded so the heads do not create the default
        # directory when the caller chose a different one.
        self.sentiment = SocialMedia(device=device, inductor=inductor, independent=False, embedding_dir=embedding_dir)
        self.sentiment.load_state_dict(torch.load(sentiment_checkpoint, map_location=device))

        self.fakenews = SocialMedia(device=device, inductor=inductor, independent=False, embedding_dir=embedding_dir)
        self.fakenews.load_state_dict(torch.load(fakenews_checkpoint, map_location=device))

        # define the post_fusion layers
        self.post_fusion_layer_1 = nn.Linear((self.socialmedia_out) * (self.socialmedia_out), self.post_fusion_dim).to(device=device)
        self.post_fusion_layer_2 = nn.Linear(self.post_fusion_dim, self.post_fusion_dim).to(device=device)

    def fetchFromReddit(self, dapp_address):
        """Return the posts for ``dapp_address``.

        Placeholder: currently ignores the address and returns the same
        sentence ten times.
        """
        # TODO: Retrieve 10 posts for dapp from reddit
        return ["I love this product!"]*10

    def _encode_posts(self, dapp_address):
        """Fetch the dapp's posts and return RoBERTa's last hidden state for them."""
        sentences = self.fetchFromReddit(dapp_address)

        tokenized_sentences = self.tokenizer(
            sentences, truncation=True, padding='max_length', return_tensors='pt'
        ).to(device=self.device)

        with torch.no_grad():
            model_output = self.roberta(**tokenized_sentences)

        return model_output.last_hidden_state

    def _load_or_compute_embeddings(self, dapp_address, train, mode):
        """Return post embeddings, using ``<embedding_dir>/<mode>_<dapp_address>.pth`` as cache.

        The cache is only read when ``train`` is true. If it is missing,
        unreadable, or ``train`` is false, the embeddings are recomputed and
        the file is (re)written.
        """
        embedding_path = os.path.join(self.embedding_dir, f'{mode}_{dapp_address}.pth')

        if train and os.path.exists(embedding_path):
            print('using saved embeddings')
            try:
                return torch.load(embedding_path).to(device=self.device)
            except Exception as e:
                # A corrupt cache entry must not leave `embeddings` undefined:
                # report it and fall through to recomputation.
                print('Error loading embeddings:', e)

        embeddings = self._encode_posts(dapp_address)
        torch.save(embeddings.cpu(), embedding_path)
        return embeddings

    def forward(self, dapp_address, iteration, train, mode):
        """Fuse sentiment and fake-news features for one dapp.

        Args:
            dapp_address: Identifier of the dapp; used to fetch posts and as
                part of the cache file name.
            iteration: Forwarded unchanged to both heads.
            train: Whether cached embeddings may be reused.
            mode: Cache key prefix.

        Returns:
            Tensor of shape ``(num_posts, post_fusion_dim)``.
        """
        embeddings = self._load_or_compute_embeddings(dapp_address, train, mode)

        # Only the hidden features of each head are fused; the class
        # probabilities are discarded.
        _, sentiment_h = self.sentiment(embeddings, iteration, train, mode)
        _, fakenews_h = self.fakenews(embeddings, iteration, train, mode)

        # sentiment_h and fakenews_h both have shape (batch_size, socialmedia_out).
        # Unsqueezing gives (batch_size, socialmedia_out, 1) x (batch_size, 1, socialmedia_out),
        # so bmm yields the per-sample outer product of shape
        # (batch_size, socialmedia_out, socialmedia_out).
        fusion_tensor = torch.bmm(sentiment_h.unsqueeze(2), fakenews_h.unsqueeze(1))

        batch_size = sentiment_h.shape[0]
        fusion_tensor = fusion_tensor.view(batch_size, -1)

        post_fusion_dropped = self.post_fusion_dropout(fusion_tensor)
        post_fusion_y_1 = F.relu(self.post_fusion_layer_1(post_fusion_dropped))
        post_fusion_y_2 = F.relu(self.post_fusion_layer_2(post_fusion_y_1))

        return post_fusion_y_2


In [20]:
class SocialMediaModality(nn.Module):
    """Applies a ``TFN`` to every dapp address in a batch and stacks the results.

    All constructor arguments are forwarded positionally to ``TFN``.
    """

    def __init__(
        self,
        device,
        inductor=True,
        roberta_model_path='roberta-base',
        socialmedia_out=128,
        post_fusion_dim=256,
        post_fusion_dropout=0,
        embedding_dir='/content/drive/Shareddrives/test/FYP/socialmedia/embedding_files'
        ):
        super(SocialMediaModality, self).__init__()

        # Keep the target device on the instance so forward() does not rely on
        # a notebook-level global named `device`.
        self.device = device

        self.tfn = TFN(
            device,
            inductor,
            roberta_model_path,
            socialmedia_out,
            post_fusion_dim,
            post_fusion_dropout,
            embedding_dir
        )

    def forward(self, dapp_addresses, iteration, train, mode):
        """Run the TFN once per address.

        Args:
            dapp_addresses: Iterable of dapp identifiers.
            iteration, train, mode: Forwarded unchanged to ``TFN.forward``.

        Returns:
            The per-address TFN outputs stacked along a new leading dimension
            and placed on ``self.device``.
        """
        outputs_list = [
            self.tfn(dapp_address, iteration, train, mode)
            for dapp_address in dapp_addresses
        ]

        # Combine the outputs into a single tensor
        return torch.stack(outputs_list, dim=0).to(device=self.device)

In [None]:
# Smoke-test TFN on a single dapp.
tfn = TFN(
    device,
    inductor=True
)

# TFN.forward takes a dapp address (used to fetch posts and to name the cache
# file), not the posts themselves. Passing a list of sentences here would be
# ignored by fetchFromReddit and would only produce a very long cache filename.
dapp_address = "0x6e38a457c722c6011b2dfa06d49240e797844d66"

outputs = tfn(dapp_address, 1, False, 'test')
outputs.shape

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


torch.Size([10, 256])

In [None]:
# Initialize the model
socialMediaModality = SocialMediaModality(
    device,
    inductor=True
)

addresses_tensor = torch.randint(1000, 10000, size=(20, 1), dtype=torch.int32)

output = socialMediaModality(addresses_tensor, 1, True, 'train')

output.shape

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


torch.Size([20, 10, 256])

#### Multi-Modal Fusion

In [8]:
class SampleWeightedFocalContrastiveLoss(nn.Module):
    '''
    Sample-Weighted Focal Contrastive (SWFC) Loss:
    1. Divide training samples into positive and negative pairs to maximize
    inter-class distances while minimizing intra-class distances;
    2. Assign more importance to hard-to-classify positive pairs;
    3. Assign more importance to minority classes.
    '''

    def __init__(self, temp_param, focus_param, sample_weight_param, dataset, class_counts, device):
        '''
        temp_param: control the strength of penalty on hard negative samples;
        focus_param: forces the model to concentrate on hard-to-classify samples;
        sample_weight_param: control the strength of penalty on minority classes;
        dataset: MELD or IEMOCAP (anything else raises ValueError);
        class_counts: tensor with the number of training samples per class;
        device: cpu or cuda.
        '''
        super().__init__()

        self.temp_param = temp_param
        self.focus_param = focus_param
        self.sample_weight_param = sample_weight_param
        self.dataset = dataset
        self.class_counts = class_counts
        self.device = device

        if self.dataset == 'MELD':
            self.num_classes = 7
        elif self.dataset == 'IEMOCAP':
            self.num_classes = 6
        else:
            raise ValueError('Please choose either MELD or IEMOCAP')

        self.class_weights = self.get_sample_weights()


    def dot_product_similarity(self, current_features, feature_sets):
        '''
        Use dot-product to measure the similarity between feature pairs.
        Returns the temperature-scaled similarities, softmax-normalised over
        the first dimension (i.e. over the compared samples).
        '''
        similarity = torch.sum(current_features * feature_sets, dim = -1)
        similarity_probs = torch.softmax(similarity / self.temp_param, dim = 0)

        return similarity_probs


    def positive_pairs_loss(self, similarity_probs):
        '''
        Calculate the loss contributed from positive pairs: the mean of
        log(p) * (1 - p)^focus_param over the given probabilities.
        '''
        pos_pairs_loss = torch.mean(torch.log(similarity_probs) * ((1 - similarity_probs)**self.focus_param), dim = 0)

        return pos_pairs_loss


    def get_sample_weights(self):
        '''
        Assign more importance to minority classes: weights are
        (total / count)^sample_weight_param, L1-normalised to sum to one.
        '''
        total_counts = torch.sum(self.class_counts, dim = -1)
        class_weights = (total_counts / self.class_counts)**self.sample_weight_param
        class_weights = normalize(class_weights, dim = -1, p = 1.0)

        return class_weights


    def forward(self, features, labels):
        '''
        features: (num_samples, feature_dim) tensor;
        labels: (num_samples,) tensor of integer class indices.
        Always returns a 0-dim tensor, including when the batch contains no
        positive pairs (the loss is then zero).
        '''
        self.num_samples = labels.shape[0]
        self.feature_dim = features.shape[-1]

        features = normalize(features, dim = -1)  # normalization helps smooth the learning process

        # Look up every sample's class weight with one indexing operation
        # instead of building a Python list of scalars.
        weight_index = labels.long().to(self.class_weights.device)
        batch_sample_weights = self.class_weights[weight_index].float().to(self.device)

        # Start from a tensor so the result is a tensor even if no sample in
        # the batch has a positive partner.
        total_loss = features.new_zeros(())
        for i in range(self.num_samples):
            label_sets = torch.cat((labels[:i], labels[i + 1:]), dim = 0)
            positive_mask = label_sets == labels[i]  # positive pairs share the same label
            if not bool(positive_mask.any()):
                continue

            feature_sets = torch.cat((features[:i], features[i + 1:]), dim = 0)
            expand_current_features = features[i].expand(self.num_samples - 1, self.feature_dim).to(self.device)
            similarity_probs = self.dot_product_similarity(expand_current_features, feature_sets)
            pos_pairs_loss = self.positive_pairs_loss(similarity_probs[positive_mask])
            total_loss = total_loss + pos_pairs_loss * batch_sample_weights[i]

        loss = - total_loss / self.num_samples

        return loss

In [9]:
class SoftHGRLoss(nn.Module):
    '''
    Soft-HGR loss: maximize the correlations across the multimodal-fused
    features extracted from MultiAttn.
    '''

    def __init__(self):
        super().__init__()


    def feature_mapping(self, feature_X, feature_Y):
        '''
        Inner product between two feature mappings: the per-row dot product,
        averaged over the first dimension.
        '''
        per_sample_inner = (feature_X * feature_Y).sum(dim = -1)

        return per_sample_inner.mean(dim = 0)


    def feature_covariance(self, feature_X, feature_Y):
        '''
        Inner product between the two feature covariances, i.e. the trace of
        their matrix product, scaled by 1 / num_samples.
        '''
        # NOTE(review): torch.cov treats rows as variables and columns as
        # observations — confirm this orientation is the intended one for
        # inputs laid out as (num_samples, feature_dim).
        cov_product = torch.cov(feature_X) @ torch.cov(feature_Y)
        # Scaling by 1 / num_samples was found empirically to make training
        # more stable and to improve model performance.
        return cov_product.trace() / self.num_samples


    def forward(self, f_t, f_a, f_v):
        '''
        Sum the Soft-HGR objective over every unordered pair of the three
        feature sets and return its negative, divided by num_samples.
        '''
        self.num_samples = f_t.shape[0]

        modalities = (f_t, f_a, f_v)
        modality_pairs = [
            (first, second)
            for first in range(len(modalities) - 1)
            for second in range(first + 1, len(modalities))
        ]

        total_loss = 0.0
        for first, second in modality_pairs:
            mapping_term = self.feature_mapping(modalities[first], modalities[second])
            covariance_term = self.feature_covariance(modalities[first], modalities[second])
            total_loss += mapping_term - covariance_term / 2

        return - total_loss / self.num_samples

In [10]:
class MLP(nn.Module):
    '''
    Two linear layers with a ReLU in between; dropout is applied to the
    output of the second layer.
    '''

    def __init__(self, device, input_dim, hidden_dim, num_classes, dropout_rate):
        super().__init__()

        # Sub-modules are created in a fixed order and moved to `device` one by one.
        self.linear_1 = nn.Linear(input_dim, hidden_dim).to(device)
        self.relu = nn.ReLU().to(device)
        self.linear_2 = nn.Linear(hidden_dim, num_classes).to(device)
        self.dropout = nn.Dropout(dropout_rate).to(device)


    def forward(self, x):
        hidden = self.relu(self.linear_1(x))
        projected = self.linear_2(hidden)

        return self.dropout(projected)

In [11]:
class BidirectionalCrossAttention(nn.Module):
    '''
    Bidirectional cross-attention layer: a single attention head with its own
    linear projections for queries, keys and values.
    '''

    def __init__(self, device, model_dim, Q_dim, K_dim, V_dim):
        super().__init__()

        self.query_matrix = nn.Linear(model_dim, Q_dim).to(device=device)
        self.key_matrix = nn.Linear(model_dim, K_dim).to(device=device)
        self.value_matrix = nn.Linear(model_dim, V_dim).to(device=device)


    def bidirectional_scaled_dot_product_attention(self, Q, K, V):
        '''
        Scaled dot-product attention without masking: softmax(Q K^T / sqrt(K_dim)) V.
        Q, K and V are 3-D batched tensors (torch.bmm is used).
        '''
        score = torch.bmm(Q, K.transpose(-1, -2))
        scaled_score = score / (K.shape[-1]**0.5)
        # The softmax output already lives on the inputs' device, so no
        # explicit move (and no dependency on a global `device`) is needed.
        attention = torch.bmm(F.softmax(scaled_score, dim = -1), V)

        return attention


    def forward(self, query, key, value):
        '''
        Project query/key/value with the layer's linear maps and attend.
        Returns a tensor whose last dimension is V_dim.
        '''
        Q = self.query_matrix(query)
        K = self.key_matrix(key)
        V = self.value_matrix(value)
        attention = self.bidirectional_scaled_dot_product_attention(Q, K, V)

        return attention



class MultiHeadAttention(nn.Module):
    '''
    Multi-head bidirectional cross-attention: runs `num_heads` independent
    BidirectionalCrossAttention heads, concatenates their outputs along the
    last dimension and projects the result back to `model_dim`.
    '''

    def __init__(self, device, num_heads, model_dim, Q_dim, K_dim, V_dim):
        super().__init__()

        self.num_heads = num_heads
        self.attention_heads = nn.ModuleList(
            [BidirectionalCrossAttention(device, model_dim, Q_dim, K_dim, V_dim) for _ in range(self.num_heads)]
        )
        self.projection_matrix = nn.Linear(num_heads * V_dim, model_dim).to(device=device)


    def forward(self, query, key, value):
        heads = [head(query, key, value) for head in self.attention_heads]
        # The concatenation stays on the heads' device, so no explicit move
        # (and no dependency on a global `device`) is needed.
        multihead_attention = self.projection_matrix(torch.cat(heads, dim = -1))

        return multihead_attention



class Feedforward(nn.Module):
    '''
    Position-wise feed-forward network (operates as a key-value memory):
    model_dim -> hidden_dim -> model_dim with a ReLU in between and dropout
    on the output.
    '''

    def __init__(self, device, model_dim, hidden_dim, dropout_rate):
        super().__init__()

        self.linear_W1 = nn.Linear(model_dim, hidden_dim).to(device)
        self.linear_W2 = nn.Linear(hidden_dim, model_dim).to(device)
        self.relu = nn.ReLU().to(device)
        self.dropout = nn.Dropout(dropout_rate).to(device)


    def forward(self, x):
        expanded = self.relu(self.linear_W1(x))
        contracted = self.linear_W2(expanded)

        return self.dropout(contracted)



class AddNorm(nn.Module):
    '''
    Residual connection followed by layer normalization, used to smooth the
    learning process: LayerNorm(x + Dropout(sublayer(x))).
    '''

    def __init__(self, device, model_dim, dropout_rate):
        super().__init__()

        self.layer_norm = nn.LayerNorm(model_dim).to(device)
        self.dropout = nn.Dropout(dropout_rate).to(device)


    def forward(self, x, sublayer):
        # `sublayer` is any callable mapping x to a tensor of the same shape.
        residual_sum = x + self.dropout(sublayer(x))

        return self.layer_norm(residual_sum)



class MultiAttnLayer(nn.Module):
    '''
    One layer of MultiAttn, a multimodal fusion model which aims to capture the
    complicated interactions and dependencies across textual, audio and visual
    modalities through bidirectional cross-attention layers.
    MultiAttn is made up of three sub-components:
    1. MultiAttn_text: integrate the textual modality with audio and visual information;
    2. MultiAttn_audio: incorporate the audio modality with textual and visual information;
    3. MultiAttn_visual: fuse the visual modality with textual and audio cues.

    A layer lets the query modality attend to modality A, then to modality B,
    and finally applies a feed-forward network; each of the three steps is
    wrapped in an AddNorm residual block.
    '''

    def __init__(self, device, num_heads, model_dim, hidden_dim, dropout_rate):
        super().__init__()

        # Every head gets an equal slice of the model dimension for Q, K and V.
        head_dim = model_dim // num_heads
        self.attn_1 = MultiHeadAttention(device, num_heads, model_dim, head_dim, head_dim, head_dim)
        self.add_norm_1 = AddNorm(device, model_dim, dropout_rate)
        self.attn_2 = MultiHeadAttention(device, num_heads, model_dim, head_dim, head_dim, head_dim)
        self.add_norm_2 = AddNorm(device, model_dim, dropout_rate)
        self.ff = Feedforward(device, model_dim, hidden_dim, dropout_rate)
        self.add_norm_3 = AddNorm(device, model_dim, dropout_rate)


    def forward(self, query_modality, modality_A, modality_B):
        def attend_to_A(queries):
            # Modality A supplies both the keys and the values.
            return self.attn_1(queries, modality_A, modality_A)

        def attend_to_B(queries):
            # Modality B supplies both the keys and the values.
            return self.attn_2(queries, modality_B, modality_B)

        fused_with_A = self.add_norm_1(query_modality, attend_to_A)
        fused_with_B = self.add_norm_2(fused_with_A, attend_to_B)

        return self.add_norm_3(fused_with_B, self.ff)



class MultiAttn(nn.Module):
    '''
    A stack of `num_layers` MultiAttn layers applied one after another.
    '''

    def __init__(self, device, num_layers, model_dim, num_heads, hidden_dim, dropout_rate):
        super().__init__()

        stacked_layers = [
            MultiAttnLayer(device, num_heads, model_dim, hidden_dim, dropout_rate)
            for _ in range(num_layers)
        ]
        self.multiattn_layers = nn.ModuleList(stacked_layers)


    def forward(self, query_modality, modality_A, modality_B):
        # Only the query stream is updated from layer to layer; modality A and
        # modality B are passed to every layer unchanged.
        fused = query_modality
        for layer in self.multiattn_layers:
            fused = layer(fused, modality_A, modality_B)

        return fused



class MultiAttnModel(nn.Module):
    '''
    Three MultiAttn stacks, one per modality. Each stack uses its own
    modality as the query stream and the other two as modality A and B.
    '''

    def __init__(self, device, num_layers, model_dim, num_heads, hidden_dim, dropout_rate):
        super().__init__()

        stack_args = (device, num_layers, model_dim, num_heads, hidden_dim, dropout_rate)
        self.multiattn_text = MultiAttn(*stack_args)
        self.multiattn_audio = MultiAttn(*stack_args)
        self.multiattn_visual = MultiAttn(*stack_args)


    def forward(self, text_features, audio_features, visual_features):
        # Every stack reads the original, un-fused features of the other two modalities.
        fused_text = self.multiattn_text(text_features, audio_features, visual_features)
        fused_audio = self.multiattn_audio(audio_features, text_features, visual_features)
        fused_visual = self.multiattn_visual(visual_features, text_features, audio_features)

        return fused_text, fused_audio, fused_visual

#### Combined Model

In [56]:
class PonziShield(nn.Module):
    """Combined model: fuses social-media, smart-contract and transaction features.

    Pipeline of ``forward``:
      1. Each modality produces a feature sequence per dapp address.
      2. Transaction features are projected to ``model_dim``.
      3. If ``multi_attn_flag`` is set, the three sequences are fused by
         ``MultiAttnModel``.
      4. Each sequence is flattened and reduced to ``model_dim`` by a linear layer.
      5. The three vectors are concatenated, reduced by ``fc`` and classified by ``mlp``.

    Args:
        device: Device for all sub-modules and for tensors moved in ``forward``.
        inductor, roberta_model_path, socialmedia_out, post_fusion_dim,
        post_fusion_dropout, embedding_dir: Forwarded to ``SocialMediaModality``.
        tensorfusion_out: Feature width of one post produced by the social-media modality.
        multi_attn_flag: Enable the cross-modal attention stage.
        hidden_dim, dropout, num_layers, model_dim, num_heads: ``MultiAttnModel`` settings.
        n_classes: Output width of the final MLP.
        n_posts: Number of posts per dapp (sequence length of the social-media modality).
        transaction_seq: Sequence length of the transaction modality.
        transaction_features: Feature width of one transaction step.
        verbose: Print intermediate tensor shapes during ``forward``.
    """

    def __init__(
        self,
        device,
        inductor=True,
        roberta_model_path='roberta-base',
        socialmedia_out=128,
        tensorfusion_out=256,
        post_fusion_dim=256,
        post_fusion_dropout=0,
        embedding_dir='/content/drive/Shareddrives/test/FYP/socialmedia/embedding_files',
        multi_attn_flag=True,
        hidden_dim=1024,
        dropout=0,
        num_layers=6,
        model_dim=256,
        num_heads=4,
        n_classes=1,
        n_posts=10,
        transaction_seq=108,
        transaction_features=11,
        verbose=True
        ):
        super(PonziShield, self).__init__()

        # Keep the target device on the instance so forward() does not rely on
        # a notebook-level global named `device`.
        self.device = device
        self.verbose = verbose

        self.multi_attn_flag = multi_attn_flag
        self.multiattn = MultiAttnModel(device, num_layers, model_dim, num_heads, hidden_dim, dropout)
        self.fc = nn.Linear(model_dim * 3, model_dim).to(device=device)
        self.mlp = MLP(device, model_dim, model_dim, n_classes, dropout)
        self.post_fusion_socialmedia = nn.Linear(n_posts * tensorfusion_out, model_dim).to(device=device)
        self.pre_fusion_transaction = nn.Linear(transaction_features, model_dim).to(device=device)
        self.post_fusion_transaction = nn.Linear(transaction_seq * model_dim, model_dim).to(device=device)

        self.socialmedia_modality = SocialMediaModality(
            device,
            inductor,
            roberta_model_path,
            socialmedia_out,
            post_fusion_dim,
            post_fusion_dropout,
            embedding_dir
        )

        # NOTE(review): the transaction modality is pinned to the CPU here and its
        # output is moved to `device` in forward(); TransactionModality is defined
        # outside this block — confirm this is intended.
        self.transaction_modality = TransactionModality(device='cpu')

        # NOTE(review): the smart-contract modality currently reuses
        # SocialMediaModality with the same arguments — presumably a placeholder.
        self.smartcode_modality = SocialMediaModality(
            device,
            inductor,
            roberta_model_path,
            socialmedia_out,
            post_fusion_dim,
            post_fusion_dropout,
            embedding_dir
        )

    def _log_shape(self, name, tensor):
        """Print ``name`` and the tensor's shape when verbose output is enabled."""
        if self.verbose:
            print(name, tensor.shape)

    def forward(self, dapp_addresses, iteration, train, mode):
        """Classify a batch of dapps.

        Args:
            dapp_addresses: Iterable of dapp identifiers.
            iteration, train, mode: Forwarded to the social-media and
                smart-contract modalities.

        Returns:
            Tuple ``(fused_sentiment_features, fused_contract_features,
            fused_transaction_features, fc_outputs, mlp_outputs)``.
        """
        sentiment_features = self.socialmedia_modality(dapp_addresses, iteration, train, mode)
        self._log_shape('sentiment_features', sentiment_features)
        contract_features = self.smartcode_modality(dapp_addresses, iteration, train, mode)
        self._log_shape('contract_features', contract_features)
        # NOTE(review): `train=True` is hard-coded rather than forwarding `train`;
        # confirm against TransactionModality's contract before changing it.
        _, transaction_features = self.transaction_modality(dapp_addresses, train=True)
        # TODO
        transaction_features = transaction_features.to(device=self.device)
        self._log_shape('transaction_features', transaction_features)
        transaction_features = self.pre_fusion_transaction(transaction_features)
        self._log_shape('transaction_features', transaction_features)

        if self.multi_attn_flag:
            fused_sentiment_features, fused_contract_features, fused_transaction_features = self.multiattn(sentiment_features, contract_features, transaction_features)
        else:
            fused_sentiment_features, fused_contract_features, fused_transaction_features = sentiment_features, contract_features, transaction_features

        self._log_shape('fused_sentiment_features', fused_sentiment_features)
        self._log_shape('fused_contract_features', fused_contract_features)
        self._log_shape('fused_transaction_features', fused_transaction_features)

        # Flatten each sequence to one vector per dapp and reduce it to model_dim.
        # The sentiment and contract streams share `post_fusion_socialmedia`.
        fused_sentiment_features = fused_sentiment_features.reshape(fused_sentiment_features.shape[0], -1)
        fused_sentiment_features = self.post_fusion_socialmedia(fused_sentiment_features)
        fused_contract_features = fused_contract_features.reshape(fused_contract_features.shape[0], -1)
        fused_contract_features = self.post_fusion_socialmedia(fused_contract_features)
        fused_transaction_features = fused_transaction_features.reshape(fused_transaction_features.shape[0], -1)
        fused_transaction_features = self.post_fusion_transaction(fused_transaction_features)

        self._log_shape('fused_sentiment_features', fused_sentiment_features)
        self._log_shape('fused_contract_features', fused_contract_features)
        self._log_shape('fused_transaction_features', fused_transaction_features)

        fused_features = torch.cat((fused_sentiment_features, fused_contract_features, fused_transaction_features), dim = -1)
        self._log_shape('fused_features', fused_features)
        fc_outputs = self.fc(fused_features)
        self._log_shape('fc_outputs', fc_outputs)
        mlp_outputs = self.mlp(fc_outputs)
        self._log_shape('mlp_outputs', mlp_outputs)

        return fused_sentiment_features, fused_contract_features, fused_transaction_features, fc_outputs, mlp_outputs

In [13]:
# Initialize the model
# Smoke-test PonziShield on a batch of placeholder addresses.
ponzishield = PonziShield(
    device=device
)

# Embeddings are cached in one file per address, so unseeded random addresses
# would create a fresh set of cache files on every run. A dedicated seeded
# generator keeps the placeholder batch reproducible without touching the
# global RNG state.
address_rng = torch.Generator().manual_seed(0)
addresses_tensor = torch.randint(
    1000, 10000, size=(20, 1), dtype=torch.int32, generator=address_rng
)

fused_text_features, fused_audio_features, fused_visual_features, fc_outputs, mlp_outputs = ponzishield(addresses_tensor, 1, True, 'train')

print("fused_text_features shape:", fused_text_features.shape)
print("fused_audio_features shape:", fused_audio_features.shape)
print("fused_visual_features shape:", fused_visual_features.shape)
print("fc_outputs shape:", fc_outputs.shape)
print("mlp_outputs shape:", mlp_outputs.shape)

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['robert

using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
fused_text_features shape: torch.Size([20, 256])
fused_audio_features shape: tor

In [14]:
# Display the shape of `addresses_tensor`, which is created outside this cell.
addresses_tensor.shape

torch.Size([20, 1])

In [57]:
# Build the combined model on the device selected earlier in the notebook.
ponzishield = PonziShield(device=device)

# Contract addresses fed through the model in this run.
contract_addresses = [
    "0x6e38a457c722c6011b2dfa06d49240e797844d66",
    "0x109c4f2ccc82c4d77bde15f306707320294aea3f",
    "0x793ae8c1b1a160bfc07bfb0d04f85eab1a71f4f2",
    "0x5fe5b7546d1628f7348b023a0393de1fc825a4fd",
    "0xd79b4c6791784184e2755b2fc1659eaab0f80456",
    "0x273930d21e01ee25e4c219b63259d214872220a2",
    "0xd07ce4329b27eb8896c51458468d98a0e4c0394c",
]

# One call to the model; the five returned tensors are unpacked in order.
(
    fused_text_features,
    fused_audio_features,
    fused_visual_features,
    fc_outputs,
    mlp_outputs,
) = ponzishield(contract_addresses, 1, True, 'train')

# Print each returned tensor's shape next to its caption.
for _caption, _tensor in (
    ("fused_text_features shape:", fused_text_features),
    ("fused_audio_features shape:", fused_audio_features),
    ("fused_visual_features shape:", fused_visual_features),
    ("fc_outputs shape:", fc_outputs),
    ("mlp_outputs shape:", mlp_outputs),
):
    print(_caption, _tensor.shape)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['robert

using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
sentiment_features torch.Size([7, 10, 256])
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
using saved embeddings
contract_features torch.Size([7, 10, 256])
torch.Size([7, 108, 11])
torch.Size([7])
transaction_features torch.Size([7, 108, 11])
transaction_features torch.Size([7, 108, 256])
fused_sentiment_features torch.Size([7, 10, 256])
fused_contract_features torch.Size([7, 10, 256])
fused_transaction_features torch.Size([7, 108, 256])
fused_sentiment_features torch.Size([7, 256])
fused_contract_features torch.Size([7, 256])
fused_transaction_features torch.Size([7, 256])
fused_features torch.Size([7, 768])
fc_outputs torch.Size([7, 256])
mlp_outputs torch.Size([7, 1])
fused_text_features shape: torch.Size([7, 256])
fused_audio_feature

In [None]:
# Optionally drop the model reference first so the tensors it holds can be freed
# (left disabled here; memory still referenced by `ponzishield` stays allocated).
# del ponzishield
# Release unused cached GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()

#### Transaction model

In [24]:
import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import math
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [1]:
# Colab-only: mount Google Drive at /content/drive so the '/content/drive/...'
# paths used by the following cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [25]:
class BinaryClassification(nn.Module):
    """Feed-forward head that maps a flat feature vector to one raw logit.

    The network is ``Linear -> ReLU -> BatchNorm`` twice (64 hidden units
    each), then dropout (p=0.1) and a final ``Linear(64, 1)``. No sigmoid is
    applied here.

    Args:
        embed_size: Number of input features per sample.
        device: Kept on the instance as ``self.device``; this class does not
            use it for anything else.
    """

    def __init__(self, embed_size, device):
        super().__init__()
        self.device = device

        # Attribute names and creation order are kept stable: they define the
        # state-dict keys and the order in which random init is drawn.
        self.layer_1 = nn.Linear(embed_size, 64)
        self.layer_2 = nn.Linear(64, 64)
        self.layer_out = nn.Linear(64, 1)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)

    def forward(self, inputs):
        """Return the output of the final linear layer for ``inputs``."""
        hidden = self.batchnorm1(self.relu(self.layer_1(inputs)))
        hidden = self.batchnorm2(self.relu(self.layer_2(hidden)))
        return self.layer_out(self.dropout(hidden))

In [26]:
class Classifier(nn.Module):
  """Transformer encoder over a fixed-length sequence followed by a binary head.

  A learned position embedding is added to the input, the sum is passed
  through a ``TransformerEncoder`` (``batch_first=True``), and the encoder
  output is flattened and fed to ``BinaryClassification``.

  Args:
      d_model: Feature size of every sequence element.
      seq_len: Sequence length; sizes the position-embedding table and the
          flattened input of the binary head (``seq_len * d_model``).
      nhead: Number of attention heads per encoder layer.
      dim_feedforward: Hidden size of each encoder layer's feed-forward part.
      nlayers: Number of stacked encoder layers.
      device: Stored as ``self.device`` and forwarded to the binary head.
      dropout: Dropout probability used inside the encoder layers.
  """

  def __init__(self, d_model, seq_len, nhead, dim_feedforward, nlayers, device, dropout = 0.5):
    super(Classifier, self).__init__()
    self.d_model = d_model
    self.seq_len = seq_len
    self.nhead = nhead
    self.dim_feedforward = dim_feedforward
    self.nlayers = nlayers
    self.device = device

    self.position_embedding = nn.Embedding(seq_len, d_model)
    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
    self.encoder = TransformerEncoder(encoder_layer, nlayers)
    self.binary_classifier = BinaryClassification(seq_len*d_model, device)

  def forward(self, src: Tensor) -> tuple[Tensor, Tensor]:
    """Encode ``src`` and classify the flattened encoding.

    Args:
        src: Tensor of shape ``[N, seq_length, embed_size]``. Because the
            binary head was built for ``seq_len * d_model`` inputs,
            ``seq_length`` and ``embed_size`` must equal the ``seq_len`` and
            ``d_model`` given at construction.

    Returns:
        ``(output, output1)``: ``output`` is what the binary head returns for
        the flattened ``[N, seq_length * embed_size]`` encoding, and
        ``output1`` is the encoder output before flattening.
    """
    N, seq_length, embed_size = src.shape
    # Build the position indices on the input's own device rather than the
    # device stored at construction time: the stored value goes stale when the
    # module is moved with .to() or unpickled on a different machine.
    positions = torch.arange(seq_length, device=src.device).expand(N, seq_length)
    src_ = src + self.position_embedding(positions)
    output1 = self.encoder(src_)
    output = self.binary_classifier(torch.reshape(output1, (N, seq_length*embed_size)))

    return output, output1

In [27]:
# Load the fully pickled transaction model (a whole nn.Module, not a state dict).
# - weights_only=False is required on PyTorch >= 2.6, where the default became
#   True and rejects pickled modules. Unpickling can execute arbitrary code, so
#   only load checkpoints from a trusted source.
# - map_location="cpu" keeps the load working on hosts without the device the
#   checkpoint was saved from; the evaluation cell below feeds CPU tensors.
# The Classifier / BinaryClassification classes must already be defined, since
# unpickling looks them up by name.
loaded_model = torch.load(
    '/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/PonziShield_tr_v1.pth',
    map_location="cpu",
    weights_only=False,
)
# Switch dropout / batch-norm layers to inference behaviour.
loaded_model.eval()

Classifier(
  (position_embedding): Embedding(108, 11)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=11, out_features=11, bias=True)
        )
        (linear1): Linear(in_features=11, out_features=8, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
        (linear2): Linear(in_features=8, out_features=11, bias=True)
        (norm1): LayerNorm((11,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((11,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.5, inplace=False)
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
  )
  (binary_classifier): BinaryClassification(
    (layer_1): Linear(in_features=1188, out_features=64, bias=True)
    (layer_2): Linear(in_features=64, out_features=64, bias=True)
    (layer_out): Linear(in_features=64, out_features=1, bias=True)
    

In [28]:
# Load the saved test features and labels from Drive.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code; only use it on trusted files — TODO confirm whether
# these arrays actually need it.
X_test = np.load("/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/tr/X_test.npy", allow_pickle=True)
y_test = np.load("/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/tr/y_test.npy", allow_pickle=True)

In [None]:
# Sanity-check the shapes of the loaded test arrays.
print(X_test.shape)
print(y_test.shape)

(31, 108, 11)
(31,)


In [41]:
# Peek at the first test sample.
X_test[0]

array([[-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ],
       [-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ],
       [-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ],
       ...,
       [-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ],
       [-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ],
       [-0.10294521, -0.11435686, -0.01381463, ..., -0.23786669,
        -0.31210669, -0.2284082 ]])

In [42]:
## test data
class TestData(Dataset):
    """Map-style dataset that pairs ``X_data[i]`` with ``y_data[i]``."""

    def __init__(self, X_data, y_data):
        # Both containers are stored as given; nothing is copied or converted.
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Number of samples, taken from the feature container."""
        return len(self.X_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.X_data[index]
        target = self.y_data[index]
        return sample, target


# Wrap the loaded test arrays in the TestData dataset.
test_data = TestData(X_test, y_test)

In [43]:
# batch_size=1: the loader yields one sample per batch.
test_loader = DataLoader(dataset=test_data, batch_size=1)

In [45]:
y_pred = []
y_true = []
sigmoid = nn.Sigmoid()

# Pure inference: disable autograd so no graph is built for each batch
# (which also makes the former .detach() calls unnecessary).
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to("cpu"), y_batch.to("cpu")
        # The model returns (logits, encoder output); only the logits are scored.
        results, result_before_bin_classifier = loaded_model(X_batch.float())
        # Sigmoid turns the logit into a probability; rounding yields the 0/1 class.
        y_batch_pred = torch.round(sigmoid(results))
        y_pred.extend(y_batch_pred.cpu().numpy())
        y_true.extend(y_batch.cpu().numpy())

In [None]:
# Accuracy: share of predictions that equal the true label at the same position.
count_true = sum(1 for idx in range(len(y_pred)) if y_true[idx] == y_pred[idx])
acc = count_true / len(y_pred)
print(len(y_test))
print(acc)

31
0.7741935483870968


In [29]:
def create_tensor_inputs(embedding_dir, contract_address, scaler=None, num_features=11):
    """Load per-contract CSV files and return normalised feature/label tensors.

    For every address the file ``<embedding_dir>/<address>.csv`` is read. Its
    first ``num_features`` columns become that contract's feature matrix, and
    the value at index 1 of its ``label`` column becomes its label, so each
    file needs a ``label`` column and at least two rows.

    Args:
        embedding_dir: Directory containing one CSV per contract. A trailing
            path separator is optional.
        contract_address: Sequence of contract addresses (CSV file stems).
        scaler: Optional, already fitted scaler exposing ``transform``. When
            omitted, a new ``StandardScaler`` is fitted on the rows of the
            requested contracts, so the normalisation depends on which
            contracts are in the batch.
        num_features: Number of leading CSV columns used as features.

    Returns:
        ``(data_tensor, labels_tensor)``: float32 tensors of shape
        ``[n_contracts, n_rows, num_features]`` and ``[n_contracts]``.

    Raises:
        ValueError: If ``contract_address`` is empty, or the CSV files do not
            all yield feature matrices of the same shape.
    """
    if len(contract_address) == 0:
        raise ValueError("contract_address must contain at least one address")

    all_data = []
    all_labels = []
    for address in contract_address:
        # os.path.join works whether or not embedding_dir ends with a separator.
        csv_path = os.path.join(embedding_dir, str(address) + '.csv')
        data = pd.read_csv(csv_path)
        all_data.append(data.iloc[:, :num_features].to_numpy())
        # One label per contract, read from the row at index 1.
        all_labels.append(data['label'][1])

    # np.stack raises a clear ValueError when the files differ in row count.
    data_array = np.stack(all_data)
    labels_array = np.array(all_labels)

    # Flatten to (n_contracts * n_rows, num_features) so each feature column is
    # normalised across all rows, then restore the 3-D layout.
    flat_rows = data_array.reshape((-1, num_features))
    if scaler is None:
        normalized_rows = StandardScaler().fit_transform(flat_rows)
    else:
        normalized_rows = scaler.transform(flat_rows)
    normalized_data_array = np.asarray(normalized_rows).reshape(data_array.shape)

    data_tensor = torch.tensor(normalized_data_array, dtype=torch.float32)
    labels_tensor = torch.tensor(labels_array, dtype=torch.float32)

    print(data_tensor.shape)
    print(labels_tensor.shape)
    return data_tensor, labels_tensor



In [30]:
# Sample contract addresses; create_tensor_inputs reads '<address>.csv' for each
# one from the directory passed below.
contract_addresses = [
    "0x6e38a457c722c6011b2dfa06d49240e797844d66",
    "0x109c4f2ccc82c4d77bde15f306707320294aea3f",
    "0x793ae8c1b1a160bfc07bfb0d04f85eab1a71f4f2",
    "0x5fe5b7546d1628f7348b023a0393de1fc825a4fd",
    "0xd79b4c6791784184e2755b2fc1659eaab0f80456",
    "0x273930d21e01ee25e4c219b63259d214872220a2",
    "0xd07ce4329b27eb8896c51458468d98a0e4c0394c"
]
# Smoke-test the loader; as the cell's last expression, the returned
# (data_tensor, labels_tensor) tuple is displayed.
create_tensor_inputs('/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/tr/data_set/',contract_addresses)

torch.Size([7, 108, 11])
torch.Size([7])


(tensor([[[-0.2207, -0.2227, -0.1379,  ..., -0.6683,  3.1237, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683,  1.1492, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683,  1.1492, -0.7103],
          ...,
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103]],
 
         [[ 3.1714,  3.1648,  1.7379,  ...,  1.3607,  3.1237,  1.4198],
          [ 5.2067,  5.1973,  0.0358,  ...,  1.6143,  1.1492,  1.4198],
          [ 2.8806,  2.8745,  0.1921,  ...,  1.1071,  1.1492,  1.4198],
          ...,
          [-0.2207, -0.2227, -0.1379,  ...,  1.1071, -0.8253,  1.4198],
          [-0.2207, -0.2227, -0.1379,  ...,  1.6143, -0.8253,  1.4198],
          [-0.2207, -0.2227, -0.1379,  ..., -0.1610, -0.8253,  1.2068]],
 
         [[-0.2207, -0.2227, -0.1379,  ...,  1.8679,  1.1492,  1.4198],
          [-0.2207, -0.2227,

In [31]:
class TransactionModality(nn.Module):
    """Wraps the pickled transaction model and feeds it per-contract CSV data.

    Args:
        device: Device the model is loaded onto and inputs are moved to.
        inductor: Accepted for interface compatibility; not used by this class.
        embedding_dir: Directory with one ``<address>.csv`` file per contract.
        model_path: Path of the fully pickled model loaded with ``torch.load``.
    """

    def __init__(
        self,
        device,
        inductor=True,
        embedding_dir='/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/tr/data_set/',
        model_path='/content/drive/MyDrive/23_FYP_Realtime_Estimation_of_Trustworthiness_of_Decentralized_Applications/models/PonziShield_tr_v1.pth',
        ):
        super(TransactionModality, self).__init__()

        self.device = device
        self.embedding_dir = embedding_dir

        # The checkpoint is a whole pickled module, so:
        # - weights_only=False is needed on PyTorch >= 2.6, whose default (True)
        #   rejects pickled modules. Unpickling can execute arbitrary code, so
        #   model_path must point to a trusted file.
        # - map_location puts the weights on the requested device regardless of
        #   the device the checkpoint was saved from.
        model = torch.load(model_path, map_location=device, weights_only=False)
        # The Classifier in this notebook stores its own `device` attribute and
        # uses it in forward(); keep it in step with where the weights now live.
        if hasattr(model, "device"):
            model.device = device
        self.model = model

    def forward(self, dapp_addresses, train):
        """Run the transaction model on the given contract addresses.

        Args:
            dapp_addresses: Sequence of contract addresses to load.
            train: When equal to ``False`` the method returns ``None``
                (real-time prediction is not implemented yet).

        Returns:
            ``None`` when ``train == False``; otherwise the pair
            ``(results, result_before_bin_classifier)`` returned by the model.
        """
        # `== False` is kept on purpose: only False/0 take this branch, while
        # other values (None, or a string such as 'train') run the pipeline.
        if train == False:  # noqa: E712
            # TODO: do realtime prediction
            return None

        # create 3d tensor [dapp_count,sequence_length,features]; labels are not needed here
        data_tensor, _ = create_tensor_inputs(self.embedding_dir, dapp_addresses)
        data_tensor = data_tensor.to(self.device)
        results, result_before_bin_classifier = self.model(data_tensor.float())

        # results shape = [dapp_count,1], result_before_bin_classifier = [dapp_count,sequence_length,features]
        return results, result_before_bin_classifier

In [48]:
# Instantiate the modality on CPU and run its forward pass for the sample addresses.
transactionModality = TransactionModality(device='cpu')
# Keep only the second returned value (named result_before_bin_classifier in forward).
_, outputs_t = transactionModality(contract_addresses, train=True)
outputs_t.shape

torch.Size([7, 108, 11])
torch.Size([7])


torch.Size([7, 108, 11])