# Reliable Users Neural Network

In [None]:
import torch

# Select the compute device once; later cells move tensors to `device`.
if not torch.cuda.is_available():
    # CPU fallback when no CUDA device is visible.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # Run on the GPU and report how many there are and the name of device 0.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

In [None]:
# Install the Hugging Face `transformers` package; later cells import the
# DistilBERT tokenizer and classifier from it.
!pip install transformers

In [None]:
# Mount Google Drive at /content/drive so the dataset and model folders under
# "MyDrive/Colab Notebooks" can be read. The `google.colab` module means this
# cell is meant to run inside Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# List the notebook folder on the mounted Drive (presumably a sanity check that
# the mount succeeded and the expected files are there).
!ls "/content/drive/MyDrive/Colab Notebooks"

In [None]:
from numpy.random import seed
from numpy.random import randint
import torch
import numpy as np
# Fix NumPy's global RNG so the draw below is the same on every run.
np.random.seed(1)
# Ten random binary values (0 or 1), shown first as an array ...
values = np.random.randint(low=0, high=2, size=10)
print(values)
# ... and then wrapped as a torch tensor.
labels = torch.from_numpy(values)
print(labels)

In [None]:
# Folder layout inside the mounted Drive (paths are relative to the working
# directory): dataset files, saved models and the per-user tweet CSVs.
pRoot = "drive/MyDrive/Colab Notebooks/"
pData = pRoot + "dataset/"
pModels = pRoot + "models/"
pTweet = pData + "tweet/"

# Read one user id per line. The context manager closes the file even if
# reading fails; the previous version never closed the handle.
with open(pData + "users_fake_news.txt", "r") as fin:
    users_id = [line.rstrip("\n") for line in fin]

# Skip the first line (presumably a header -- TODO confirm) and keep at most
# the next ten ids.
users_id = users_id[1:11]
#print(users_id)
print(len(users_id))

In [None]:
from transformers import DistilBertTokenizerFast
# Notebook-level tokenizer used by preprocess_sentences: the pretrained
# multilingual *cased* DistilBERT vocabulary, with lower-casing disabled.
tokenizer = DistilBertTokenizerFast.from_pretrained(
    'distilbert-base-multilingual-cased',
    do_lower_case=False
)

In [None]:
def preprocess_sentences(input_ids, attention_masks, sentences, max_length=None):
  """Tokenize ``sentences`` and stack the encodings into two 2-D tensors.

  Args:
    input_ids: list that the per-sentence ``input_ids`` tensors are appended
      to (mutated in place); callers normally pass an empty list.
    attention_masks: list that the per-sentence attention masks are appended
      to (mutated in place).
    sentences: iterable of strings, encoded with the notebook-level
      ``tokenizer``.
    max_length: length every sequence is padded and truncated to. When
      ``None`` (the default) the notebook-level ``max_len`` global is used,
      which is what this function always relied on.

  Returns:
    ``(input_ids, attention_masks)``: the accumulated per-sentence tensors
    concatenated along dimension 0.

  Raises:
    NameError: if ``max_length`` is not given and no global ``max_len`` exists.
  """
  if max_length is None:
    max_length = max_len  # notebook-level global, kept for existing callers

  for sent in sentences:
    encoded_dict = tokenizer.encode_plus(
        sent,
        add_special_tokens = True,
        max_length = max_length,
        padding = 'max_length',
        # Without truncation a sentence longer than max_length produces a
        # wider tensor and the torch.cat below fails on the size mismatch.
        truncation = True,
        return_attention_mask = True,
        return_tensors = 'pt'
    )
    input_ids.append(encoded_dict['input_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

  # Each encoding is (1, max_length); concatenating gives one row per sentence.
  input_ids = torch.cat(input_ids, dim=0)
  attention_masks = torch.cat(attention_masks, dim=0)
  return input_ids, attention_masks

In [None]:
def load_model(modelname):
  """Load a saved DistilBERT classifier and move it to the active device.

  Args:
    modelname: name of the model folder inside ``pModels``.

  Returns:
    The ``DistilBertForSequenceClassification`` instance, configured to return
    all hidden states, placed on the notebook-level ``device``.
  """
  loaded_model = DistilBertForSequenceClassification.from_pretrained(
    pModels+modelname, output_hidden_states = True)
  # Follow the notebook's `device` (GPU with CPU fallback) instead of calling
  # .cuda() unconditionally, which raises on a machine without a GPU.
  return loaded_model.to(device)

Read the first 50 tweets of each user and transform them into tensors (one CSV per user and trait)

In [None]:
import pandas as pd
from transformers import DistilBertForSequenceClassification
import gc

# Feature-extraction loop, currently disabled: everything between the triple
# quotes below is a plain string expression, so none of it is executed.
# As written it handles one trait per run (`trait`): it loads the matching
# "distil_<trait>" model, encodes the first 50 rows of each user's tweet CSV,
# takes the last entry of the model's hidden states, keeps its position-0
# slice for every tweet and saves that matrix to
# user_tensor/<user>_<trait>.csv. Presumably this is how the CSVs read by the
# next cells were produced -- TODO confirm.
# NOTE(review): `max_len`, which preprocess_sentences reads as a global, is
# only assigned inside this disabled string.
# Trait codes the loop is meant to be run for:
# traits_big5 = ['O','C','E','A','N']
# traits_bhv = ['SD', 'ST', 'HE', 'AC', 'PO', 'SE', 'CO', 'TR', 'BE', 'UN']
'''
trait = "UN"
max_len = 256 # the closest power of two exceeding max len found
model = load_model("distil_"+trait)

for user in users_id:   
  input_ids = []
  attention_masks = []
  df = pd.read_csv(pTweet + str(user) + ".csv")
  # print(df.head())
  df = df[:50]
  df['text']= df['text'].astype('str')
  sentences  = df.text.values
  input_ids, attention_masks = preprocess_sentences(
      input_ids, attention_masks, sentences)
  input_ids = input_ids.to(device)
  attention_masks = attention_masks.to(device)
  pt_output = model(input_ids, attention_mask=attention_masks)

  token_embeddings = torch.stack(pt_output.hidden_states, dim=0)
  # print(token_embeddings.size())
  last_layer = token_embeddings[-1]
  last_layer = last_layer.permute(1,0,2) 
  # print(last_layer[0].size())
  result = last_layer[0].cpu().detach().numpy()
  res = np.asarray(result)
  np.savetxt(pData+"user_tensor/"+str(user)+"_"+trait+".csv", res, delimiter=",", fmt='%5.5f')
  del input_ids
  del attention_masks
  del pt_output
  del df
  del last_layer
  del result
  del res
  torch.cuda.empty_cache()
  gc.collect()

print("done")
'''

Prepare data into input tensors

In [None]:
# Trait codes; each one has its own CSV per user under user_tensor/.
traits_big5 = ['O','C','E','A','N']
traits_bhv = ['SD', 'ST', 'HE', 'AC', 'PO', 'SE', 'CO', 'TR', 'BE', 'UN']


def _load_user_traits(user, traits):
  """Load one user's CSV for every trait and join them side by side (dim 1)."""
  per_trait = [
      torch.from_numpy(
          # ndmin=2 keeps a single-row CSV two-dimensional; without it
          # loadtxt returns a 1-D array and the dim=1 concatenation fails.
          np.loadtxt(pData+"user_tensor/"+str(user)+"_"+trait+".csv",
                     delimiter=",", ndmin=2))
      for trait in traits
  ]
  return torch.cat(per_trait, dim=1)


# Collect one (rows, features) tensor per user and concatenate once at the
# end; growing the result with torch.cat inside the loop re-copies all
# previous rows on every iteration.
big5_per_user = []
bhv_per_user = []
for user in users_id[:5]:
  big5_per_user.append(_load_user_traits(user, traits_big5))
  bhv_per_user.append(_load_user_traits(user, traits_bhv))

# Users are stacked on top of each other along dim 0.
big5_tot = torch.cat(big5_per_user, dim=0)
bhv_tot = torch.cat(bhv_per_user, dim=0)

print(big5_tot.size())
print(bhv_tot.size())

In [None]:
# Rows stored per user; matches the df[:50] slice in the disabled extraction code.
TWEETS_PER_USER = 50

# Regroup the flat (users*tweets, features) matrices into
# (users, tweets, features). The user count is inferred (-1) and the feature
# width is read from the last dimension, so the cell can be re-run safely:
# the previous version divided dimension 0 by 50 again on the already 3-D
# tensor and failed on the second execution.
big5_size = list(big5_tot.size())
big5_tot = torch.reshape(big5_tot, (-1, TWEETS_PER_USER, big5_size[-1]))
#print(big5_tot)
print(big5_tot[0].size())
bhv_size = list(bhv_tot.size())
bhv_tot = torch.reshape(bhv_tot, (-1, TWEETS_PER_USER, bhv_size[-1]))
#print(big5_tot)
print(bhv_tot[0].size())

Configure the RUNN architecture

In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

class RUNN(nn.Module):
  """Reliable Users Neural Network: one logit per user from per-tweet features.

  Two linear layers project the Big Five and the behavioural feature vectors of
  every tweet to ``hidden`` dimensions. The two projections are concatenated
  along the feature axis, a 1-D convolution that treats the tweets as input
  channels collapses them into a single channel, and a final linear layer maps
  that channel to one value.

  Args:
    big5_in: width of the Big Five feature vector of one tweet.
    bhv_in: width of the behavioural feature vector of one tweet.
    hidden: output width of each projection layer.
    n_tweets: number of tweets per user (input channels of the convolution).
    kernel_size: kernel size of the convolution.
  """

  def __init__(
      self,
      big5_in = 768*5,
      bhv_in = 768*10,
      hidden = 768,
      n_tweets = 50,
      kernel_size = 3
      ):
    super().__init__()
    # TODO: turn each projection into a Sequential with ReLU and batch norm
    # at every level.
    self.shrink_big5 = nn.Linear(big5_in, hidden)
    self.shrink_bhv = nn.Linear(bhv_in, hidden)
    self.conv1 = nn.Conv1d(in_channels=n_tweets, out_channels=1,
                           kernel_size=kernel_size, stride=1)
    # Length left after an unpadded stride-1 convolution over the 2*hidden
    # concatenated features; 1534 with the default arguments.
    self.ll = nn.Linear(2 * hidden - kernel_size + 1, 1)

  def forward(self, big5, bhv):
    """Compute the raw (pre-sigmoid) score.

    Args:
      big5: ``(n_tweets, big5_in)`` for one user, or
        ``(batch, n_tweets, big5_in)`` for several.
      bhv: behavioural features with the same leading dimensions as ``big5``.

    Returns:
      A 0-d tensor for a single unbatched user, otherwise a ``(batch,)`` tensor.
    """
    # TODO: add ReLU and batch normalization.
    unbatched = big5.dim() == 2
    if unbatched:
      # Add the batch axis up front so both input layouts share one code path.
      big5 = big5.unsqueeze(0)
      bhv = bhv.unsqueeze(0)

    sb5 = self.shrink_big5(big5)          # (batch, n_tweets, hidden)
    sbhv = self.shrink_bhv(bhv)           # (batch, n_tweets, hidden)
    x = torch.cat((sb5, sbhv), dim=-1)    # (batch, n_tweets, 2*hidden)
    x = self.conv1(x)                     # (batch, 1, 2*hidden - kernel_size + 1)
    x = self.ll(x)                        # (batch, 1, 1)
    y = x.flatten()                       # (batch,)
    return y.squeeze(0) if unbatched else y
  


Train and test the model (currently only a single forward pass on the first user)

In [None]:
# Run a freshly initialised (untrained) network on the first user's tensors.
model = RUNN()
# Call the module itself rather than .forward() so nn.Module.__call__ runs any
# registered hooks. The inputs are cast to float32 to match the layer weights.
y = model(big5_tot[0].float(), bhv_tot[0].float())
# Logit -> probability, then rounded to a 0/1 tag.
y_pred = torch.sigmoid(y)
y_pred_tag = torch.round(y_pred)
print(y_pred, y_pred_tag)