# Load Models

In [1]:
!gdown --id 1Nrj3gi-ENBwsEqjsGB68pWGrQI8fFWC6

Downloading...
From: https://drive.google.com/uc?id=1Nrj3gi-ENBwsEqjsGB68pWGrQI8fFWC6
To: /content/DEMO.zip
100% 215M/215M [00:02<00:00, 94.7MB/s]


In [2]:
!unzip DEMO.zip

Archive:  DEMO.zip
   creating: DEMO/
  inflating: DEMO/fake_news_of_train.pkl  
  inflating: DEMO/OCSVM.pkl          
  inflating: DEMO/kmeans_15.pkl      
  inflating: DEMO/kmeans_14.pkl      
  inflating: DEMO/kmeans_10.pkl      
  inflating: DEMO/kmeans_11.pkl      
  inflating: DEMO/kmeans_12.pkl      
  inflating: DEMO/kmeans_13.pkl      
  inflating: DEMO/Encoder_Bi-Vae-Density.zip  


In [3]:
!unzip DEMO/Encoder_Bi-Vae-Density.zip

Archive:  DEMO/Encoder_Bi-Vae-Density.zip
   creating: Encoder_Bi-Vae-Density/
  inflating: Encoder_Bi-Vae-Density/saved_model.pb  
  inflating: Encoder_Bi-Vae-Density/keras_metadata.pb  
   creating: Encoder_Bi-Vae-Density/assets/
   creating: Encoder_Bi-Vae-Density/variables/
  inflating: Encoder_Bi-Vae-Density/variables/variables.index  
  inflating: Encoder_Bi-Vae-Density/variables/variables.data-00000-of-00001  


## Load Bi-VAE-Density

In [4]:
import tensorflow as tf

# Load the saved Keras model from the 'Encoder_Bi-Vae-Density' directory
# (the directory extracted from DEMO/Encoder_Bi-Vae-Density.zip).
bi_vae_density = tf.keras.models.load_model('Encoder_Bi-Vae-Density')



## Load Kmeans

In [5]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only run this against an archive you trust.
# Collect the pickled models kmeans_10.pkl .. kmeans_15.pkl, in that order.
l_kmeans = []

for k in range(10, 16):
  with open(f'DEMO/kmeans_{k}.pkl', 'rb') as handle:
    l_kmeans.append(pickle.load(handle))

## Load the fake-news training set

In [6]:
import pandas as pd
import numpy as np
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary code;
# only load an archive you trust.
df_fk_train = pd.read_pickle('DEMO/fake_news_of_train.pkl')
# Turn the column's per-row values into one NumPy array (presumably one embedding
# vector per row — confirm). Despite the `df_` prefix, df_train is an ndarray.
df_train = np.array(df_fk_train['DistilBERT Multilingua'].to_list())

## Load OCSVM

In [7]:
# Load the pickled classifier used for the final prediction step.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only run this against an archive you trust.
with open('DEMO/OCSVM.pkl', 'rb') as file:
  ocsvm = pickle.load(file)

# DBERTML Embeddings (USE GPU)

In [8]:
!pip install sentence-transformers==1.0.4 #version used in the fake news collections

Collecting sentence-transformers==1.0.4
  Downloading sentence-transformers-1.0.4.tar.gz (74 kB)
[K     |████████████████████████████████| 74 kB 2.1 MB/s 
[?25hCollecting transformers<5.0.0,>=3.1.0
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 16.1 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 47.0 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 20.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 33.5 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |

In [9]:
from sentence_transformers import SentenceTransformer

In [10]:
def sentence_embedding(txts, model=None):
  """Encode texts with a sentence-transformer after flattening whitespace.

  Tab, carriage-return and newline characters, together with their
  backslash-escaped spellings, are each replaced by a single space before
  the texts are handed to the model.

  Args:
    txts: iterable of strings to embed.
    model: optional object exposing ``encode(list_of_str)``. When omitted,
      ``SentenceTransformer('distiluse-base-multilingual-cased')`` is
      created on every call; pass an instance to reuse it across calls.

  Returns:
    Whatever ``model.encode`` returns for the cleaned sentences.
  """
  if model is None:
    model = SentenceTransformer('distiluse-base-multilingual-cased')

  # Applied in this order so the longer escaped spellings are consumed before
  # the shorter ones they contain.
  whitespace_tokens = (
      '\\\\t', '\\\\r', '\\\\n',
      '\\t', '\\r', '\\n',
      '\t', '\r', '\n',
  )

  sentences = []
  for txt in txts:
    for token in whitespace_tokens:
      # str.replace returns a new string, so the result must be rebound;
      # calling it without assignment leaves the text untouched.
      txt = txt.replace(token, ' ')
    sentences.append(txt)

  return model.encode(sentences)

# Density Information

In [28]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
import numpy as np 

def return_density_inf(l_kmeans, df_train, df_new):
    """Compute per-model silhouette features for new samples.

    For every model in ``l_kmeans`` the new samples are given a cluster label
    via ``predict``, stacked under the training samples, and scored with
    ``silhouette_samples``. Only the scores belonging to the new samples are
    kept.

    Args:
        l_kmeans: sequence of fitted clustering models exposing ``labels_``
            and ``predict``. Assumes ``labels_`` lines up row-for-row with
            ``df_train`` — TODO confirm against how the models were fitted.
        df_train: array of training samples.
        df_new: NumPy array of new samples (``astype`` is called on it).

    Returns:
        Array with one row per new sample and one column per model, where
        column ``j`` holds the silhouette scores obtained with ``l_kmeans[j]``.

    Raises:
        ValueError: if ``l_kmeans`` is empty.
    """
    if len(l_kmeans) == 0:
        # np.concatenate would raise ValueError on an empty list anyway; fail
        # early with a message that names the actual problem.
        raise ValueError("l_kmeans must contain at least one fitted model")

    len_train = len(df_train)

    # Neither of these depends on the model, so build them once.
    dfs = np.concatenate([df_train, df_new])
    df_new_f64 = df_new.astype('float64')

    l_x_new = []
    for kmeans in l_kmeans:
        labels = np.concatenate([kmeans.labels_, kmeans.predict(df_new_f64)])

        # Score every sample, then keep only the rows of the new samples.
        silho = silhouette_samples(dfs, labels).reshape(-1, 1)
        l_x_new.append(silho[len_train:])

    return np.concatenate(l_x_new, axis=1)

# DEMO

In [33]:
# Example texts to classify: one English sentence and two Portuguese ones.
inputs = [
          'this is a real news',
          'essa é uma noticia falsa sobre o bolsonaro',
          'essa aqui é outra noticia falsa que fala sobre politica'
]

In [34]:
# Embed the demo texts with the sentence-transformer model.
embeddings_DBERTML = sentence_embedding(inputs)

In [35]:
# Silhouette-based features for the new embeddings, one column per loaded KMeans
# model, computed against the training array df_train.
densities = return_density_inf(l_kmeans, df_train, np.array(embeddings_DBERTML))

In [36]:
# The encoder is fed [embeddings, densities] and returns three outputs;
# only the first one is used downstream.
embeddings_BiVae, _, _ = bi_vae_density.predict([embeddings_DBERTML,densities]) 

In [37]:
# Classify the encoded inputs with the model loaded from DEMO/OCSVM.pkl.
predictions = ocsvm.predict(embeddings_BiVae)

**Output labels: `1` means fake news, `-1` means real news.**

In [38]:
predictions

array([-1,  1,  1])