In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spacy
nlp = spacy.load("en_core_web_lg")

from tqdm import tqdm
tqdm.pandas()   

from src import config
from src.datasets import TextConcatFactCheck, TextConcatPosts
from src.utils import cleaning_spacy, cleaning_spacy_batch

tasks_path = config.TASKS_PATH
posts_path = config.POSTS_PATH
fact_checks_path = config.FACT_CHECKS_PATH
gs_path = config.GS_PATH
lang = 'tha'
task_name = "monolingual"

print("Loading Fact Checks...")
fc = TextConcatFactCheck(fact_checks_path, tasks_path=tasks_path, task_name=task_name, lang=lang, version="english", clean=True)
fc_orig = TextConcatFactCheck(fact_checks_path, tasks_path=tasks_path, task_name=task_name, lang=lang, version="original", clean=True)
print("Loading Fact Checks (English + Clean)...")
# fc_eng = TextConcatFactCheck(fact_checks_path, tasks_path=tasks_path, task_name=task_name, lang=lang, version="english", cleaning_function=lambda x: cleaning_spacy_batch(x, nlp))

print("Loading Posts...")
posts = TextConcatPosts(posts_path, tasks_path=tasks_path, task_name=task_name, lang=lang, gs_path=gs_path, version="english", clean=True)
posts_orig = TextConcatPosts(posts_path, tasks_path=tasks_path, task_name=task_name, lang=lang, gs_path=gs_path, version="original", clean=True)

  from .autonotebook import tqdm as notebook_tqdm


Loading Fact Checks...
Loading Fact Checks (English + Clean)...
Loading Posts...


In [12]:
df_dev = posts.df_dev
df_dev_orig = posts_orig.df_dev
df_fc = fc.df

In [7]:
from src.models import IEModel
model_name = "en_core_web_trf"
model = IEModel(model_name, df_fc)

  model.load_state_dict(torch.load(filelike, map_location=device))


In [None]:
df_dev_trial = df_dev.iloc[:10]
df_dev_trial_orig = df_dev_orig.iloc[:10]

for i, row in df_dev_trial.iterrows():
    print(row['full_text'])
    print()




[]

#. "Mexican MPs" naked during parliamentary sessions and told the meeting that... [URL] ------------------ ‚ÄúYou guys are embarrassed to see me undressed. But you are not ashamed to see people without clothes to wear. Barefoot, suffering, unemployment and starvation as you stole their money.‚Äù
[[193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193572, 193573, 193574, 193575, 193576, 193577, 193578, 193579, 193564, 193565], [193570, 193572, 193573, 193574, 193575, 

In [11]:
model.encode(df_dev["full_text"].values)

[['GLONG',
  'live',
  'king',
  'ask',
  'million',
  'like',
  'support',
  'General',
  'Prayut',
  'Chan',
  'Ocha',
  'Prime',
  'Minister',
  '5',
  'hour',
  'thank',
  'Mater',
  'Dei',
  'School',
  'Mater',
  'Dei',
  'School',
  'year',
  'meeting',
  'dissatisfied',
  'leave',
  'like',
  'page',
  'school',
  'study'],
 ['mexican',
  'mp',
  'naked',
  'parliamentary',
  'session',
  'tell',
  'meeting',
  'guy',
  'embarrassed',
  'undressed',
  'ashamed',
  'people',
  'clothe',
  'wear',
  'barefoot',
  'suffering',
  'unemployment',
  'starvation',
  'steal',
  'money'],
 ['whathappensinthailand',
  'oh',
  'bow',
  'dog',
  'loyalty',
  'dog',
  'invent',
  'people',
  'pay',
  'respect',
  'hahaha',
  'thailandonly',
  'Thailand'],
 ['kidney',
  'stone',
  'dissolve',
  'recipe',
  'eat',
  '3',
  'pineapple',
  'core',
  'day',
  'spin',
  'pair',
  'basil',
  'eat',
  'pain',
  'go',
  '1',
  'group',
  'white',
  'liquor',
  'add',
  '1',
  'lemon',
  'eat',
  'be

In [3]:
df_dev_orig

Unnamed: 0_level_0,ocr,verdicts,text,lan,fb,tw,ig,full_text,gs
post_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1059,GLONG LIVES KING ‡∏Ç‡∏≠‡∏•‡πâ‡∏≤‡∏ô Like ‡∏™‡∏ô‡∏±‡∏ö‡∏™‡∏ô‡∏∏‡∏ô‡πÉ‡∏´‡πâ ‡∏û‡∏•‡πÄ‡∏≠‡∏Å...,Partly false information,,tha,1,0,0,GLONG LIVES KING ‡∏Ç‡∏≠‡∏•‡πâ‡∏≤‡∏ô Like ‡∏™‡∏ô‡∏±‡∏ö‡∏™‡∏ô‡∏∏‡∏ô‡πÉ‡∏´‡πâ ‡∏û‡∏•‡πÄ‡∏≠‡∏Å...,[193600]
3053,,Partly false information,"#. ""‡∏™‡∏™.‡πÄ‡∏°‡πá‡∏Å‡∏ã‡∏¥‡∏Å‡∏±‡∏ô"" ‡πÅ‡∏Å‡πâ‡∏ú‡πâ‡∏≤‡∏Ç‡∏ì‡∏∞‡∏õ‡∏£‡∏∞‡∏ä‡∏∏‡∏°‡∏™‡∏†‡∏≤ ‡πÅ‡∏•‡∏∞‡∏ö‡∏≠‡∏Å‡∏Å‡∏±‡∏ö...",tha,1,0,0,"#. ""‡∏™‡∏™.‡πÄ‡∏°‡πá‡∏Å‡∏ã‡∏¥‡∏Å‡∏±‡∏ô"" ‡πÅ‡∏Å‡πâ‡∏ú‡πâ‡∏≤‡∏Ç‡∏ì‡∏∞‡∏õ‡∏£‡∏∞‡∏ä‡∏∏‡∏°‡∏™‡∏†‡∏≤ ‡πÅ‡∏•‡∏∞‡∏ö‡∏≠‡∏Å‡∏Å‡∏±...",[193329]
3854,,,#whathappensinthailand #‡πÇ‡∏≠‡πâ‡∏ß‡∏Å‡∏£‡∏≤‡∏ö‡∏´‡∏°‡∏≤‡∏Ñ‡∏∑‡∏≠‡∏Ñ‡∏ß‡∏≤‡∏°‡∏à‡∏á‡∏£‡∏±...,tha,1,0,0,#whathappensinthailand #‡πÇ‡∏≠‡πâ‡∏ß‡∏Å‡∏£‡∏≤‡∏ö‡∏´‡∏°‡∏≤‡∏Ñ‡∏∑‡∏≠‡∏Ñ‡∏ß‡∏≤‡∏°‡∏à‡∏á‡∏£...,[193410]
4034,,Partly false information.,#‡∏™‡∏π‡∏ï‡∏£‡∏•‡∏∞‡∏•‡∏≤‡∏¢‡∏ô‡∏¥‡πà‡∏ß‡πÉ‡∏ô‡πÑ‡∏ï - ‡∏Å‡∏¥‡∏ô‡πÅ‡∏Å‡∏ô‡∏™‡∏±‡∏ö‡∏õ‡∏∞‡∏£‡∏î‡∏ß‡∏±‡∏ô‡∏•‡∏∞ 3 ‡πÅ‡∏Å‡∏ô ...,tha,1,0,0,#‡∏™‡∏π‡∏ï‡∏£‡∏•‡∏∞‡∏•‡∏≤‡∏¢‡∏ô‡∏¥‡πà‡∏ß‡πÉ‡∏ô‡πÑ‡∏ï - ‡∏Å‡∏¥‡∏ô‡πÅ‡∏Å‡∏ô‡∏™‡∏±‡∏ö‡∏õ‡∏∞‡∏£‡∏î‡∏ß‡∏±‡∏ô‡∏•‡∏∞ 3 ‡πÅ‡∏Å‡∏ô...,[193261]
4040,,False information,#‡∏≠‡∏¢‡πà‡∏≤‡∏ï‡∏Å‡πÉ‡∏à‡πÑ‡∏õ‡∏ï‡∏≤‡∏°‡∏ä‡∏∑‡πà‡∏≠ ‡πÄ‡∏ä‡∏∑‡πâ‡∏≠‡∏ó‡∏µ‡πà‡∏£‡∏±‡∏ö‡πÄ‡∏Ç‡πâ‡∏≤‡πÑ‡∏õ ‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà ‡πÄ‡∏û...,tha,1,0,0,#‡∏≠‡∏¢‡πà‡∏≤‡∏ï‡∏Å‡πÉ‡∏à‡πÑ‡∏õ‡∏ï‡∏≤‡∏°‡∏ä‡∏∑‡πà‡∏≠ ‡πÄ‡∏ä‡∏∑‡πâ‡∏≠‡∏ó‡∏µ‡πà‡∏£‡∏±‡∏ö‡πÄ‡∏Ç‡πâ‡∏≤‡πÑ‡∏õ ‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà ‡πÄ...,[193313]
4041,,False information,#‡∏≠‡∏¢‡πà‡∏≤‡∏ï‡∏Å‡πÉ‡∏à‡πÑ‡∏õ‡∏ï‡∏≤‡∏°‡∏ä‡∏∑‡πà‡∏≠ ‡πÄ‡∏ä‡∏∑‡πâ‡∏≠‡∏ó‡∏µ‡πà‡∏£‡∏±‡∏ö‡πÄ‡∏Ç‡πâ‡∏≤‡πÑ‡∏õ ‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà ‡πÄ‡∏û...,tha,1,0,0,#‡∏≠‡∏¢‡πà‡∏≤‡∏ï‡∏Å‡πÉ‡∏à‡πÑ‡∏õ‡∏ï‡∏≤‡∏°‡∏ä‡∏∑‡πà‡∏≠ ‡πÄ‡∏ä‡∏∑‡πâ‡∏≠‡∏ó‡∏µ‡πà‡∏£‡∏±‡∏ö‡πÄ‡∏Ç‡πâ‡∏≤‡πÑ‡∏õ ‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà ‡πÄ...,[193313]
4049,‡πÄ‡∏ã‡πá‡∏ô‡∏ó‡∏£‡∏±‡∏•‡∏•‡∏≤‡∏î‡∏û‡∏£‡πâ‡∏≤‡∏ß ‡∏ú‡∏π‡πâ‡∏´‡∏ç‡∏¥‡∏á‡πÄ‡∏î‡∏¥‡∏ô ‡∏ä‡πâ‡∏≠‡∏õ‡∏õ‡∏¥‡πâ‡∏á ‡πÇ‡∏î‡∏ô‡∏ó‡∏≥‡∏£‡πâ‡∏≤...,Partly false information,#‡πÇ‡∏ä‡∏Ñ‡∏î‡∏µ‡∏ô‡∏∞‡πÄ‡∏£‡∏≤‡πÑ‡∏°‡πà‡∏°‡∏µ‡∏ó‡∏≠‡∏á‡πÉ‡∏™‡πàüò¨üò¨ ‡∏ó‡∏≠‡∏á‡πÅ‡∏û‡∏á‡πÜ‡πÄ‡∏≠‡∏≤‡πÑ‡∏ß‡πâ‡πÇ‡∏£‡∏á‡∏à‡∏≥‡∏ô‡∏≥‡πÄ...,tha,1,0,0,‡πÄ‡∏ã‡πá‡∏ô‡∏ó‡∏£‡∏±‡∏•‡∏•‡∏≤‡∏î‡∏û‡∏£‡πâ‡∏≤‡∏ß ‡∏ú‡∏π‡πâ‡∏´‡∏ç‡∏¥‡∏á‡πÄ‡∏î‡∏¥‡∏ô ‡∏ä‡πâ‡∏≠‡∏õ‡∏õ‡∏¥‡πâ‡∏á ‡πÇ‡∏î‡∏ô‡∏ó‡∏≥‡∏£‡πâ‡∏≤...,[193594]
4128,/fbq) San Diego Lab Discovers COVID-19 Vaccine...,Partly false information,(‡∏Ç‡πà‡∏≤‡∏ß)‡∏Ç‡πà‡∏≤‡∏ß‡∏î‡πà‡∏ß‡∏ô!!! ‡∏ó‡∏µ‡πà‡∏≠‡πÄ‡∏°‡∏£‡∏¥‡∏Å‡∏≤‡∏Ñ‡πâ‡∏ô‡∏û‡∏ö‡∏ß‡∏±‡∏Ñ‡∏ã‡∏µ‡∏ô‡∏™‡∏≤‡∏°‡∏≤‡∏£‡∏ñ‡∏£...,tha,1,0,0,/fbq) San Diego Lab Discovers COVID-19 Vaccine...,[193349]
4405,,,.. ‡∏ù‡∏±‡πà‡∏á‡∏ò‡∏ô‡∏Ø(‡∏ö‡∏≤‡∏á‡πÅ‡∏Ñ) ‡∏°‡∏µ‡πÅ‡∏ï‡πà‡∏Ñ‡∏ô‡∏î‡πà‡∏≤ ‡∏û‡∏ô‡∏á.‡∏ï‡∏±‡∏î‡∏´‡∏ç‡πâ‡∏≤ ‡∏´‡∏≤‡∏ß‡πà‡∏≤...,tha,0,1,0,. ‡∏ù‡∏±‡πà‡∏á‡∏ò‡∏ô‡∏Ø(‡∏ö‡∏≤‡∏á‡πÅ‡∏Ñ) ‡∏°‡∏µ‡πÅ‡∏ï‡πà‡∏Ñ‡∏ô‡∏î‡πà‡∏≤ ‡∏û‡∏ô‡∏á.‡∏ï‡∏±‡∏î‡∏´‡∏ç‡πâ‡∏≤ ‡∏´‡∏≤‡∏ß‡πà‡∏≤...,[193451]
4667,,False information,5G ‡∏à‡∏∞‡πÄ‡∏õ‡πá‡∏ô‡∏à‡∏∏‡∏î‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏Ç‡∏≠‡∏á‡∏ù‡∏±‡∏ô‡∏£‡πâ‡∏≤‡∏¢‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ù‡∏π‡∏á‡∏ä‡∏ô‡∏ß‡∏±‡∏Ñ‡∏ã‡∏µ...,tha,1,0,0,5G ‡∏à‡∏∞‡πÄ‡∏õ‡πá‡∏ô‡∏à‡∏∏‡∏î‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏Ç‡∏≠‡∏á‡∏ù‡∏±‡∏ô‡∏£‡πâ‡∏≤‡∏¢‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ù‡∏π‡∏á‡∏ä‡∏ô‡∏ß‡∏±‡∏Ñ‡∏ã...,[193487]
