# Readme

This is a baseline model that uses only SBERT-based semantic search, without TF-IDF or any text preprocessing.

# Preparation

In [1]:
from sentence_transformers import SentenceTransformer
import scipy
import os
import pandas as pd
import numpy as np
from stemming.porter2 import stem
import re
import csv

# preprocessing method
def string_tokenise(string):  # return list
    """Split ``string`` into its runs of word characters, in order of appearance."""
    word_pattern = re.compile(r"\w+")
    return word_pattern.findall(string)


def case_fold(list1):  # return list
    """Return a new list holding the lower-cased form of every token in ``list1``."""
    lowered = []
    for token in list1:
        lowered.append(token.lower())
    return lowered


def stopping(list1):  # return list
    """Remove stopwords from a list of tokens.

    The stopword list is read from ``englishST.txt`` in the current working
    directory on every call; the file is split on whitespace.

    Args:
        list1: tokens to filter (expected to be strings).

    Returns:
        A new list with the tokens of ``list1`` that are not stopwords, in
        their original order.

    Raises:
        OSError: if ``englishST.txt`` cannot be opened.
    """
    # The with-block closes the file deterministically instead of leaking the handle.
    with open("englishST.txt", 'r') as stopfile:
        # A set gives constant-time membership tests instead of a list scan per token.
        stopwords = set(stopfile.read().split())
    return [items for items in list1 if items not in stopwords]


def normalise(list1):  # return list
    """Return a new list with ``stem`` applied to every token of ``list1``, in order."""
    return [stem(token) for token in list1]

# LOAD BERT SENTENCE MODEL
# Load the pretrained sentence-embedding model used by every encode() call in this notebook.
# Models trained on Natural Language Inference (NLI) are listed at https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/nli-models.md and
# models trained on Semantic Textual Similarity (STS) at https://github.com/UKPLab/sentence-transformers/blob/master/docs/pretrained-models/sts-models.md

model = SentenceTransformer('bert-base-nli-mean-tokens')

# Try each combination of separator and encoding until pandas can read the file
def read_csv(filepath):
    """Read a CSV file, trying several separators and encodings in turn.

    Every (separator, encoding) pair is passed to ``pd.read_csv``; the result
    of the first call that does not raise is returned.

    NOTE(review): pandas will often parse a file with the "wrong" separator
    without raising (yielding a single wide column), so the first separator
    tried is likely to win whenever the encoding is readable - confirm this
    is acceptable for the intended inputs.

    Args:
        filepath: path of the file to read.

    Returns:
        The parsed ``DataFrame``, or ``None`` when ``filepath`` does not end
        with the ``.csv`` extension (the file is not touched in that case).

    Raises:
        ValueError: if no combination could be read. The last underlying
            error (e.g. ``FileNotFoundError``, ``UnicodeDecodeError``) is
            attached as ``__cause__`` so the real reason is not lost.
    """
    if os.path.splitext(filepath)[1] != '.csv':
        return None
    seps = [',', ';', '\t']                    # ',' is the pandas default
    encodings = [None, 'utf-8', 'ISO-8859-1', 'utf-16', 'ascii']  # None is the pandas default
    last_error = None
    for sep in seps:
        for encoding in encodings:
            try:
                return pd.read_csv(filepath, encoding=encoding, sep=sep)
            except Exception as error:
                # Best-effort probing: remember why this attempt failed and try the next one.
                last_error = error
    raise ValueError("{!r} could not be read with any encoding in {} or separator in {}"
                     .format(filepath, encodings, seps)) from last_error

# Setup (with optional preprocessing) and embedding

In [2]:
# Parse the verified claims (the corpus that is searched) from the tab-separated file.
vclaims_directory = 'v3.0/verified_claims.docs.tsv'
vclaims_fields = ['vclaim_id', 'vclaim', 'title']
df_vc = pd.read_csv(vclaims_directory, usecols = vclaims_fields, sep = '\t')
# tolist() already builds a fresh list, so no additional copy is needed.
vclaims_list = df_vc.vclaim.tolist()

# Load the tweets (the queries) the same way and put their text in a list.
tweets_directory = 'v3.0/train/tweets.queries.tsv'
tweets_fields = ['tweet_id', 'tweet_content']
df_t = pd.read_csv(tweets_directory, usecols = tweets_fields, sep = '\t')
tweets_list = df_t.tweet_content.tolist()

# Only the last expression of a cell is displayed; preview a few tweets
# instead of dumping the whole list into the notebook output.
tweets_list[:5]



['Trump needs to immediately divest from his businesses and comply with the emoluments clause. Iran could threaten Trump hotels *worldwide* and he could provoke war over the loss of revenue from skittish guests.  His business interests should not be driving military decisions. — Ilhan Omar (@IlhanMN) January 6, 2020',
 'A number of fraudulent text messages informing individuals they have been selected for a military draft have circulated throughout the country this week.',
 'Fact check: The U.S. Army is NOT contacting anyone regarding the draft. If you are receiving texts, phone calls or direct messages about a military draft, they are not official communications from the U.S. Army pic.twitter.com/3S32De8ekP — U.S. Army CGSC (@USACGSC) January 8, 2020',
 'The US drone attack on #Soleimani caught on camera.#IranUsapic.twitter.com/TvRkHvlgby — Olaudah Equiano® (@RealOlaudah) January 6, 2020',
 '1. To the dim witted reporters like @dmedin11:  no one said this wasn’t photoshopped.  No one 

In [19]:
# Use SBERT to embed every verified claim.
# Each claim becomes one fixed-length vector; the length depends on the loaded
# model, so it is printed below from the data rather than hard-coded in a comment.
vclaim_embeddings = model.encode(vclaims_list)

# Cache the embeddings on disk so the search cell can reload them.
np.save('01_embeddings.npy', vclaim_embeddings)

print('Sample BERT embedding vector - length', len(vclaim_embeddings[0]))
print('Sample BERT embedding vector - note includes negative values', vclaim_embeddings[0])

Sample BERT embedding vector - note includes negative values [-0.06839999  0.11646768 -0.37378955  0.27000183  0.37912527 -1.2374749
  0.5467199  -0.64416355  0.01363655 -0.06171849 -0.23101607 -0.4370634
 -0.11530093  0.78729784 -0.53884095  0.30604655 -0.654617   -0.11056169
 -0.00810371 -0.20159982  0.39454025 -0.6258818  -0.50732946 -0.26715767
  0.86845887  0.3733293  -0.40977922 -0.76450676 -1.4386698   0.29790646
 -0.49806467  0.2177606  -0.03685323 -0.16548534 -0.138993    0.7363141
 -1.177447   -0.17506939 -0.21862906 -0.26186585 -0.07425258 -0.25737232
 -0.6412997  -0.24607065 -0.792142   -0.5476829   0.7522482   0.46411932
  0.578255   -0.30182254  0.29880628 -0.02058476 -0.3748311  -0.7059333
  0.07826146 -0.19672419  0.09062672 -1.7489096  -0.27156785 -0.5553281
 -0.6658741   0.36642843  0.52788115  0.60650444  0.26296413 -0.00902202
  0.4893662   0.8471768  -1.2924407  -1.1877214   0.49134386 -0.5718257
 -0.7279378  -0.45531315  0.3747597  -0.25155854 -0.16524307 -0.14369

# PERFORM SEMANTIC SEARCH AND GENERATE RESULTS

In [5]:
# Reload the cached claim embeddings and embed the tweets (the queries).
vclaim_embeddings = np.load('01_embeddings.npy')
tweet_embeddings = model.encode(tweets_list)

# Columns of the ranked-results file, in output order.
results_fields = ['tweet_id','Q0','vclaim_id','rank','score','tag']

# Number of closest verified claims reported for each tweet (by cosine similarity).
number_top_matches = 1 #@param {type: "number"}

# Ids are looked up by position: tweets_list / vclaims_list were built from these
# DataFrame columns in row order, so list index i corresponds to row i. Unlike a
# text-equality lookup this is O(1) per row and still works when two rows share
# the same text.
tweet_ids = df_t['tweet_id'].tolist()
vclaim_ids = df_vc['vclaim_id'].tolist()

print("Semantic Search Results")

# newline='' lets the csv module control line endings; the with-block closes the
# file even if an error occurs part-way through the loop.
with open('01_results.tsv', 'w', newline='') as RankedIROutput:
    # One tab-delimited writer for the whole file. No header row is written:
    # the results file is expected to contain data rows only (a comma-separated
    # header line would also be inconsistent with the tab-separated rows).
    writer = csv.DictWriter(RankedIROutput, fieldnames = results_fields, delimiter='\t')

    for tweet_idx, (tweet, tweet_embedding) in enumerate(zip(tweets_list, tweet_embeddings)):
        # Cosine distance from this tweet to every verified claim (smaller = more similar).
        distances = scipy.spatial.distance.cdist([tweet_embedding], vclaim_embeddings, "cosine")[0]

        # Indices of the closest claims; the stable sort keeps ties in corpus order.
        closest = np.argsort(distances, kind='stable')[:number_top_matches]

        print("\n\n======================\n\n")
        print("tweet:", tweet) # for best reading, output the original text rather than tokens
        print("\nTop %d most similar vclaim in corpus:" % number_top_matches)

        for idx in closest:
            # Cosine similarity = 1 - cosine distance; this is the score reported for evaluation.
            score = "%.4f" % (1 - distances[idx])
            print(vclaims_list[idx].strip(), "(Cosine Score: %s)" % score)

            # NB: no matter what the real rank is, it should be 1 if you want it to be counted
            return_data = {'tweet_id': tweet_ids[tweet_idx], 'Q0': 'Q0',
                           'vclaim_id': vclaim_ids[idx], 'rank': '1',
                           'score': score, 'tag': 'DC'}
            writer.writerow(return_data)

Semantic Search Results




tweet: Trump needs to immediately divest from his businesses and comply with the emoluments clause. Iran could threaten Trump hotels *worldwide* and he could provoke war over the loss of revenue from skittish guests.  His business interests should not be driving military decisions. — Ilhan Omar (@IlhanMN) January 6, 2020

Top 20 most similar vclaim in corpus:
Donald Trump said that America's economy crashing would lead to riots demanding we make the country great again. (Cosine Score: 0.8107)




tweet: A number of fraudulent text messages informing individuals they have been selected for a military draft have circulated throughout the country this week.

Top 20 most similar vclaim in corpus:
Hillary Clinton's campaign made several payments to a company that destroys sensitive data due to an ongoing investigation into her use of e-mail while she was secretary of state. (Cosine Score: 0.7438)




tweet: Fact check: The U.S. Army is NOT contacting anyone regar





tweet: Holy shit Michael Knowles of Daily Wire just called Greta Thunberg a “mentally ill Swedish child.”@ChristopherHahn tore into him pic.twitter.com/Ki0cK6W3Ev — jordan (@JordanUhl) September 23, 2019

Top 20 most similar vclaim in corpus:
A Fox News guest disparaged teenage climate activist Greta Thunberg by calling her "mentally ill." (Cosine Score: 0.7877)




tweet: Trump visited the US border this week & said rock climbers tested his wall & agreed it cannot be climbed. But Mexicans have turned Trump’s wall into a tourist attraction & are playing a game to see who can climb it the fastest, no ladders or ropes needed. The record is 45 seconds pic.twitter.com/2Xr4rolKk2 — Stone 🥶 (@stonecold2050) September 22, 2019

Top 20 most similar vclaim in corpus:
Images shared widely in September 2019 showed people easily climbing the same fence, or same type of fence, that U.S. President Donald Trump said "can't be climbed." (Cosine Score: 0.7574)




tweet: America has more governors 





tweet: The story by Axios that President Trump wanted to blow up large hurricanes with nuclear weapons prior to reaching shore is ridiculous. I never said this. Just more FAKE NEWS! — Donald J. Trump (@realDonaldTrump) August 26, 2019

Top 20 most similar vclaim in corpus:
Donald Trump suggested he'd use nuclear weapons against Mexico if they refused to build and pay for a wall. (Cosine Score: 0.8411)




tweet: .@AllenFrancesMD, a psychiatrist, tells @Brianstelter: Trump “may be responsible for many more million deaths” than Adolf Hitler, Joseph Stalin, and Mao Zedong.https://t.co/cBUZVQjhNUpic.twitter.com/RVARyeVnVh — Julio Rosas (@Julio_Rosas11) August 25, 2019

Top 20 most similar vclaim in corpus:
A psychiatrist said U.S. President Donald Trump may be responsible for millions more deaths than Hitler, Stalin, and Mao. (Cosine Score: 0.8546)




tweet: fox news is very worried about homeless people laying “in the street having just shot up with marijuana” people forget that weed





tweet: Crazy Bernie Sanders recently equated the City of Baltimore to a THIRD WORLD COUNTRY! Based on that statement, I assume that Bernie must now be labeled a Racist, just as a Republican would if he used that term and standard! The fact is, Baltimore can be brought back, maybe…… — Donald J. Trump (@realDonaldTrump) July 29, 2019

Top 20 most similar vclaim in corpus:
President Donald Trump was using an anti-Semitic slur when he called NBC host Chuck Todd 'sleepy eyes.' (Cosine Score: 0.7881)




tweet: GOP rallies around Trump 9/11 role. “While Obama and Biden were cowering in fear on Air Force 1, Mr. Trump was on the ground with first responders searching for survivors and pulling people to safety,” Jim Jordan says. “I remember seeing him on TV, running toward the danger.” pic.twitter.com/P7ol4wycoF — Dan Lyons (@realdanlyons) July 29, 2019

Top 20 most similar vclaim in corpus:
A video shows an audience member scaring Donald Trump at a rally by screaming 'Allahu Akbar.' (Cosin





tweet: WATCH: Elderly Man Beaten With Crowbar, Another Has Head Split Open By Antifa While Trying To Help ‘Gay Man In Sundress’ https://t.co/slRy6Nzajm — The Daily Wire (@realDailyWire) July 1, 2019

Top 20 most similar vclaim in corpus:
Notorious homophobe and Westboro Baptist Church founder Fred Phelps made a deathbed confession that he was gay. (Cosine Score: 0.7459)




tweet: John Blum, the older man that was bloodied and had his picture go viral, is here in this video before the brawl broke out holding his baton. A black man was able to deescalate a stand off between John and #antifa & other counter protesters. #Milkshake#NoPrideforProudBoyspic.twitter.com/yomI2759Bp — John (@Johnnthelefty) July 2, 2019

Top 20 most similar vclaim in corpus:
A photograph of a 'My Daddy's Life Matters' sign included text explaining why its OK for police officers to shoot black men. (Cosine Score: 0.7522)




tweet: In order to get elected, @BarackObama will start a war with Iran. — Donald J. T





tweet: One of the many terrifying parts of the Georgia abortion bill is that they will prosecute women who leave the state to get an abortion where it is otherwise legal. This law follows people out of the state. This is codifying residents, specifically women, as property of the state. — T-Rex (@PhillyTRex) May 9, 2019

Top 20 most similar vclaim in corpus:
Georgia's House Bill 481 would, when implemented, leave women open to prosecution for criminal abortion, murder, or second-degree murder for having abortions or miscarriages. (Cosine Score: 0.8732)




tweet: Joe Biden told a predominantly black audience in 2012 that Republicans want to “put y’all back in chains!” How do you get more racially divisive that THAT? — Build The Wall (@ditchobama) May 8, 2019

Top 20 most similar vclaim in corpus:
Sen. Joe Biden told an audience which included hundreds of African Americans that Republican economic policies would "put y'all back in chains." (Cosine Score: 0.8153)




tweet: I love th





tweet: In a first, Germany is requesting the removal of the Trump appointed ambassador to Germany, #RichardGrenell. They consider him to stoke racism and fascism on the country. That’s where we’re at, folks. The Germans want the American gone, because he’s a Nazi. — Andrew James Gregor (@andrewjgregor) March 19, 2019

Top 20 most similar vclaim in corpus:
A tweet from U.S. President Donald Trump attacking the press echoes a passage from Hitler's "Mein Kampf." (Cosine Score: 0.8196)




tweet: Our investigators exposed this New York abortion facility, which says they will put a born-alive baby in a jar of “solution” to drown her. They also say to “flush” the baby down the toilet, or “put it in a bag” if she’s born alive. pic.twitter.com/GYxdpyEbDK — Lila Rose (@LilaGraceRose) February 20, 2019

Top 20 most similar vclaim in corpus:
An employee at a women's health center in Bronx, New York, told an undercover pro-life activist she should dispose of a baby born alive after a failed ab





tweet: Large adult son thinks teachers are ‘losers’ pic.twitter.com/oXsSR39kQp — NowThis (@nowthisnews) February 12, 2019

Top 20 most similar vclaim in corpus:
Donald Trump tweeted and deleted a candid, regretful statement in the early hours of New Year's Day. (Cosine Score: 0.7314)




tweet: El Paso was NEVER one of the MOST dangerous cities in the US. We‘ve had a fence for 10 years and it has impacted illegal immigration and curbed criminal activity. It is NOT the sole deterrent. Law enforcement in our community continues to keep us safe #SOTU — Mayor Dee Margo (@mayor_margo) February 6, 2019

Top 20 most similar vclaim in corpus:
A chart documents that nationals from the seven countries included in President Trump's U.S. entry restriction have not killed any American citizens since 1975. (Cosine Score: 0.7087)




tweet: California Newborn becomes first baby to be named an emoji: 😍😍😍 https://t.co/RDCRlsgKPD — Game IU IU (@GameIUIU) January 30, 2019

Top 20 most similar vclaim 





tweet: Do you know why they call it Wall Street in NYC? Because the Dutch literally built a wall to keep the British out of Dutch colonies. You want to know how well walls work? It isn’t called New Amsterdam anymore.https://t.co/VnztJ2jxHU — Ocular Nervosa (@ocularnervosa) January 5, 2019

Top 20 most similar vclaim in corpus:
In the 17th century, New Amsterdam built a protective wall along its northern perimeter (analogous to Trump's border wall) to keep "bad hombres" out, but it failed to achieve its stated purpose in that the British successfully invaded the city by sea. (Cosine Score: 0.7708)




tweet: In the 1640’s the Dutch inhabitants of New Amsterdam built a 12′ wall to keep the bad hombres out. In 1664 the British ignored the wall and took New Amsterdam by sea. It’s now called New York. They took down the wall and built a street. It’s called Wall Street. pic.twitter.com/Cbhp9N8BDW — Joe Delmonaco (@JoeDelmonaco) January 7, 2019

Top 20 most similar vclaim in corpus:
In th





tweet: Lt. Col. Alexander Vindman is still at National Security Council, we are told. Earlier Sunday questions arose about whether Vindman, the NSC’s top Ukrainian expert, was still in his role after Nat Sec Adv O’Brien talked to CBS about a natural rotation of people to and from it. — Jim Acosta (@Acosta) November 11, 2019

Top 20 most similar vclaim in corpus:
In August 2019, workers at the Shell plant in Beaver County, Pennsylvania, were told to either attend a speech by U.S. President Donald Trump or lose pay. (Cosine Score: 0.7435)




tweet: MORE: @mikevolkov20 says, “Vindman is still detailed to NSC. We are not aware of any changes in his status. Obviously any retaliatory action against LTC Vindman on a day when we honor our military heroes would be reprehensible.” #VeteransDay @CBSNews @FaceTheNation — Paula Reid (@PaulaReidCBS) November 11, 2019

Top 20 most similar vclaim in corpus:
Linn County Sheriff Tim Mueller sent a letter to Vice President Biden stating that he woul





tweet: EXCLUSIVE: I have obtained a copy of ⁦@realDonaldTrump⁩’s letter to #Erdogan. ⁦@POTUS⁩ warns him to not “be a tough guy! Don’t be a fool!” Says he could destroy Turkey’s economy if #Syria is not resolved in a humane way. Details tonight at 8pm #TrishRegan #FoxBusiness pic.twitter.com/9BoSGlbRyt — Trish Regan (@trish_regan) October 16, 2019

Top 20 most similar vclaim in corpus:
Mike Huckabee tweeted that the White House Correspondents Dinner 'celebrated bullying' after inviting people to watch his own show unless they lacked a 'sense of humor.' (Cosine Score: 0.7430)




tweet: The look of the White House Italian translator as Trump says President Mozzarella for the Italian President and says U.S. and Italy have been allies since Ancient Rome. Hot fucking damn pic.twitter.com/ecrIaRFvOI — Danny Keetz, Drunk Assistant to the GM (@DKeetz) October 16, 2019

Top 20 most similar vclaim in corpus:
President Trump raised his middle finger to make an obscene gesture at Italian Prime





tweet: These Democrats don't realize that if they impeach Trump and the Senate doesn't confirm it then it nullifies Trump's first term and he gets to run two more times. Read the Constitution, people — Jack Posobiec (@JackPosobiec) October 01, 2019

Top 20 most similar vclaim in corpus:
The U.S. Constitution states that if a president is impeached by the House but not convicted by the Senate, that person's first term is nullified and they are eligible to run for office two more times. (Cosine Score: 0.8535)




tweet: WHO CHANGED THE LONG STANDING WHISTLEBLOWER RULES JUST BEFORE SUBMITTAL OF THE FAKE WHISTLEBLOWER REPORT? DRAIN THE SWAMP! — Donald J. Trump (@realDonaldTrump) September 30, 2019

Top 20 most similar vclaim in corpus:
In June 2018, the actor Peter Fonda suggested in a tweet that Barron Trump should be kidnapped, as a protest against the president's immigration policy. (Cosine Score: 0.7479)




tweet: In an effort to evade the police, a motorcyclist jumps train tracks





tweet: “giving Americans small doses of socialism until they suddenly awake to find they have communism.”-Nakita Khrushchev pic.twitter.com/mQVfhYvaNo — Susan Minor (@susanminor41) April 14, 2016

Top 20 most similar vclaim in corpus:
Soviet leader Nikita Khrushchev said: "We cannot expect the Americans to jump from capitalism to communism, but we can assist their elected leaders in giving Americans small doses of socialism, until they suddenly awake to find they have communism." (Cosine Score: 0.7490)




tweet: We cannot expect the Americans to jump from capitalism to communism, but we can assist their elected leaders in giving Americans small doses of socialism, until they suddenly awake to find they have communism.” — Nikita Khrushchev — Michael Reagan (@ReaganWorld) February 18, 2018

Top 20 most similar vclaim in corpus:
Soviet leader Nikita Khrushchev said: "We cannot expect the Americans to jump from capitalism to communism, but we can assist their elected leaders in giving





tweet: Sen. McConnell casts vote as man behind him gives thumbs down at @bellarmineU. @courierjournal@ALTONPHOTO#kysenpic.twitter.com/EFAqxMfbMd — Michael Clevenger (@MClevenger_CJ) November 4, 2014

Top 20 most similar vclaim in corpus:
During the RNC, Donald Trump sent a tweet about his running mate describing him as 'boring Mike Pence.' (Cosine Score: 0.6907)




tweet: This guy voting behind Mitch McConnell who wants the world to know how he feels about Mitch McConnell: pic.twitter.com/aP7m7dblLW — NowThis (@nowthisnews) November 4, 2014

Top 20 most similar vclaim in corpus:
During the RNC, Donald Trump sent a tweet about his running mate describing him as 'boring Mike Pence.' (Cosine Score: 0.7176)




tweet: We made the wrong call today by using a disabled parking spot for our live shot in TX. There’s no excuse. I apologize to the two people who were on their way to vote as it’s one of the most important parts of our democracy; we’d never want to jeopardize anyone’s ability 





tweet: Found on Craigslist: “We need numerous people to stand in line for Trump Rally, Erie Insurance Arena. $100 per person. Wednesday, October 10 only, from 9:00 AM to 4:00 PM.” — Mollie Katzen (@MollieKatzen) October 10, 2018

Top 20 most similar vclaim in corpus:
Organizers working for billionaire George Soros ran Craigslist ads offering $300 each to individuals participating in the 24 March 2018 'March for Our Lives' protests. (Cosine Score: 0.7813)




tweet: VA is now NOT allowing visitors to wear a tampon or menstrual cup during visits….this is way over the line….. pic.twitter.com/9JxzWrT6cS — PrisonReformMovement (@PrisonReformMvt) September 23, 2018

Top 20 most similar vclaim in corpus:
Visitors to prisons in Virginia are prohibited from wearing tampons or menstrual cups. (Cosine Score: 0.8122)




tweet: The Dow has dropped 1,377 points in the last two days. Oh @realDonaldTrump, your self from six years ago would like a word with you. Alternatively, you could save us al





tweet: Received this for my 88-year-old grandma. Says it’s a summons from Travis County, but is actually asking for money for @tedcruz . Did your campaign authorize this? Is this even legal? Shame on you. That’s one more @BetoORourke voter. pic.twitter.com/NcFoOCvjFj — Sean Owen (@sean_r_owen) September 16, 2018

Top 20 most similar vclaim in corpus:
President Trump's older sister, federal judge Maryanne Trump Barry, said Trump was called 'Donnie Dimwit' as a child, and is now 'acting like a nutjob.' (Cosine Score: 0.7477)




tweet: Outside of a possible DTPA (civil) violation. The slimy Ted Cruz mailers may actually be criminal! Texas Penal Code 32.48 states that you can not falsely send a “Summons” in order to induce someone to take an action. pic.twitter.com/nstNhNWmWd — Gene Wu (@GeneforTexas) September 17, 2018

Top 20 most similar vclaim in corpus:
President Trump signed a bill blocking Obama-era background checks that would have made it easier to flag mentally ill potential





tweet: Direct from your mouth, “If soldiers were real patriots, they will not take a pay raise” Wow🤔 How unthoughtful coming from a 5 Time draft dodger who was scared to wear the uniform 😡😡😡 — John Hobbs Jr (@HobbsAnimations) August 15, 2018

Top 20 most similar vclaim in corpus:
Donald Trump criticized President Barack Obama during the 2013 government shutdown for failing to 'lead,' 'get everyone in a room,' and 'make a deal.' (Cosine Score: 0.7731)




tweet: Trump: If soldiers were ‘real patriots,’ they wouldn’t take a pay raise https://t.co/hMjdWnPvQopic.twitter.com/5C0rzOdMwZ — Tommy Christopher (@tommyxtopher) August 14, 2018

Top 20 most similar vclaim in corpus:
In a speech to U.S. military personnel, President Trump said if soldiers were real patriots, they wouldn't take a pay raise. (Cosine Score: 0.8269)




tweet: ANIMATOR OF “My Little Pony” TOM WYSOM, HOLLYWEIRD #pedophile RECIVES MILD SENTENCE FOR OVER 3,000 IMAGES OF CHILDREN, SOME WITH HANDS AND FEET TIED! #SICK #p





tweet: Sad to report this update: Today, after 25 years as the editorial cartoonist for the Pittsburgh Post-Gazette, I was fired. — Rob Rogers (@Rob_Rogers) June 14, 2018

Top 20 most similar vclaim in corpus:
Comedy Central fired Michelle Wolf and issued an apology after the comedian's appearance at the 2018 White House Correspondents Dinner. (Cosine Score: 0.7877)




tweet: “The Trump Organization declared that the new Trump Tower Pyongyang would be “the finest mixed-use real-estate venture in the world” https://t.co/AgCZhdBiPBpic.twitter.com/UTvjYaWzA9 — Niewidzialna Ręka™ (@Reka_Niewidka) June 12, 2018

Top 20 most similar vclaim in corpus:
A photograph shows a sign reading 'Trump Made America the Best Country in the Nation.' (Cosine Score: 0.7613)




tweet: They already have a design for Trump Tower Pyongyang… #TrumpKimSummit #FuckTrump pic.twitter.com/G3kmLRy4pk — Blocked By Trump (@trump_blocked) June 13, 2018

Top 20 most similar vclaim in corpus:
Donald Trump boasted in 





tweet: There have been AT LEAST 288 school shootings in the United States since January 1, 2009, according to @cnn data review. Internationally, there have been at least 27 school shootings in select countries listed below. @cnnbrk pic.twitter.com/bmUGN4mz5z — AnneClaire Stapleton (@AnneClaireCNN) May 21, 2018

Top 20 most similar vclaim in corpus:
The United States had 251 mass shootings during the first 216 days of 2019. (Cosine Score: 0.7822)




tweet: A friend kindly alerted me to a very sweet moment we missed yesterday – the Queen travelling with Meghan’s beagle guy spotted by the talented Lainey Gossip https://madaboutmeghan.blogspot.con/2018/05/live-blog-day-before-wedding.html pic.twitter.com/zpQbtg8YeQ — Mad About Meghan (@MadAboutMeghan) May 18, 2018

Top 20 most similar vclaim in corpus:
In a 2 January 2018 Twitter thread, Chelsea Clinton wished the Church of Satan a Happy New Year. (Cosine Score: 0.7420)




tweet: President Trump during California #SanctuaryCities Rou





tweet: IM IN THE HOSPITAL! I WAS ATTACKED 2NITE! A MAN OPENED MY CAR DOOR & STABBED ME W SOMETHING! PLEASE SAY PRAYERS 4 US! 🙏🏼🙏🏼 THANK GOD IT WAS ONLY MYSELF & MY SECURITY IN THE CAR, WHEN 3 MEN APPROACHED! WHILE SECURITY WAS DISTRACTED, W A GUY A CAR PULLED UP & ATTACKED! I’M OK! pic.twitter.com/TZ0ppZeEWN — Corey Feldman (@Corey_Feldman) March 28, 2018

Top 20 most similar vclaim in corpus:
Former Navy SEAL Chris Heben was shot following a parking lot altercation and drove himself to the hospital after pursuing his assailant[s]. (Cosine Score: 0.7839)




tweet: @LAPD R CURRENTLY INVESTIGATING THE CASE AS AN ATTEMPTED HOMICIDE! I HAVE HAD MOUNTING THREATS ON ALL SM PLATFORMS BY THIS VILE “WOLFPACK” & THIS IM SURE IS A RESULT OF THOSE NEGATIVE ACTIONS! I HAVE REASON 2 BELIEVE ITS ALL CONNECTED! ENOUGH IS ENOUGH! HOW SICK R THESE PPL?!? — Corey Feldman (@Corey_Feldman) March 28, 2018

Top 20 most similar vclaim in corpus:
Sen. Tim Kaine responded to news of a machete attack at Ohi





tweet: Source: Marjory Stoneman Douglas High School shooter, now in police custody after Parkland, Florida shooting, who has been identified as 19-year-old Nicolas Cruz – is a “Dreamer”. — Craig R. Brittain for US Senate (R-AZ 2018) 🆒 (@SenatorBrittain) February 14, 2018

Top 20 most similar vclaim in corpus:
CNN asked a survivor of the 14 February 2018 mass shooting in Parkland, Florida to deliver scripted remarks during a televised town hall. (Cosine Score: 0.7876)




tweet: A girl with Down syndrome, got up during a regular papal service & went toward @Pontifex Security men quickly moved in 2 take her back to her mom. The Pope stopped them & said to her “come sit next to me.” She then sat down near him and he continued his homily, holding her hands pic.twitter.com/2QziweDRPX — TradCatFem🌹🙏🌹 (@TradCatFem) February 11, 2018

Top 20 most similar vclaim in corpus:
A bride who requests the theme from Robin Hood: Prince of Thieves as her processional music is shocked to find herself 





tweet: There is nothing that I would want more for our Country than true FREEDOM OF THE PRESS. The fact is that the Press is FREE to write and say anything it wants, but much of what it says is FAKE NEWS, pushing a political agenda or just plain trying to hurt people. HONESTY WINS! — Donald J. Trump (@realDonaldTrump) August 16, 2018I  mean, he’s literally just translating Mein Kampf at this point. pic.twitter.com/Gddw4WqjDT — Jules Suzdaltsev (@jules_su) August 16, 2018

Top 20 most similar vclaim in corpus:
Buzz Aldrin said that his greatest achievement was successfully restraining himself from punching President Trump. (Cosine Score: 0.7570)




tweet: I just got back from the border.  This week, Trump blew up a sacred Native American hill on public land to build his racist wall.  Watch my new update and join the growing movement to stop the wall. #NoBorderWall pic.twitter.com/YgmS94vAwO — Raul M. Grijalva (@RepRaulGrijalva) February 9, 2020

Top 20 most similar vclaim in corpus





tweet: Yesterday we participated in a ceremony at @ArlingtonNatl to honor the sacrifices of soldiers during #WWI by laying a wreath on the tomb of the #UnknownSoldier. #VeteransDay pic.twitter.com/ZQPbZwMSf6 — French Embassy U.S. (@franceintheus) November 12, 2018

Top 20 most similar vclaim in corpus:
Rep. Drew Ferguson posted a picture of Nazi soldiers on Twitter on the anniversary of the Allied invasion of Normandy. (Cosine Score: 0.7551)




tweet: Border Patrol Officer killed at Southern Border, another badly hurt. We will seek out and bring to justice those responsible. We will, and must, build the Wall! — Donald J. Trump (@realDonaldTrump) November 20, 2017

Top 20 most similar vclaim in corpus:
President Donald Trump's website contains a release about a plan to 'fuck the poor.' (Cosine Score: 0.7563)




tweet: This is my picture of Ata Kandó , she became 101 years old in The Netherlands and died of old age. It was made in Bergen. This is not Rose Malinger. Do not use my ph





tweet: Hi Antonio thanks for sharing. This is our entrance floor, contrary to the internets belief this isn’t designed to stop people running through it. It’s designed to show what can be achieved with tiles. It also is completely flat and only really comes to life through the lens. — Casa Ceramica (@casaceramica) July 4, 2018

Top 20 most similar vclaim in corpus:
Speaker of the House Nancy Pelosi said: 'Just because someone is here illegally doesn't mean they broke any of our laws.' (Cosine Score: 0.6616)




tweet: Before y’all pearl clutch about the sudden unraveling of our society, I’ll note that the owner of a cookie store called “Crumb and Get It” refused service to Joe Biden in 2012 in Radford, also a smallish college town in VA— and then basked in GOP praise. They closed in 2016. https://t.co/MGpWbR2Y2V — Doug Landry (@dougblandry) June 23, 2018

Top 20 most similar vclaim in corpus:
Vice President Joe Biden was refused service at the Crumb and Get It cookie shop in 2012. 





tweet: It is with a heavy heart that we announce the passing of Deputy Marshall Peterson, a 28-year veteran of the Broward Sheriff’s Office, Department of Detention. Deputy Peterson, 53, died at his residence. Thank you for your service, Deputy. pic.twitter.com/ytEwtINkWp — Broward Sheriff (@browardsheriff) April 25, 2018

Top 20 most similar vclaim in corpus:
In March 2005, investigators discovered the corpse of Michael Jackson buried at his Neverland Ranch, more than four years before he was reported dead. (Cosine Score: 0.7640)




tweet: “When the aging gorilla is confronted with the much more virile, new alpha-male, he shows submissiveness by grooming the alpha-male, but the gesture is actually a vain attempt by the old gorilla to humiliate his much younger rival.” — Jane Goodall pic.twitter.com/fx85I1KwVy — Mrs. Betty Bowers (@BettyBowers) April 24, 2018

Top 20 most similar vclaim in corpus:
Photograph shows a drunk gorilla about to punch a photographer. (Cosine Score: 0.728





tweet: No Clinton Foundation funds—dedicated to Haiti or otherwise—were used to pay for Chelsea’s wedding. It’s not only untrue, it’s a personal insult to me, to Hillary, and to Chelsea and Marc. https://t.co/YEHqqYrsxW — Bill Clinton (@BillClinton) January 13, 2018

Top 20 most similar vclaim in corpus:
Donald Trump's nominee for Secretary of Education, billionaire Betsy DeVos, has no education degree or teaching experience, has never attended a public school or sent her children to one, and supports the funding of for-profit Christian schools over public ones; she got the job because she donated $9.5 million to Trump's campaign. (Cosine Score: 0.8064)




tweet: Jan 12, 1958: Rainmaker Charles Hatfield died in #EagleRock. In 1904, LA paid him for 18″ of rain. @LATimesPhotos: https://t.co/DPdQqE8Dhq pic.twitter.com/JmRszVFc4H — LAhistory (@LAhistory) January 12, 2017

Top 20 most similar vclaim in corpus:
A photograph shows former president Barack Obama visiting Arlington National





tweet: In Portugal, with no net neutrality, internet providers are starting to split the net into packages. pic.twitter.com/TlLYGezmv6 — Ro Khanna (@RoKhanna) October 27, 2017

Top 20 most similar vclaim in corpus:
Facebook (or its algorithms) flagged the Declaration of Independence as 'hate speech' in July 2018. (Cosine Score: 0.7610)




tweet: The dude attacking my newspaper over the Moore story can’t remember how many purple hearts he supposedly has. pic.twitter.com/4iW4FqQR4U — Dave Weigel (@daveweigel) November 10, 2017

Top 20 most similar vclaim in corpus:
The comedian and actor Tim Allen wrote a lengthy Facebook post that attacked liberals and Democratic politicians and was shared widely in August 2019. (Cosine Score: 0.7652)




tweet: “It is historical to be the first robot in the world to be recognized with citizenship.” Please welcome the newest Saudi: Sophia. #FII2017pic.twitter.com/bsv5LmKwlf — CIC Saudi Arabia (@CICSaudi) October 25, 2017

Top 20 most similar vclaim





tweet: DIRECTV is letting us cancel NFL pkg. had to threaten to go with cable. Took half hour of arguing with them. They are getting lots of calls. — MB Taurus (@HeirloomCore) September 25, 2017

Top 20 most similar vclaim in corpus:
President Bush misspoke at a right-to-life rally and repeatedly said ‘feces’ instead of ‘fetus.’ (Cosine Score: 0.7901)




tweet: “NoRefund from #DirectTV so I cancelled ” Not Paying to watch Kneelers Disrespect Our Flag & Country. #WednesdayThoughts #WednesdayWisdom pic.twitter.com/Wtl3Ugo66M — TresDeplorable (@TresDeplorable) September 27, 2017

Top 20 most similar vclaim in corpus:
Mark Zuckerberg admitted or bragged that Facebook banned pro-life advertising, in particular, during Ireland's 2018 abortion-referendum campaign. (Cosine Score: 0.8138)




tweet: President should not be telling the Washington Redskins to change their name-our country has far bigger problems! FOCUS on them,not nonsense — Donald J. Trump (@realDonaldTrump) October 8, 2013





tweet: We are 99% confident that this is a completely false stunt by DS derps. Still not anyone taking credit, despite feelers. — Anonymous (@YourAnonNews) August 14, 2017

Top 20 most similar vclaim in corpus:
Speaker Paul Ryan said that he doesn't care if Donald 'Trump destroys the United States and that he is a liar and a fraud.' (Cosine Score: 0.7435)




tweet: That it was truly awful. ? We immediately removed the sign from the display as soon as we found out the store location. -Vik — Walmart (@Walmart) August 9, 2017

Top 20 most similar vclaim in corpus:
All U.S. stores will stop accepting coupons as of 1 July 2014 due to ‘Extreme Couponing’ fraud. (Cosine Score: 0.8409)




tweet: Michael, this is completely false. Starbucks is not sponsoring any such event. Where did you get this image? — Starbucks Coffee (@Starbucks) August 4, 2017

Top 20 most similar vclaim in corpus:
The arts and crafts chain Hobby Lobby does not sell Jewish holiday items at any of their stores. (Cosi





tweet: Yes that’s Cape Town. But tornado ain’t local. It’s from Texas, and got inserted with some CGI #CapeTownStorm — RyGuySA (@RyGuySA) June 7, 2017

Top 20 most similar vclaim in corpus:
CNN mislabeled the state of Alabama as Mississippi on a weather map of Hurricane Dorian. (Cosine Score: 0.7335)




tweet: Can anyone confirm this video? Tornado In #CapeTown #capestorm pic.twitter.com/eAx4zZPzkV — Maghdie Fife (@MagZaz5) June 7, 2017

Top 20 most similar vclaim in corpus:
A photograph shows Daytona International Speedway flooded during Hurricane Matthew in October 2016. (Cosine Score: 0.7177)




tweet: Ted Cruz: “Southern States Should Build Their Own Nuclear Weapon To Protect Their Beliefs” https://t.co/dul6fjlHGL — InxsySparxs (@InxsyS) June 6, 2017

Top 20 most similar vclaim in corpus:
Sen. Ted Cruz said Southern states should build a nuclear arsenal to "protect their beliefs." (Cosine Score: 0.9052)




tweet: AWWWW HELL NO! Obama’s Burial Plot Is A Massive Waste Of Taxpa





tweet: President Clinton fired his FBI director on July 19th, 1993, The Day before Vince Foster was found dead in Marcy Park. #ThursdayThoughts pic.twitter.com/wgbv9lAhDV — ?STOCK MONSTER? (@StockMonsterUSA) May 11, 2017

Top 20 most similar vclaim in corpus:
President Bill Clinton firing of FBI Director William Sessions was linked to Deputy White House Counsel Vince Foster's death a day later. (Cosine Score: 0.8643)




tweet: CBO has scored #AHCA twice. #facts March 13: https://t.co/fqlMGmPVdz March 23: https://t.co/OAJ4M7lhEP — AshLee Strong (@AshLeeStrong) May 6, 2017

Top 20 most similar vclaim in corpus:
Photographs of Renee Zellweger taken in October 2014 are real and undoctored. (Cosine Score: 0.7071)




tweet: ABORTIONS NOT ENOUGH FOR THE LEFT? Bill Nye: Should parents be penalized for “extra kids”? @infowars @DailyCaller https://t.co/nz9D9y41Ut — Liberty News 1776 (@LibertyNews1776) April 27, 2017

Top 20 most similar vclaim in corpus:
Macy's pulled funding from Planned 





tweet: Secret Knights Templar caves beneath Shropshire field - picS and video « Shropshire Star https://t.co/b0PSlpzNuk via @ShropshireStar — Mary Elizabeth Carey (@MaryCarey9) March 9, 2017

Top 20 most similar vclaim in corpus:
Wikileaks released a trove of "deep state files" in late December 2017. (Cosine Score: 0.6964)




tweet: free cone day on march 20th @ dairy queen ? — amy? (@ayeeeamyy) March 1, 2017

Top 20 most similar vclaim in corpus:
Dairy Queen is hosting 'Free Cone Day' on 20 March 2017. (Cosine Score: 0.9042)




tweet: rt to save a life you can get a free soft serve cone at dairy queen on march 20th I AM READY 4 THIS — kace (@faceofkace) March 8, 2017

Top 20 most similar vclaim in corpus:
Dairy Queen is hosting 'Free Cone Day' on 20 March 2017. (Cosine Score: 0.8586)




tweet: Donald Trump officially names the Obamacare replacement ‘World’s Greatest Healthcare Plan of 2017’ pic.twitter.com/HPEeCoD0zD — Richard Southern (@richard680news) March 8, 2017

Top 20 mo





tweet: #UnholyTrinity insistence to monitor Putin call was due to concern POTUS hasn’t told full story about Putin attempts at blackmail. — Rogue POTUS Staff (@RoguePOTUSStaff) January 28, 2017

Top 20 most similar vclaim in corpus:
That Hillary Clinton opposed sanctions against Russia because of speaking fees her husband received from a Russian bank, and that the Clinton campaign used undue influence to kill a Bloomberg story about the incident. (Cosine Score: 0.7758)




tweet: .@SeanSpicer‘s role in the Trump administration will be to provide the American public with robust and clearly articulated misinformation. pic.twitter.com/IlRkZPbDZl — The Onion (@TheOnion) January 29, 2017 You nailed it. Period! https://t.co/AUmS1C222b — Sean Spicer (@seanspicer) January 29, 2017 

Top 20 most similar vclaim in corpus:
Fox News reported that U.S. Rep. Ilhan Omar had delivered a 'profanity-laced attack on President Trump' by repeating Trump's own words. (Cosine Score: 0.8056)




tweet: Ca





tweet: Carl Paladino made racist, ugly, reprehensible remarks about the President & First Lady. My wish this season is for unity. Full statement pic.twitter.com/ZHgsrHmHbp — Andrew Cuomo (@NYGovCuomo) December 23, 2016

Top 20 most similar vclaim in corpus:
House Minority Leader Nancy Pelosi tweeted that she was disgusted with 'President' Trump after the passage of a tax reform bill. (Cosine Score: 0.7919)




tweet: Drunk Man Has Sex With Snowman; Loses Penis To Frostbite https://t.co/j98WaBYi2f — Mwic Robinson (@mwic255) December 8, 2016

Top 20 most similar vclaim in corpus:
An habitual alcohol abuser lost his penis to frostbite after a drunken sexual encounter with a snowman. (Cosine Score: 0.8860)




tweet: Pressure on Kuwait to move event to Trump hotel a ‘paradigm impeachable offense’ https://t.co/OvK4CBDURr pic.twitter.com/NBjcijb4Ua — Judd Legum (@JuddLegum) December 20, 2016

Top 20 most similar vclaim in corpus:
Speaking at the University of California Berkeley in May 2





tweet: Criminalizing flag-burning: Totally criminal when by proposed by Trump (2016) yet somehow totally cool when proposed by Hillary (2005). — John Schindler (@20committee) November 29, 2016

Top 20 most similar vclaim in corpus:
New York Times columnist hailed the election of Donald Trump as a 'complete repudiation of Barack Obama.' (Cosine Score: 0.7918)




tweet: Delivering supplies to #standingrock protest camp could warrant $1,000 fine #Humanrights #INDIGENOUS #environment https://t.co/nSFFliOP7R pic.twitter.com/wPBRz8932S — Dr. Frank Wilson (@DrFrankWilson) November 30, 2016

Top 20 most similar vclaim in corpus:
Anyone bringing supplies to DAPL protesters at the Standing Rock Indian Reservation could be fined $1,000 for the attempt, according to the Morton County Sheriff's Department. (Cosine Score: 0.7774)




tweet: The Sioux are literally being forced at gunpoint to accept ecological risks that North Dakota’s white residents refused. — terrycloth warrior (@suntzufuntzu





tweet: Lmfao soooooo did those Obama phones really get cut off?? This kid just posted his phone stopped working & he still had 500mins left?☠️? — BLM✨ (@blmmxo_) November 9, 2016

Top 20 most similar vclaim in corpus:
A Facebook page is giving away hundreds of PlayStation 4 or PlayStation 5 consoles because they have been unsealed and cannot be sold. (Cosine Score: 0.7776)




tweet: eric trump deleted his violation of new york state law, so here it is for posterity pic.twitter.com/Y0ss1Ew5Wc — noah kulwin (@nkulw) November 8, 2016

Top 20 most similar vclaim in corpus:
After the 2019 El Paso shooting, President Trump deleted tweets that referred to immigrants as 'invaders.' (Cosine Score: 0.8439)




tweet: Computer scientists have apparently uncovered a covert server linking the Trump Organization to a Russian-based bank. pic.twitter.com/8f8n9xMzUU — Hillary Clinton (@HillaryClinton) November 1, 2016

Top 20 most similar vclaim in corpus:
Experts confirmed a computer server linke





tweet: I hope Obama serious about this hot food with ebt thing ?? — NickiM? (@Lilonikaa) October 19, 2016

Top 20 most similar vclaim in corpus:
Olive Garden restaurants are 'funding Trump's re-election in 2020.' (Cosine Score: 0.7197)




tweet: Once again hillary proves she can't handle classified information. Gives away America's nuclear response time to the World. What an idiot! — OneUSMarineRet (@MasterGuns1313) October 20, 2016

Top 20 most similar vclaim in corpus:
Major General Paul Eaton said President Trump's decertification of an existing nuclear deal with Iran deal 'dishonors America.' (Cosine Score: 0.8522)




tweet: When the president gives the order to launch a nuclear weapon, that’s it. The officer has to launch. It can take as little as four minutes. — Hillary Clinton (@HillaryClinton) October 20, 2016

Top 20 most similar vclaim in corpus:
Hillary Clinton revealed a classified response window of "four minutes" for a U.S. president to launch nuclear weapons. (Cosi





tweet: ?‼️?? #USA, #NewYork: Welfare Scammers - Muslims caught buying Soda with Food Stamps to sell at their store. More: https://t.co/74O8qmkW3npic.twitter.com/YfqYBpxIJJ— Onlinemagazin (@OnlineMagazin) September 30, 2016

Top 20 most similar vclaim in corpus:
A video depicts a Muslim woman fraudulently using food stamps to purchase soda to resell at her store. (Cosine Score: 0.8225)




tweet: UN Demands U.S. Pay Reparations To Blacks... https://t.co/qDMzdmzx88— DRUDGE REPORT (@DRUDGE_REPORT) September 28, 2016

Top 20 most similar vclaim in corpus:
A resolution passed in March 2016 forces residents of a Texas county to pay reparations to black citizens. (Cosine Score: 0.7976)




tweet: Outrageous! The UN Has Decided The U.S. Owes Reparations To Black People For A History Of 'Racial Terrorism'  https://t.co/ZAOj0fGrJw— RedFlagNews.com (@redflagnews) September 28, 2016

Top 20 most similar vclaim in corpus:
Miller County Judge Roy John McNatt wrote a racist memo about black peopl





tweet: During a recent interview with Australian journalist John Pilger, Assange said that Bernie Sanders was forced to... https://t.co/rxYa2FZIZ8 — Jonathan Henderson (@Dagan81) September 9, 2016

Top 20 most similar vclaim in corpus:
Matt Damon announced in March 2018 that he was moving his family to 'a safe place' in Australia because he was fed up with President Donald Trump and his policies. (Cosine Score: 0.6926)




tweet: #BreakingNews: We’re launching an exciting new savoury #condom range – Eggplant flavour! ? #CondomEmojipic.twitter.com/idA07EaiXC — Durex Global (@durex) September 5, 2016

Top 20 most similar vclaim in corpus:
The "Tide pod challenge" is a real viral phenomenon whereby people bite into Tide brand laundry detergent pods. (Cosine Score: 0.7139)




tweet: Phyllis Schlafly said “We’ll have a woman President over my dead body.” God accepts the deal. https://t.co/5lDfLA23Qq — Randi Rhodes (@RandiRhodes) September 6, 2016

Top 20 most similar vclaim in corpus:


In [44]:
# Look up the tweet_id of one specific tweet by exact match on its full text.
lookup_tweet_text = 'NYPD just came to my house bc Rutgers Police told them i’m a threat based on political statements i’ve made on campus and on twitter. — Kevin Allred (@KevinAllred) November 16, 2016'

# Boolean mask over df_t rows whose tweet_content equals the text above.
lookup_mask = df_t['tweet_content'] == lookup_tweet_text

# .item() yields the single matching tweet_id as a scalar; it raises
# ValueError unless exactly one row matched.
df_t.loc[lookup_mask, 'tweet_id'].item()

896