In [4]:
import spacy
from spacy.tokens import Span
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_sm")

animals = ["Golden Retriever", "cat", "turtle", "Rattus norvegicus"]
matcher = PhraseMatcher(nlp.vocab)

In [36]:
def animal_component(doc):
    animals = ["Golden Retriever", "cat", "turtle", "Rattus norvegicus"]
    matcher = PhraseMatcher(nlp.vocab)
    
    matches = matcher(doc)
    span = [Span(doc, start, end, label="ANIMAL") for match_id, start, end in matches]
    doc.ents = span
    return doc

In [37]:
global nlp
nlp.add_pipe(animal_component, after=True)

ValueError: [E001] No component 'True' found in pipeline. Available names: ['tagger', 'parser', 'ner']

In [5]:
def get_entities(doc):
    return [(ent.text, ent.label_) for ent in doc.ents]

def get_countries(doc):
    return [(ent.text, ent.label_) for ent in doc.ents if ent.label_ == 'GPE']

def get_persons(doc):
    return [(ent.text, ent.label_) for ent in doc.ents if ent.label_ == 'PERSON']


In [91]:
TEXT = """
@VirginAmerica to jump into the Dallas-Austin market - @Dallas_News http://t.co/EwwGi97gdx
"""
doc = nlp(TEXT)

print('Entities:   ', get_entities(doc))
print('Countries   :',  get_countries(doc))
print('PERSON   :',  get_persons(doc))

Entities:    [('Dallas', 'GPE')]
Countries   : [('Dallas', 'GPE')]
PERSON   : []


In [14]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis")

In [15]:
results = classifier(["We are very happy to show you the 🤗 Transformers library", "We hope you don't hate it."])

for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.5309


In [7]:
import csv

p = '../dataset/datasets_73978_166769_fb_sentiment.csv'
tweets = []
with open(p, newline='') as f:
    spamreader = csv.DictReader(f)
    for row in spamreader:
        tweets.append(row["FBPost"])
        
tweets[0]

'Drug Runners and  a U.S. Senator have something to do with the Murder http://www.amazon.com/Circumstantial-Evidence-Getting-Florida-Bozarth-ebook/dp/B004FPZ452/ref=pd_rhf_p_t_1 The State Attorney Knows... NOW So Will You. GET Ypur Copy TODAY'

In [16]:
doc = nlp(tweets[1])
print(doc)
print('Entities:   ', get_entities(doc))
print('Countries   :',  get_countries(doc))
print('PERSON   :',  get_persons(doc))

results = classifier(doc.text)
print("RESULT:  ", results)

Heres a single, to add, to Kindle. Just read this 19th century story: "The Ghost of Round Island". Its about a man (French/American Indian) and his dog sled transporting a woman across the ice, from Mackinac Island to Cheboygan - and the ghost that...
Entities:    [('this 19th century', 'DATE'), ('The Ghost of Round Island', 'WORK_OF_ART'), ('French', 'NORP'), ('Mackinac Island', 'GPE'), ('Cheboygan', 'GPE')]
Countries   : [('Mackinac Island', 'GPE'), ('Cheboygan', 'GPE')]
PERSON   : []
RESULT:   [{'label': 'NEGATIVE', 'score': 0.9485653638839722}]


In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [3]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc")

classes = ["not paraphrase", "is paraphrase"]
sequence_0 = "The company HuggingFace is based in New York City"
sequence_1 = "Apples are especially bad for your health"
sequence_2 = "HuggingFace's headquarters are situated in Manhattan"

paraphrase = tokenizer(sequence_0, sequence_2, return_tensors="pt")
no_paraphrase = tokenizer(sequence_0, sequence_1, return_tensors="pt")

paraphrase_classification_logits = model(**paraphrase).logits
no_paraphrase_classification_logits = model(**no_paraphrase).logits

paraphrase_results = torch.softmax(paraphrase_classification_logits, dim=1).tolist()
no_paraphrase_results = torch.softmax(no_paraphrase_classification_logits, dim=1).tolist()

for i in range(len(classes)):
    print(f"{classes[i]}: {int(round(paraphrase_results[i] * 100))}%")

AttributeError: 'tuple' object has no attribute 'logits'