In [1]:
# Transformer experiments
# Inspiration from Huggingface
# February 2024. Sila

In [6]:
from transformers import pipeline

summarizer = pipeline("summarization", "sshleifer/distilbart-cnn-12-6")

In [8]:
text_doc = r"""
Former President Donald Trump said he is going to have a big victory in the Republican South Carolina presidential primary Saturday, sending a signal to President Joe Biden that his campaign will be coming at him "like a freight train.
"We're going to have a gigantic victory here in South Carolina,
 Trump, the front-runner for the GOP nomination, told a raucous crowd Friday at the Winthrop Coliseum in Rock Hill,
  South Carolina at a rally that aired live on Newsmax and simulcast on the Newsmax2 online streaming platform.
  We're going to show crooked Joe Biden and the radical left Democrats that we are coming like a freight train in November.
Trump then took aim at his lone remaining opponent, former South Carolina Gov. Nikki Haley, who was the U.N. ambassador
in the first year of his administration. The FiveThirtyEight average of polling shows Trump with a 63.6% to 32.9% lead over
Nikki Haley.
Trump then tol the black community: Our message to the black community in this election will be a very simple one: If you want strong borders, safe neighborhoods,
rising wages, good jobs, great education and the return of the American dream then congratulations you are a Republican!
Based on the fact that District Attorney Fani Willis and her Lover were together long prior to the
 filing date of their Fake Lawsuit against me and many other innocent people, despite their sworn testimony to the contrary,
 this case must be determined as OVER and, of no further force or effect. Among other things,
  in close coordination and conjunction with the DOJ and White House (numerous 8-hour meetings between the Biden
  people and them in D.C.!), this case was all about stealing close to $1 Million Dollars for Lover Wade,
   and Election Interference, whereby a vicious and heinous attack is made on Crooked Joe Biden’s Political Opponent.
    This has never happened in the U.S.A., it is the “stuff” of Third World Countries and Banana Republics!
"""

summarizer(text_doc, max_length = int(0.2 * len(text_doc)))



[{'summary_text': ' The FiveThirtyEight average of polling shows Trump with a 63.6% to 32.9% lead over Nikki Haley . Trump: "We\'re going to show crooked Joe Biden and the radical left Democrats that we are coming like a freight train in November" Trump: If you want strong borders, safe neighborhoods, good jobs, great education and the return of the American dream then congratulations you are a Republican!'}]

In [14]:
summarizer(text_doc, min_length=5, max_length=100)

[{'summary_text': ' The FiveThirtyEight average of polling shows Trump with a 63.6% to 32.9% lead over Nikki Haley . Trump: "We\'re going to have a gigantic victory here in South Carolina"'}]

In [17]:
# Tokenizers

#Huggingface:
#Like other neural networks, Transformer models can’t process raw text directly, so the first step of our pipeline is to convert the text inputs into numbers that the model can make sense of. To do this we use a tokenizer, which will be responsible for:

 #   Splitting the input into words, subwords, or symbols (like punctuation) that are called tokens
 #   Mapping each token to an integer
 #   Adding additional inputs that may be useful to the model

In [18]:
from transformers import AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [19]:
raw_inputs = [
    "I've wanted to go to France my whole life.",
    "I hate this so much!",
]
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[ 101, 1045, 1005, 2310, 2359, 2000, 2175, 2000, 2605, 2026, 2878, 2166,
         1012,  102],
        [ 101, 1045, 5223, 2023, 2061, 2172,  999,  102,    0,    0,    0,    0,
            0,    0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])}
