In [1]:
# Install the required libraries.

In [4]:
!pip install torch



In [5]:
!pip install --upgrade torch



In [7]:
!pip install transformers=="4.11.1"  # Known compatible version [5]



In [8]:
!pip install datasets evaluate transformers[sentencepiece]



In [5]:
!pip install sacremoses



In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModel
from transformers import set_seed


  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'


# SENTIMENT ANALYSIS

In [3]:
# Load a sentiment-analysis pipeline backed by the SST-2 fine-tuned DistilBERT.
# NOTE(review): in the visible cells this object is never called, and
# `classifier` is rebound by the zero-shot cell further down — confirm it is needed.
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

In [4]:
# The cardiffnlp/twitter-roberta-base-sentiment model is used in the cells below.

In [7]:
# Analyzing tweets

In [8]:
# Classify one tweet with the Twitter RoBERTa sentiment checkpoint.
# The pipeline reports raw label ids (e.g. 'LABEL_2').
# NOTE(review): presumably LABEL_0/1/2 map to negative/neutral/positive —
# confirm against the model card.
sentiment = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")

tweet = "I love the new features! 😍"
sentiment(tweet)

[{'label': 'LABEL_2', 'score': 0.9917186498641968}]

# TOPIC CLASSIFICATION

In [9]:
# zero-shot classification

In [6]:
# Zero-shot topic classification: score the sentence against the candidate
# labels using the BART-large MNLI checkpoint.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
classifier(
    "I love Canada, oh the beautiful maple leaf on the flag",
    candidate_labels=["Country"],
)

{'sequence': 'I love Canada, oh the beautiful maple leaf on the flag',
 'labels': ['Country'],
 'scores': [0.9876495599746704]}

# TEXT GENERATOR

In [5]:
# Generate three continuations of a prompt with GPT-2.
# Generation here is sampled, so the random seed is fixed first to make the
# results repeatable across Restart & Run All.
set_seed(42)
generator = pipeline('text-generation', model='gpt2')
generator("Hello, I'm a language model", max_length=30, num_return_sequences=3)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "Hello, I'm a language modeler so I know what to do and how to use data with it. How to use it and what data to"},
 {'generated_text': "Hello, I'm a language modeler, so this is pretty complicated, but I'm really happy to present you the code.\n\nclass Program"},
 {'generated_text': "Hello, I'm a language model student at the University of California. So I've started writing my first language design documentation, so I should be able"}]

# NAMED ENTITY RECOGNITION

In [9]:
# Named-entity recognition with sub-word tokens merged into whole entities.
# `aggregation_strategy="simple"` is the supported replacement for the
# deprecated `grouped_entities=True` flag and produces the same grouping.
# NOTE(review): camembert-ner looks like a French-language model while the
# sample sentence is English — confirm the model choice.
nlp = pipeline(
    "ner",
    model="Jean-Baptiste/camembert-ner",
    aggregation_strategy="simple",
)
eg = "Deepseek is the new competitor for OpenAI's ChatGPT."

ner_results = nlp(eg)
print(ner_results)

[{'entity_group': 'ORG', 'score': 0.8332773, 'word': 'Deepseek', 'start': 0, 'end': 8}, {'entity_group': 'MISC', 'score': 0.9667167, 'word': "OpenAI's ChatGPT", 'start': 34, 'end': 51}]


# QUESTION ANSWERING

In [6]:
# Extractive question answering: the answer is returned with character
# offsets ('start'/'end') into the supplied context.
qa = pipeline(task="question-answering", model="distilbert-base-cased-distilled-squad")
qa(
    question="Which lake is one of the five Great Lakes of North America?",
    # Adjacent string literals are joined by Python into one context string.
    context=(
        "Lake Ontario is one of the five Great Lakes of North America. "
        "It is surrounded on the north, west, and southwest by the Canadian "
        "province of Ontario, and on the south and east by the U.S. state of "
        "New York, whose water boundaries, along the international border, "
        "meet in the middle of the lake."
    ),
)

{'score': 0.9834370017051697, 'start': 0, 'end': 12, 'answer': 'Lake Ontario'}

# TEXT SUMMARIZATION

In [7]:
# Summarize a short passage with DistilBART (CNN/DailyMail checkpoint).
# max_length=59 is supplied when the pipeline is constructed.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    max_length=59,
)
# The passage is passed verbatim, including its leading newline and indentation.
summarizer(
    """
    Canada is the second-largest country in the world by land area, known for its vast natural landscapes, multicultural population,
    and high quality of life. Located in North America, it shares the longest international border with the United States. 
    Canada is made up of ten provinces and three territories, with Ottawa as its capital and major cities 
    like Toronto, Vancouver, and Montreal serving as economic and cultural hubs. 
"""
)

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

[{'summary_text': ' Canada is the second-largest country in the world by land area . Located in North America, it shares the longest international border with the United States . Canada is made up of ten provinces and three territories, with Ottawa as its capital and major cities like Toronto, Vancouver, and Montreal'}]

# LANGUAGE TRANSLATION

In [8]:
# English-to-French translation using the small T5 checkpoint.
translator = pipeline(
    task="translation_en_to_fr",
    model="t5-small",
)
print(
    translator("Toronto is my favourite city", max_length=40)
)

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/231M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

[{'translation_text': 'Toronto est ma ville préférée'}]
