### install the required libraries

In [None]:
# pip install transformers datasets

In [1]:
import os

# Disable TensorFlow & Flax backend
os.environ["TRANSFORMERS_NO_TF"] = "1"
os.environ["TRANSFORMERS_NO_FLAX"] = "1"

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
from transformers import pipeline

### sentiment analysis

In [5]:
# Force pipeline to use PyTorch ("pt")
classifier = pipeline("sentiment-analysis", framework="pt")

In [6]:
# Test
result = classifier("I really love this app, it's amazing!")

print(result)

[{'label': 'POSITIVE', 'score': 0.9998834133148193}]


In [10]:
test = [ "Hey I really love you" , 
        "Till now I have seen many worst but you even more!",
        "You think you are best.Actually you are right but you are also wrong",
       "I’m not saying I hate you, what I’m saying is that you are literally the Monday of my life."
       ]

In [11]:
for idx in test:
    result = classifier(idx)
    print(result)

[{'label': 'POSITIVE', 'score': 0.9998151659965515}]
[{'label': 'NEGATIVE', 'score': 0.9551741480827332}]
[{'label': 'NEGATIVE', 'score': 0.9951919317245483}]
[{'label': 'POSITIVE', 'score': 0.9960795044898987}]


### summarization

In [13]:
summarizer = pipeline("summarization" , framework='pt')

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


In [14]:
text = """Hugging Face is an open-source platform 
that provides pre-trained models for NLP, CV, and speech tasks. 
It allows researchers and developers to share and use models easily."""

summary = summarizer(text, max_length=40, min_length=10, do_sample=False)

In [15]:
print(summary)

[{'summary_text': ' Hugging Face is an open-source platform that provides pre-trained models for NLP, CV, and speech tasks . It allows researchers and developers to share and use models easily .'}]


In [16]:
print(summary[0]['summary_text'])

 Hugging Face is an open-source platform that provides pre-trained models for NLP, CV, and speech tasks . It allows researchers and developers to share and use models easily .


In [17]:
text = """
Ghattamaneni Mahesh Babu (born 9 August 1975) is an Indian actor, producer and philanthropist who works in Telugu cinema. He is one of the highest-paid actors in Indian cinema and has featured in Forbes India's Celebrity 100 list since 2012.[1][2] He has appeared in over 25 films and is a recipient of several accolades including, nine Nandi Awards, five Filmfare Awards South and four SIIMA Awards.

The younger son of veteran actor Krishna, Mahesh Babu made his debut as a child artist in a cameo role in a Telugu film called Needa (1979), at the age of four. He went on to act as a child artist in the films Sankharavam (1987), Bazaar Rowdy, Mugguru Kodukulu (both 1988) and Gudachari 117 (1989). He played a dual role in the film Koduku Diddina Kapuram (1989). Babu then appeared in Balachandrudu and Anna Thammudu (both 1990). He made his debut as a lead actor with Rajakumarudu (1999) which won him the Nandi Award for Best Male Debut
"""

In [18]:
summary = summarizer(text, max_length=70, min_length=10, do_sample=False)

In [19]:
print(summary[0]['summary_text'])

 Mahesh Babu is one of the highest-paid actors in Indian cinema and has featured in Forbes India's Celebrity 100 list since 2012 . He has appeared in over 25 films and is a recipient of several accolades including, nine Nandi Awards, five Filmfare Awards South and four SIIMA Awards .


### Loading the datasets

In [20]:
from datasets import load_dataset

In [23]:
df = load_dataset("sentence-transformers/stsb" )

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


train-00000-of-00001.parquet:   0%|          | 0.00/471k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


validation-00000-of-00001.parquet:   0%|          | 0.00/142k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


test-00000-of-00001.parquet:   0%|          | 0.00/108k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]

In [26]:
df

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'score'],
        num_rows: 5749
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'score'],
        num_rows: 1500
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'score'],
        num_rows: 1379
    })
})

In [28]:
df['train'].features

{'sentence1': Value('string'),
 'sentence2': Value('string'),
 'score': Value('float64')}

In [29]:
import pandas as pd

In [30]:
splits = {'train': 'data/train-00000-of-00001.parquet', 'validation': 'data/validation-00000-of-00001.parquet', 'test': 'data/test-00000-of-00001.parquet'}

df = pd.read_parquet("hf://datasets/sentence-transformers/stsb/" + splits["train"])

In [31]:
df

Unnamed: 0,sentence1,sentence2,score
0,A plane is taking off.,An air plane is taking off.,1.00
1,A man is playing a large flute.,A man is playing a flute.,0.76
2,A man is spreading shreded cheese on a pizza.,A man is spreading shredded cheese on an uncoo...,0.76
3,Three men are playing chess.,Two men are playing chess.,0.52
4,A man is playing the cello.,A man seated is playing the cello.,0.85
...,...,...,...
5744,Severe Gales As Storm Clodagh Hits Britain,Merkel pledges NATO solidarity with Latvia,0.00
5745,Dozens of Egyptians hostages taken by Libyan t...,Egyptian boat crash death toll rises as more b...,0.00
5746,President heading to Bahrain,President Xi: China to continue help to fight ...,0.00
5747,"China, India vow to further bilateral ties",China Scrambles to Reassure Jittery Stock Traders,0.00
