In [26]:
!pip install 'transformers[torch]'



In [27]:
!pip install transformers datasets



In [28]:
!pip install accelerate



In [29]:
import torch
from transformers import pipeline
from transformers import BertTokenizer, AutoModel, AutoModelForSequenceClassification,BertForNextSentencePrediction

## Masked Language Modeling (MLM)

In [30]:
# Build a fill-mask pipeline around BERT and predict the [MASK] token.
#
# The device and dtype are chosen from what is actually available instead of
# hard-coding GPU 0 with half precision: float16 is only worthwhile on a GPU,
# and on CPU it is slow and may hit ops that lack a half-precision kernel.
use_gpu = torch.cuda.is_available()
fill_mask_pipeline = pipeline(
    task="fill-mask",
    model="google-bert/bert-base-uncased",
    torch_dtype=torch.float16 if use_gpu else torch.float32,
    device=0 if use_gpu else -1,  # -1 selects the CPU
)
# Last expression of the cell: the candidate fills are displayed as the cell output.
fill_mask_pipeline("Plants create [MASK] through a process known as photosynthesis.")

Some weights of the model checkpoint at google-bert/bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


[{'score': 0.152099609375,
  'token': 2943,
  'token_str': 'energy',
  'sequence': 'plants create energy through a process known as photosynthesis.'},
 {'score': 0.1451416015625,
  'token': 4870,
  'token_str': 'flowers',
  'sequence': 'plants create flowers through a process known as photosynthesis.'},
 {'score': 0.08203125,
  'token': 9325,
  'token_str': 'sunlight',
  'sequence': 'plants create sunlight through a process known as photosynthesis.'},
 {'score': 0.04290771484375,
  'token': 18670,
  'token_str': 'algae',
  'sequence': 'plants create algae through a process known as photosynthesis.'},
 {'score': 0.037567138671875,
  'token': 12649,
  'token_str': 'atp',
  'sequence': 'plants create atp through a process known as photosynthesis.'}]

## Sentiment Analysis

In [31]:
# Sentiment classifier backed by a multilingual BERT checkpoint.
classifier = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

Device set to use cpu


In [32]:
# Classify a single review sentence and print the predicted label with its score.
review_text = "this movie is not bad"
results = classifier(review_text)
print(results)

[{'label': '4 stars', 'score': 0.405004620552063}]


## Named Entity Recognition (NER)

In [33]:
# Named-entity-recognition pipeline.
# `aggregation_strategy="simple"` replaces the deprecated `grouped_entities=True`
# flag: it merges adjacent tokens of the same entity into one span, so each
# result carries an `entity_group` instead of per-token tags.
ner = pipeline(
    task="ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [34]:
# Run NER over one sentence and print the grouped entities.
# NOTE(review): "UnitedStates" has no space; the recorded output tags it as ORG,
# which may be a consequence of that typo -- confirm whether it is intentional.
result = ner(
    "Steve Jobs introduced iPhone at the Macworld Conference in San Francisco, California, UnitedStates"
)
print(result)

[{'entity_group': 'PER', 'score': np.float32(0.9983712), 'word': 'Steve Jobs', 'start': 0, 'end': 10}, {'entity_group': 'MISC', 'score': np.float32(0.99365836), 'word': 'iPhone', 'start': 22, 'end': 28}, {'entity_group': 'MISC', 'score': np.float32(0.99538404), 'word': 'Macworld Conference', 'start': 36, 'end': 55}, {'entity_group': 'LOC', 'score': np.float32(0.99930763), 'word': 'San Francisco', 'start': 59, 'end': 72}, {'entity_group': 'LOC', 'score': np.float32(0.9994547), 'word': 'California', 'start': 74, 'end': 84}, {'entity_group': 'ORG', 'score': np.float32(0.99548966), 'word': 'UnitedStates', 'start': 86, 'end': 98}]


## Question Answering

In [35]:
# Question-answering pipeline using a DistilBERT checkpoint distilled on SQuAD.
qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)

Device set to use cpu


In [36]:
# Ask one question against a short context passage; the pipeline returns the
# answer text together with its score and character offsets into the context.
context = "Iam Jhon.My favorite sport is football.I happily live in Florida."
ques = "Which is your favorite sport?"
result = qa(context=context, question=ques)
print(result)

{'score': 0.9917546510696411, 'start': 30, 'end': 38, 'answer': 'football'}


## Translation

In [37]:
# English -> Arabic translation pipeline built on the Helsinki-NLP OPUS-MT model.
translator = pipeline(
    task="translation_en_to_ar",
    model="Helsinki-NLP/opus-mt-en-ar",
)

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/308M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/801k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/917k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.12M [00:00<?, ?B/s]

Device set to use cpu


In [38]:
# Translate a short question and print the list of translation dicts.
result = translator("how are you?")
print(result)

[{'translation_text': 'كيف حالك؟'}]


In [40]:
# Translate a second sentence; kept in its own variable so the previous
# translation in `result` is not overwritten.
result2 = translator("I am fine")
print(result2)

[{'translation_text': 'أنا بخير'}]
