In [None]:
#BERT FOR text classification
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load pre-trained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)  # Adjust num_labels as needed

# Input text
text = "This is a great movie!"

# Tokenize input text
inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

# Get predictions
logits = outputs.logits
predicted_class = torch.argmax(logits, dim=1).item()

# Output the predicted class
print(f"Predicted class: {predicted_class}")
if predicted_class == 0:
    print("Negative sentiment")
else:
    print("Positive sentiment")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted class: 0
Negative sentiment


In [None]:
#BERT for NER
from transformers import BertTokenizer, BertForTokenClassification
from transformers import pipeline

# Load pre-trained BERT tokenizer and model for NER
tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-large-cased-finetuned-conll03-english')
model = BertForTokenClassification.from_pretrained('dbmdz/bert-large-cased-finetuned-conll03-english')

# NER pipeline
ner_pipeline = pipeline('ner', model=model, tokenizer=tokenizer)

# Input text
text = "My name is John and I live in New York City."

# Perform NER
ner_results = ner_pipeline(text)

# Output the results
for entity in ner_results:
    print(f"Entity: {entity['word']} - Label: {entity['entity']} - Score: {entity['score']:.2f}")


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Entity: John - Label: I-PER - Score: 1.00
Entity: New - Label: I-LOC - Score: 1.00
Entity: York - Label: I-LOC - Score: 1.00
Entity: City - Label: I-LOC - Score: 1.00


In [8]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

# Load the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

# Define the context and questions
context = """
Elon Musk is the CEO of SpaceX, an American aerospace manufacturer and space transport services company headquartered in Hawthorne, California.
He founded SpaceX in 2002 with the goal of reducing space transportation costs to enable the colonization of Mars.
"""
questions = [
    "Who is the CEO of SpaceX?",
    "When was SpaceX founded?",
    "What is the goal of SpaceX?"
]

# Function to answer a question based on context
def answer_question(question, context):
    # Encode the inputs
    inputs = tokenizer.encode_plus(question, context, return_tensors='pt')
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    # Get model outputs
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits

    # Find the most likely start and end of the answer
    start_index = torch.argmax(start_scores)
    end_index = torch.argmax(end_scores)

    # Convert tokens to answer
    answer_tokens = input_ids[0][start_index:end_index + 1]
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)

    return answer

# Answer each question
for question in questions:
    answer = answer_question(question, context)
    print(f"Q: {question}")
    print(f"A: {answer}\n")


Q: Who is the CEO of SpaceX?
A: elon musk

Q: When was SpaceX founded?
A: 2002

Q: What is the goal of SpaceX?
A: reducing space transportation costs to enable the colonization of mars

