# Question Answering


In [None]:
%pip install transformers -U
%pip install sentencepiece
%pip install Pillow
%pip install torch
%pip install numpy
%pip install matplotlib
%pip install tqdm
%pip install torchtext
%pip install torchsummary
%pip install torchviz
%pip install tensorboard
%pip install tensorboardX
%pip install torchmetrics
%pip install pytorch-lightning


In [19]:

from langchain_community.llms import HuggingFaceHub
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from getpass import getpass

# Prompt the user to securely input the Hugging Face API token
api_token = getpass("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")

### DQA

##### microsoft/layoutlmv2-base-uncased

In [13]:
from transformers import pipeline

# Set up pipeline with API key
generator = pipeline('text-generation',
                     api_key='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')

No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


#### Key Concepts in the Code
###### OCR with Pytesseract:
Extracts words and their positions from the image.

###### Bounding Box Normalization:
Ensures coordinates match LayoutLMv2’s expected input range.

###### Tokenization and Encoding:
Prepares the image and text for input into the transformer model.

###### Answer Extraction:
Identifies the most probable span of text corresponding to the answer.

###### Pretrained Model:
Leverages LayoutLMv2’s capabilities to process structured documents and handle layout-aware questions.

In [None]:
from transformers import LayoutLMv2ForQuestionAnswering, LayoutLMv2Processor
from PIL import Image
import torch

# Load the model
model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")

# Load the document image
image_path = "/home/vai/Desktop/chat_bot_experiment_one/cloud/test.png"  # Replace with your document image path
image = Image.open(image_path).convert("RGB")


# Define the question and context (text from the document)
question = "waht is langChan format ?"


# Define some example text and bounding boxes
text = ["What", "is", "LangChain", "format", "?"]
bbox = [[50, 50, 150, 100], [160, 50, 240, 100], [250, 50, 350, 100], [360, 50, 460, 100], [470, 50, 520, 100]]

# Process the inputs (image, text, and bounding boxes)
encoded_inputs = processor(image, text,return_tensors="pt", truncation=True)



# Get predictions
outputs = model(**encoded_inputs)



# Extract the start and end logits for the answer
start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Get the answer tokens
start_index = torch.argmax(start_logits, dim=1).item()
end_index = torch.argmax(end_logits, dim=1).item()

# Decode the answer
tokens = encoded_inputs["input_ids"].squeeze()
answer = processor.tokenizer.decode(tokens[start_index:end_index + 1])

print(f"Question: {question}")
print(f"Answer: {answer}")


##### fine-tunned

#### tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa

In [None]:
from transformers import LayoutLMv2Processor, LayoutLMv2ForQuestionAnswering
from PIL import Image
import torch
import pytesseract

# Load model and processor
model = LayoutLMv2ForQuestionAnswering.from_pretrained("tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa")
processor = LayoutLMv2Processor.from_pretrained("tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa")

# Load image
image_path = "/home/vai/Desktop/chat_bot_experiment_one/cloud/test.png"
image = Image.open(image_path).convert("RGB")

# Extract words and bounding boxes using OCR
ocr_results = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
words = []
boxes = []

# Get image dimensions
image_width, image_height = image.size

for i in range(len(ocr_results['text'])):
    if ocr_results['text'][i].strip():  # Ignore empty text
        words.append(ocr_results['text'][i])
        
        # Extract bounding box coordinates
        x, y, w, h = (ocr_results['left'][i], ocr_results['top'][i],
                      ocr_results['width'][i], ocr_results['height'][i])

        # Normalize coordinates to 0–1000 range
        normalized_bbox = [
            int(1000 * (x / image_width)),
            int(1000 * (y / image_height)),
            int(1000 * ((x + w) / image_width)),
            int(1000 * ((y + h) / image_height))
        ]
        boxes.append(normalized_bbox)


# Question
question = "Tell me someting about covid?"

# Encode inputs with normalized bounding boxes
encoded_inputs = processor(
    image,
    words,
    boxes=boxes,  # Normalized bounding boxes
    return_tensors="pt",
    # truncation=True,
    
)

# Get predictions
outputs = model(**encoded_inputs)
start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Extract the answer
start_index = torch.argmax(start_logits, dim=1).item()
end_index = torch.argmax(end_logits, dim=1).item()

tokens = encoded_inputs["input_ids"].squeeze()
answer = processor.tokenizer.decode(tokens[start_index:end_index + 1])

print(f"Question: {question}")
print(f"Answer: {answer}")


###### check quality

In [6]:
print(words)
print(boxes)

['[Document', '(page_content="This', 'transcript', 'is', 'provided', 'for', 'the', 'convenience', 'of', 'investors', 'only,', 'for', 'a', 'full', 'recording', 'pleas', 'e', 'see', 'the', 'Q4', '2021', 'Earnings', 'Call', 'webcast', '.\\n\\nAlphabet', 'Q4', '2021', 'Earnings', 'Call', 'February', '1,', '2022\\n\\nOperator:', 'Welcome', 'eve', 'ryone.', 'And', 'thank', 'you', 'for', 'standing', 'by', 'for', 'the', 'Alphabet', 'fourth', 'quarter', '2021', 'earnings', 'conference', 'call.', 'At', 'this', 'time,', 'all', 'participants', 'are', 'in', 'a', 'listen-only', 'mode.', 'After', 'the', 'speaker', 'presentation,', 'there', 'will', 'be', 'a', 'question', 'and', 'answer', 'session.', 'To', 'ask', 'a', 'question', 'during', 'the', 'session,', 'you', 'will', 'need', 'to', 'press', 'star', 'one', 'on', 'your', 'telephone.', 'If', 'you', 'require', 'any', 'further', 'a', 'ssistance,', 'please', 'press', 'star', 'zero.', 'I', 'would', 'now', 'like', 'to', 'hand', 'the', 'conference', 'over'

#### microsoft/layoutlmv2-base-uncased

In [4]:
from transformers import LayoutLMv2Processor, LayoutLMv2ForQuestionAnswering
from PIL import Image
import torch

# Load the processor and modelprocessor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")

model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")

# Load an image
image = Image.open("cloud/test.png").convert("RGB")

# Define the question
question = "What is the question you want to ask?"

# Process the image and question
encoding = processor(image, question, return_tensors="pt")

# Forward pass
outputs = model(**encoding)
start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Get the most likely beginning and end of answer with the argmax of the logits
start_index = torch.argmax(start_logits)
end_index = torch.argmax(end_logits)

# Convert the tokens to the answer
all_tokens = processor.tokenizer.convert_ids_to_tokens(encoding["input_ids"].squeeze().tolist())
answer = " ".join(all_tokens[start_index:end_index+1])

print("Answer:", answer)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Some weights of LayoutLMv2ForQuestionAnswering were not initialized from the model checkpoint at microsoft/layoutlmv2-base-uncased and are newly initialized: ['layoutlmv2.visual_segment_embedding', 'qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyError: 'boxes'

## naver-clova-ix/donut-base

##### Step-by-step Document Image Classification
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-rvlcdip")

model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-rvlcdip")

##### Step-by-step Document Parsing
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")

model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2").

##### Step-by-step Document Visual Question Answering (DocVQA)
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")

model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")

#### naver-clova-ix/donut-base-finetuned-docvqa

In [None]:
from transformers import DonutProcessor, VisionEncoderDecoderModel, pipeline
from datasets import load_dataset
import torch
from PIL import Image
import re


# # Load the processor and model
# processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
# model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")


processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# load document image from the DocVQA dataset
# dataset = load_dataset("hf-internal-testing/example-documents", split="test")
dataset = load_dataset("opendatalab/OmniDocBench", split="test")


image = dataset[0]["image"]

# prepare decoder inputs
task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
question = "When is the coffee break?"
prompt = task_prompt.replace("{user_input}", question)
decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt").input_ids

pixel_values = processor(image, return_tensors="pt").pixel_values

outputs = model.generate(
    pixel_values.to(device),
    decoder_input_ids=decoder_input_ids.to(device),
    max_length=model.decoder.config.max_position_embeddings,
    pad_token_id=processor.tokenizer.pad_token_id,
    eos_token_id=processor.tokenizer.eos_token_id,
    use_cache=True,
    bad_words_ids=[[processor.tokenizer.unk_token_id]],
    return_dict_in_generate=True,
)

sequence = processor.batch_decode(outputs.sequences)[0]
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
print(processor.token2json(sequence))


Config of the encoder: <class 'transformers.models.donut.modeling_donut_swin.DonutSwinModel'> is overwritten by shared encoder config: DonutSwinConfig {
  "attention_probs_dropout_prob": 0.0,
  "depths": [
    2,
    2,
    14,
    2
  ],
  "drop_path_rate": 0.1,
  "embed_dim": 128,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "image_size": [
    2560,
    1920
  ],
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-05,
  "mlp_ratio": 4.0,
  "model_type": "donut-swin",
  "num_channels": 3,
  "num_heads": [
    4,
    8,
    16,
    32
  ],
  "num_layers": 4,
  "patch_size": 4,
  "path_norm": true,
  "qkv_bias": true,
  "transformers_version": "4.47.1",
  "use_absolute_embeddings": false,
  "window_size": 10
}

Config of the decoder: <class 'transformers.models.mbart.modeling_mbart.MBartForCausalLM'> is overwritten by shared decoder config: MBartConfig {
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_cross_attention": true,
  "add

{'question': 'When is the coffee break?', 'answer': '11-14 to 11:39 a.m.'}


### VQA

#### openai/clip-vit-base-patch32

In [None]:
from PIL import Image
import requests

from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# image = Image.open(requests.get(url, stream=True).raw)

image = Image.open("cloud/catandbaby.jpg")

inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)

outputs = model(**inputs)
logits_per_image = outputs.logits_per_image 
probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities

print("Label probability of:", probs)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Label probability of: tensor([[0.9737, 0.0263]], grad_fn=<SoftmaxBackward0>)


#### Salesforce/blip2-flan-t5-xl

In [6]:
from PIL import Image
import requests
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch

# device = "cuda" if torch.cuda.is_available() else "cpu"

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-flan-t5-xl", torch_dtype=torch.float16
)  # doctest: +IGNORE_RESULT

# url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# image = Image.open(requests.get(url, stream=True).raw)



# Example usage
# image = Image.open("cloud/catandbaby.jpg").convert("RGB")
image = Image.open("cloud/demo_pic.jpeg")



inputs = processor(images=image, return_tensors="pt")

outputs = model.generate(pixel_values=inputs["pixel_values"])
generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]

print(generated_text)

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.42s/it]


the poster for the movie frank vs frank


#### dandelin/vilt-b32-finetuned-vqa

In [None]:
from transformers import ViltProcessor, ViltForQuestionAnswering
from PIL import Image

# Load the processor and model
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")

# Load an image
image = Image.open("cloud/test.png").convert("RGB")

# Define the question
question = "What is the question you want to ask?"

# Process the image and question
encoding = processor(image, question, return_tensors="pt")

# Forward pass
outputs = model(**encoding)
start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Get the most likely beginning and end of answer with the argmax of the logits
start_index = torch.argmax(start_logits)
end_index = torch.argmax(end_logits)

# Convert the tokens to the answer
all_tokens = processor.tokenizer.convert_ids_to_tokens(encoding["input_ids"].squeeze().tolist())
answer = " ".join(all_tokens[start_index:end_index+1])

print("Answer:", answer)

#### openai/clip-vit-large-patch14

In [10]:
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

# Load the processor and model
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Example usage
image = Image.open("cloud/catandbaby.jpg").convert("RGB")
inputs = processor(text=["a photo of a cat"], images=image, return_tensors="pt", padding=True)

outputs = model(**inputs)
logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the probabilities
print(probs)

tensor([[1.]], grad_fn=<SoftmaxBackward0>)


### QA

#### deepset/roberta-base-squad2

In [12]:
from transformers import pipeline

# Load the question answering pipeline
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")

# Example usage
context = """The 2025 Bangladesh General Election is set to take place on January 15, 2025, as scheduled by the Election Commission of Bangladesh. This election will determine the composition of the Jatiya Sangsad (National Parliament) for the next five years.Key Parties and Candidates Awami League (AL) - The ruling party, led by Sheikh Hasina, is campaigning on its track record of economic growth, infrastructure development, and social reforms.
Bangladesh Nationalist Party (BNP) - The main opposition, led by Tarique Rahman, is focusing on anti-corruption measures, electoral reforms, and strengthening democracy.
Jatiya Party (JP) - A centrist party, led by GM Quader, aims to act as a kingmaker if no party wins a majority.
New Progressive Alliance (NPA) - A coalition of smaller parties and independent candidates, advocating for youth employment, climate change mitigation, and digital governance.
Major Issues in the Election
Economic Challenges: High inflation, unemployment, and trade deficits have become key concerns for voters.
Democratic Reforms: Opposition parties are demanding reforms in election procedures, including the deployment of international observers.
Infrastructure and Development: The government highlights major projects like the Padma Bridge and Metro Rail as signs of progress.
Climate Change and Resilience: Bangladesh's vulnerability to natural disasters and rising sea levels remains a major issue.
Youth Empowerment: Parties are addressing demands for modern education reforms, digital jobs, and entrepreneurship opportunities.
Security and Monitoring
The Election Commission has announced the deployment of 70,000 security personnel, including the Rapid Action Battalion (RAB), to ensure a peaceful election. Over 100 international observers and local NGOs will monitor voting transparency.

Opinion Polls
Awami League: 45%
BNP: 35%
Jatiya Party: 10%
Undecided Voters: 10%
Controversies
Election Integrity - The opposition has accused the government of bias in election management and called for a caretaker government to oversee the polls.
Media Freedom - Allegations of media censorship and intimidation of journalists have sparked debates on press freedom.
Violence - Several clashes between supporters of different parties have been reported during pre-election rallies.
Expected Outcomes
While Awami League is projected to retain power based on current trends, BNP is expected to increase its parliamentary seats, potentially leading to a more competitive and divided parliament."""
question = "Which party win the election?"
result = qa_pipeline(question=question, context=context)

print(f"Question: {question}")
print(f"Answer: {result['answer']}")

Device set to use cpu


Question: Which party win the election?
Answer: Awami League


#### timpal0l/mdeberta-v3-base-squad2

In [15]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("timpal0l/mdeberta-v3-base-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("timpal0l/mdeberta-v3-base-squad2")

# Create a question answering pipeline
qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

# Example usage
context = """The 2025 Bangladesh General Election is set to take place on January 15, 2025, as scheduled by the Election Commission of Bangladesh. This election will determine the composition of the Jatiya Sangsad (National Parliament) for the next five years.Key Parties and Candidates Awami League (AL) - The ruling party, led by Sheikh Hasina, is campaigning on its track record of economic growth, infrastructure development, and social reforms.
Bangladesh Nationalist Party (BNP) - The main opposition, led by Tarique Rahman, is focusing on anti-corruption measures, electoral reforms, and strengthening democracy.
Jatiya Party (JP) - A centrist party, led by GM Quader, aims to act as a kingmaker if no party wins a majority.
New Progressive Alliance (NPA) - A coalition of smaller parties and independent candidates, advocating for youth employment, climate change mitigation, and digital governance.
Major Issues in the Election
Economic Challenges: High inflation, unemployment, and trade deficits have become key concerns for voters.
Democratic Reforms: Opposition parties are demanding reforms in election procedures, including the deployment of international observers.
Infrastructure and Development: The government highlights major projects like the Padma Bridge and Metro Rail as signs of progress.
Climate Change and Resilience: Bangladesh's vulnerability to natural disasters and rising sea levels remains a major issue.
Youth Empowerment: Parties are addressing demands for modern education reforms, digital jobs, and entrepreneurship opportunities.
Security and Monitoring
The Election Commission has announced the deployment of 70,000 security personnel, including the Rapid Action Battalion (RAB), to ensure a peaceful election. Over 100 international observers and local NGOs will monitor voting transparency.

Opinion Polls
Awami League: 45%
BNP: 35%
Jatiya Party: 10%
Undecided Voters: 10%
Controversies
Election Integrity - The opposition has accused the government of bias in election management and called for a caretaker government to oversee the polls.
Media Freedom - Allegations of media censorship and intimidation of journalists have sparked debates on press freedom.
Violence - Several clashes between supporters of different parties have been reported during pre-election rallies.
Expected Outcomes
While Awami League is projected to retain power based on current trends, BNP is expected to increase its parliamentary seats, potentially leading to a more competitive and divided parliament."""
question = "What is Jatiya Party percentage?"
result = qa_pipeline(question=question, context=context)

print(f"Question: {question}")
print(f"Answer: {result['answer']}")

Device set to use cpu


Question: What is Jatiya Party percentage?
Answer:  10%


### TQA

#### google/tapas-base-finetuned-wtq

In [5]:


from transformers import TapasTokenizer, TapasForQuestionAnswering
import pandas as pd

# Load the tokenizer and model
tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")
model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")

# Example table
data = {
	"Actors": ["Brad Pitt", "Leonardo DiCaprio", "George Clooney"],
	"Number of movies": ["87", "53", "69"]
}
table = pd.DataFrame.from_dict(data)

# Example question
queries = ["How many movies has George Clooney played in?"]

# Tokenize the inputs
inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")

# Get the model outputs
outputs = model(**inputs)

# Convert the logits to predictions
logits = outputs.logits.detach()  # Detach the logits tensor
predicted_answer_coordinates = tokenizer.convert_logits_to_predictions(
	inputs, logits
)

# Check if the predicted answer coordinates are valid
if predicted_answer_coordinates and predicted_answer_coordinates[0]:
	answer_coordinates = predicted_answer_coordinates[0]
	answer = table.iat[answer_coordinates[0][0], answer_coordinates[0][1]]
else:
	answer = "No answer found"

print(f"Question: {queries[0]}")
print(f"Answer: {answer}")

IndexError: list index out of range

#### microsoft/tapex-large-finetuned-wtq

In [2]:
from transformers import TapasTokenizer, TapasForQuestionAnswering
import pandas as pd

# Load the tokenizer and model
tokenizer = TapasTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = TapasForQuestionAnswering.from_pretrained("microsoft/tapex-large-finetuned-wtq")

# Example table
data = {
	"Actors": ["Brad Pitt", "Leonardo DiCaprio", "George Clooney"],
	"Number of movies": ["87", "53", "69"]
}
table = pd.DataFrame.from_dict(data)

# Example question
queries = ["How many movies has George Clooney played in?"]

# Tokenize the inputs
inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")

# Get the model outputs
outputs = model(**inputs)

# Convert the logits to predictions
logits = outputs.logits.detach()  # Detach the logits tensor
predicted_answer_coordinates = tokenizer.convert_logits_to_predictions(inputs, logits)

# Handle empty or invalid predictions
if predicted_answer_coordinates and len(predicted_answer_coordinates[0]) > 0:
	answer_coordinates = predicted_answer_coordinates[0]

	# Ensure coordinates are valid (non-negative and within bounds)
	if all(0 <= coord < len(table) for coord in answer_coordinates[0][0]):
		answer = table.iat[answer_coordinates[0][0][0], answer_coordinates[0][0][1]]
	else:
		answer = "No valid answer found"
else:
	answer = "No answer found"

print(f"Question: {queries[0]}")
print(f"Answer: {answer}")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'TapexTokenizer'. 
The class this function is called from is 'TapasTokenizer'.


TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

#### google/tapas-base-finetuned-wtq

In [1]:
from transformers import TapasTokenizer, TapasForQuestionAnswering
import pandas as pd

# Load the tokenizer and model
tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")
model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")

# Example table
data = {
	"Actors": ["Brad Pitt", "Leonardo DiCaprio", "George Clooney"],
	"Number of movies": ["87", "53", "69"]
}
table = pd.DataFrame.from_dict(data)

# Example question
queries = ["How many movies has George Clooney played in?"]

# Tokenize the inputs
inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")

# Get the model outputs
outputs = model(**inputs)

# Convert the logits to predictions
logits = outputs.logits.detach()  # Detach the logits tensor
predicted_answer_coordinates = tokenizer.convert_logits_to_predictions(inputs, logits)

# # Check if the predicted answer coordinates are valid
# if predicted_answer_coordinates and predicted_answer_coordinates[0]:
# 	answer_coordinates = predicted_answer_coordinates[0]
# 	answer = table.iat[answer_coordinates[0][0], answer_coordinates[0][1]]
# else:
# 	answer = "No answer found"

# if predicted_answer_coordinates and predicted_answer_coordinates[0]:
#     answer_coordinates = predicted_answer_coordinates[0]
#     if answer_coordinates:  # Check if there is a valid coordinate
#         answer = table.iat[answer_coordinates[0][0], answer_coordinates[0][1]]
#     else:
#         answer = "No answer found"
# else:
#     answer = "No answer found"

# Handle empty or invalid predictions
if predicted_answer_coordinates and len(predicted_answer_coordinates[0]) > 0:
    answer_coordinates = predicted_answer_coordinates[0]

    # Ensure coordinates are valid (non-negative and within bounds)
    if all(0 <= coord < len(table) for coord in answer_coordinates[0][0]):
        answer = table.iat[answer_coordinates[0][0][0], answer_coordinates[0][0][1]]
    else:
        answer = "No valid answer found"
else:
    answer = "No answer found"



print(f"Question: {queries[0]}")
print(f"Answer: {answer}")

  from .autonotebook import tqdm as notebook_tqdm
  text = normalize_for_match(row[col_index].text)
  cell = row[col_index]


Question: How many movies has George Clooney played in?
Answer: 69


### Translation

#### Helsinki-NLP/opus-mt-en-de

In [2]:
from transformers import MarianMTModel, MarianTokenizer

# Load the model and tokenizer
model_name = "Helsinki-NLP/opus-mt-en-de"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Example usage
text = "Hello, how are you?"
inputs = tokenizer(text, return_tensors="pt")
translated_tokens = model.generate(**inputs)
translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

print(f"Translated text: {translated_text}")

Translated text: Hallo, wie geht's?


#### facebook/m2m100_418M

In [12]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

en_text = "Where do you live?"
chinese_text = "生活就像一盒巧克力。"

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

# translate Hindi to French
tokenizer.src_lang = "en"
encoded_en = tokenizer(en_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.get_lang_id("bn"))
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
# => "La vie est comme une boîte de chocolat."

# # translate Chinese to English
# tokenizer.src_lang = "zh"
# encoded_zh = tokenizer(chinese_text, return_tensors="pt")
# generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
# tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
# => "Life is like a box of chocolate."


['আপনি কোথায় বসবাস করেন?']

#### google-t5/t5-small

In [23]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the model and tokenizer
model_name = "google/t5-small"

# Use the api_token variable defined in a previous cell
tokenizer = T5Tokenizer.from_pretrained(model_name, use_auth_token=api_token)
model = T5ForConditionalGeneration.from_pretrained(model_name, use_auth_token=api_token)

# Example usage
text = "Translate English to German: Hello, how are you?"
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"Generated text: {generated_text}")


OSError: google/t5-small is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

#### Helsinki-NLP/opus-mt-zh-en

In [24]:
from transformers import MarianMTModel, MarianTokenizer

# Load the model and tokenizer
model_name = "Helsinki-NLP/opus-mt-zh-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Example usage
text = "你好，你怎么样？"
inputs = tokenizer(text, return_tensors="pt")
translated_tokens = model.generate(**inputs)
translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

print(f"Translated text: {translated_text}")

Translated text: Hello. How are you?


#### facebook/nllb-200-distilled-600M

In [3]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Load the model and tokenizer
model_name = "facebook/nllb-200-distilled-600M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Example usage
en_text = "Where do you live?"
chinese_text = "生活就像一盒巧克力。"

# translate english to Bangla
tokenizer.src_lang = "en"
encoded_en = tokenizer(en_text, return_tensors="pt")
generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.get_lang_id("bn"))
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

print(f"Translated text: {translated_text}")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'NllbTokenizer'. 
The class this function is called from is 'M2M100Tokenizer'.


TypeError: expected str, bytes or os.PathLike object, not NoneType

### Summerization

#### facebook/bart-large-cnn

In [18]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the model and tokenizer
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Example usage
text = "Artificial intelligence (AI) has significantly transformed various industries over the past decade, offering innovative solutions to complex problems. From healthcare to finance, AI-powered tools have streamlined processes, improved decision-making, and enhanced customer experiences. For instance, machine learning algorithms are being used in hospitals to predict patient outcomes and recommend personalized treatment plans, while financial institutions leverage AI for fraud detection and algorithmic trading. Despite these advancements, challenges such as ethical considerations, data privacy concerns, and the need for transparency in AI decision-making remain critical issues. As technology evolves, addressing these challenges will be crucial to ensure the responsible and equitable use of AI in society."
inputs = tokenizer(text, return_tensors="pt")
summary_ids = model.generate(inputs["input_ids"])
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print(f"Summary: {summary}")

Summary: Artificial intelligence (AI) has significantly transformed various industries over the past decade. From healthcare to finance, AI-powered tools have streamlined processes, improved decision-making, and enhanced customer experiences. Despite these advancements, challenges such as ethical considerations, data privacy concerns, and the need for transparency in AI decision- making remain critical issues.


#### google/pegasus-multi_news

In [1]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
import torch

# Load the model and tokenizer
model_name = "google/pegasus-multi_news"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# Example usage
text = """The quick brown fox jumps over the lazy dog. This is a well-known sentence in English used to test typing, fonts, and keyboards.
          It contains every letter in the English alphabet, making it a pangram. Such sentences are useful for showcasing typefaces."""
          
# Tokenize and process the input text
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="longest", max_length=512)

# Generate summary
summary_ids = model.generate(inputs["input_ids"], max_length=100, min_length=30, length_penalty=2.0, num_beams=4)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print(f"Summary: {summary}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-multi_news and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Summary: – "The quick brown fox jumps over the lazy dog." That's a sentence you'd expect to see in a dog-eat-dog kind of story, but it turns out it's actually a sentence you'd expect to see in any other English-language article. It's a well-known sentence in English used to test typing, fonts, and keyboards, and it contains every letter in the English alphabet, making it a pangram. "The quick brown
