In [1]:
import os
import nltk
import logging
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
import google.generativeai as genai
from google.oauth2 import service_account
from pydantic import BaseModel, Field
from rouge_score import rouge_scorer
from collections import Counter
import re
import sacrebleu
from collections import Counter
from docx import Document as DocxDocument
from tqdm.notebook import tqdm
from langchain.docstore.document import Document as LangchainDocument
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
from bert_score import score
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [3]:
# Download required NLTK resources
nltk.download("punkt")
nltk.download("wordnet")
nltk.download("stopwords")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\farha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\farha\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\farha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Data Preprocessing

In [4]:
# Initialize lemmatizer and stopwords
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

In [5]:
# Define text preprocessing function with lemmatization
def preprocess_text(text):
    # 1. Strip whitespace
    text = text.strip()

    # 2. Remove special characters and numbers (optional, depending on requirements)
    text = re.sub(r"[^a-zA-Z\s]", "", text)

    # 3. Tokenize the text
    tokens = nltk.word_tokenize(text)

    # 4. Remove stopwords and apply lemmatization
    lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tqdm(tokens, desc="Processing tokens") if token.lower() not in stop_words]

    # 5. Join the tokens back into a string
    preprocessed_text = " ".join(lemmatized_tokens)

    return preprocessed_text

In [6]:
# Read .docx files from 'dataset/word_standards' folder
def read_docx_files(folder_path):
    documents = []
    for filename in tqdm(os.listdir(folder_path), desc="Reading .docx files"):
        if filename.endswith(".docx"):
            file_path = os.path.join(folder_path, filename)
            try:
                docx_doc = DocxDocument(file_path)
                full_text = []
                for para in docx_doc.paragraphs:
                    full_text.append(para.text)
                text = "\n".join(full_text)
                # Create a LangChain Document object with text and metadata
                langchain_doc = LangchainDocument(page_content=text, metadata={"source": filename})
                documents.append(langchain_doc)
            except Exception as e:
                logger.error(f"Error reading {file_path}: {e}")
    return documents

In [7]:
# Load and preprocess documents from the folder
folder_path = "../data/word_standards"
documents = read_docx_files(folder_path)

Reading .docx files:   0%|          | 0/91 [00:00<?, ?it/s]

In [8]:
# Preprocess the text in each document
for doc in documents:
    doc.page_content = preprocess_text(doc.page_content)

Processing tokens:   0%|          | 0/5660 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1495 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2294 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1500 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3281 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2210 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2094 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4567 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/5778 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/5287 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2395 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1981 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1686 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3591 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2411 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2151 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2390 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2426 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4136 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2958 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2414 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4691 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1732 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/5873 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2143 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1625 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4335 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2313 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/11415 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/13090 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2792 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1475 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1469 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1390 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3035 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3170 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4989 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1801 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2859 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1616 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3074 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3097 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2606 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1730 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2163 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4040 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1623 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1728 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1868 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4897 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1952 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4528 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1692 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2763 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3139 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1585 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1921 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2893 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2848 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1925 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3549 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/21832 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1566 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/19434 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2652 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2863 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/10037 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/5124 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/7576 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/7670 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/12289 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4759 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/10748 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4217 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4355 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2685 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2580 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4090 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2285 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8357 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1753 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/5253 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/3824 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/6877 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2009 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/1956 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/4008 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/2729 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/6939 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/11245 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/15170 [00:00<?, ?it/s]

### Saving to Vector database

In [9]:
# Initialize the text splitter with overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=600,
    separators=["\n\n", "\n", " ", "", "\t", "\r\n", "\r", "\v", "\f", "\u0085", "\u2028", "\u2029"]
)

In [10]:
# Split the documents into chunks
split_docs = text_splitter.split_documents(documents)

logger.info(f"Total number of documents after splitting: {len(split_docs)}")

INFO:__main__:Total number of documents after splitting: 792


In [11]:
# Initialize the embeddings model
embedding_model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Initialize FAISS vector store
vector_store = FAISS.from_documents(split_docs, embeddings)

# Save the vector store locally
vector_store.save_local("faiss_index")

# # To load the vector store from disk
# vector_store = FAISS.load_local("faiss_index", embeddings)

  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
  attn_output = torch.nn.functional.scaled_dot_product_attention(
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'")
INFO:faiss.loader:Loading faiss.
INFO:faiss.loader:Successfully loaded faiss.


### Gemini API configuration

In [12]:
# Path to the service account's JSON file
service_account_path = "adv-nlp-uts-faa7595a22eb.json"

# Create credentials using the service account JSON file
try:
    credentials = service_account.Credentials.from_service_account_file(service_account_path, scopes=["https://www.googleapis.com/auth/generative-language"])
except FileNotFoundError:
    logger.error(f"Service account file not found at {service_account_path}.")
    raise
except Exception as e:
    logger.error(f"Error creating credentials from the service account file: {e}")
    raise

# Configure the Gemini API client with the credentials
genai.configure(credentials=credentials)

### Configuring Gemini-1.5-Flash as serving LLM client

In [13]:
# Implement the Gemini LLM class
class GeminiLLM(LLM, BaseModel):
    model_name: str = Field(default="gemini-1.5-flash")
    temperature: float = Field(default=0.7)

    @property
    def _llm_type(self):
        return "gemini"

    def _call(self, prompt: str, stop: list[str] = None) -> str:
        try:
            # Initialize the model
            model = genai.GenerativeModel(model_name=self.model_name)

            # Generate content using the Gemini API
            response = model.generate_content(
                prompt,
                # temperature=self.temperature,
                # max_output_tokens=512  # Adjust token limit as needed
            )

            # Extract generated text from the response
            generated_text = response.text

            # Handle stop tokens if provided
            if stop:
                for token in stop:
                    generated_text = generated_text.split(token)[0]

            return generated_text.strip()

        except Exception as e:
            logger.error(f"Gemini API error: {e}")
            return "I'm sorry, but I couldn't process your request at this time."

In [14]:
# Initialize the Gemini LLM client
llm = GeminiLLM(model_name="gemini-1.5-flash", temperature=0.7)

# Define a prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an AI assistant with professional expertise in financial regulations and banking statistics, particularly knowledgeable about Australian APRA guidelines.
Based on the provided context, please answer the following question in a clear, well detailed, and informative manner. 
Ensure your response directly addresses the query.
If you are unable to answer the question based on the context, answer with "I don't have information about...."

Context:
{context}

Question:
{question}

Answer:
""",
)

### Configuring the RAG agent

In [15]:
# Create a RetrievalQA chain with the custom prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # You can experiment with 'refine' or 'map_reduce'
    retriever=vector_store.as_retriever(search_kwargs={"k": 100}),
    chain_type_kwargs={"prompt": prompt_template},
    return_source_documents=True,
)

In [16]:
# Function to handle user queries
def answer_query(query):
    try:
        response = qa_chain({"query": query})
        answer = response["result"]
        return answer
    except Exception as e:
        logger.error(f"Error during query processing: {e}")

In [17]:
# # Function to handle user queries
# def answer_query(query):
#     try:
#         response = qa_chain({"query": query})
#         answer = response["result"]
#         # source_docs = response["source_documents"]
#         # print("Response:")
#         # print(answer)
#         # print("\nRelevant Source Documents:")
#         # for doc in source_docs:
#         #     print(f"Source: {doc.metadata.get('source', 'Unknown Source')}")
#         #     print(doc.page_content)
#         #     print("-" * 80)
#         return answer
#     except Exception as e:
#         logger.error(f"Error during query processing: {e}")

##### Testing the RAG agent

In [18]:
# # Test the RAG system with a query
# query = "What is RWA?"
# # query = "What is the quality control on International Banking Statistics Balance Sheet Items?"
# # query = "What are the key elements in the definition of Effective Maturity?"
# answer_query(query)

### Evaluation of RAG Agent

In [19]:
csv_path = "../data/question-answers.csv"
try:
    # Load the CSV file with a specified encoding
    data = pd.read_csv(csv_path, encoding="utf-8")
except UnicodeDecodeError:
    # If utf-8 fails, try a different encoding
    data = pd.read_csv(csv_path, encoding="ISO-8859-1")

In [20]:
def evaluate_rouge(predicted, reference):
    if predicted is None:
        predicted = ""
    if reference is None:
        reference = ""
    scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
    return scorer.score(reference, predicted)


def evaluate_bleu(predicted, reference):
    if predicted is None:
        predicted = ""
    if reference is None:
        reference = ""
    bleu = sacrebleu.corpus_bleu([predicted], [[reference]])
    return bleu.score


def evaluate_f1(predicted, reference):
    if predicted is None:
        predicted = ""
    if reference is None:
        reference = ""
    predicted_tokens = nltk.word_tokenize(preprocess_text(predicted))
    reference_tokens = nltk.word_tokenize(preprocess_text(reference))
    predicted_counts = Counter(predicted_tokens)
    reference_counts = Counter(reference_tokens)

    common_tokens = sum((predicted_counts & reference_counts).values())

    precision = common_tokens / sum(predicted_counts.values()) if predicted_counts else 0
    recall = common_tokens / sum(reference_counts.values()) if reference_counts else 0

    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)


def evaluate_bert_score(predicted, reference):
    if predicted is None:
        predicted = ""
    if reference is None:
        reference = ""
    P, R, F1 = score([predicted], [reference], lang="en", verbose=False)
    return F1.mean().item()


# Initialize SentenceTransformer for embedding similarity
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")


def evaluate_embedding_similarity(predicted, reference):
    if predicted is None:
        predicted = ""
    if reference is None:
        reference = ""
    predicted_embedding = sentence_model.encode(predicted)
    reference_embedding = sentence_model.encode(reference)
    similarity = cosine_similarity([predicted_embedding], [reference_embedding])[0][0]
    return similarity


def evaluate_hybrid(predicted, reference):
    rouge = evaluate_rouge(predicted, reference)
    rouge_score = rouge["rougeL"].fmeasure

    bleu_score = evaluate_bleu(predicted, reference)
    f1_score = evaluate_f1(predicted, reference)
    bert_score = evaluate_bert_score(predicted, reference)
    embedding_similarity = evaluate_embedding_similarity(predicted, reference)

    # Define a weighted hybrid score
    hybrid_score = (0.2 * rouge_score) + (0.2 * bleu_score) + (0.2 * f1_score) + (0.2 * bert_score) + (0.2 * embedding_similarity)
    return hybrid_score

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [21]:
# Add a new column for predicted answers
data["RAG 1"] = ""

# Initialize list to hold the predicted answers
predicted_answers = []

# Initialize lists to hold metric scores
rouge_scores, bleu_scores, f1_scores, bert_scores, embedding_similarities = [], [], [], [], []

In [22]:
# First pass to collect metric values and determine min and max for normalization
for index, row in tqdm(data.iterrows(), total=len(data), desc="Processing questions", leave=False):
    question = row["Question"]
    reference_answer = row["Answer"]
    predicted_answer = answer_query(question)
    predicted_answers.append(predicted_answer)

    # Evaluate the metrics
    rouge = evaluate_rouge(predicted_answer, reference_answer)
    bleu = evaluate_bleu(predicted_answer, reference_answer)
    f1 = evaluate_f1(predicted_answer, reference_answer)
    bert_score = evaluate_bert_score(predicted_answer, reference_answer)
    embedding_similarity = evaluate_embedding_similarity(predicted_answer, reference_answer)

    # Update min and max values for each metric
    rouge_scores.append(rouge["rougeL"].fmeasure)
    bleu_scores.append(bleu)
    f1_scores.append(f1)
    bert_scores.append(bert_score)
    embedding_similarities.append(embedding_similarity)

Processing questions:   0%|          | 0/80 [00:00<?, ?it/s]

  response = qa_chain({"query": query})
INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/32 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/57 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/41 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/156 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/271 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/43 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/18 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/45 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/22 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/119 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/480 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/47 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/375 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/218 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/402 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/279 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/323 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/27 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/14 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/82 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/395 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/332 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/441 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/42 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/356 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/427 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/37 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/43 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/18 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/22 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/113 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/33 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/491 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/34 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/25 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/78 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/93 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/38 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/61 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/47 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/292 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/43 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/10 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/29 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/11 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/35 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/77 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/22 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/32 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/48 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/255 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/34 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/418 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/44 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/574 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/28 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/484 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/39 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/43 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/154 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/41 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/326 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/40 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/114 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/35 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/13 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/37 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/11 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/87 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/10 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/39 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/15 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/229 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/50 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/494 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/16 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/134 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/30 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/75 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/225 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/45 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/15 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/62 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/13 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/174 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/18 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/99 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/45 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/144 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/23 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/246 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/280 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/80 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/60 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/54 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/298 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/41 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/29 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/37 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/9 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/17 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/23 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/10 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/9 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/7 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/22 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/11 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/9 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/10 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/9 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/10 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/28 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/9 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/13 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/7 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/218 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/35 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/316 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/47 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/16 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/12 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/45 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/70 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/39 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/6 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/11 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/13 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/272 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/260 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/89 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/33 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/42 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/32 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/18 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/8 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:absl:Using default tokenizer.


Processing tokens:   0%|          | 0/39 [00:00<?, ?it/s]

Processing tokens:   0%|          | 0/36 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
# Perform min-max normalization for each metric
def min_max_normalize(values):
    min_val, max_val = min(values), max(values)
    return [(val - min_val) / (max_val - min_val) if max_val != min_val else 0.5 for val in values]


rouge_scores_normalized = min_max_normalize(rouge_scores)
bleu_scores_normalized = min_max_normalize(bleu_scores)
f1_scores_normalized = min_max_normalize(f1_scores)
bert_scores_normalized = min_max_normalize(bert_scores)
embedding_similarities_normalized = min_max_normalize(embedding_similarities)

In [24]:
hybrid_scores = [
    (0.2 * rouge) + (0.2 * bleu) + (0.2 * f1) + (0.2 * bert) + (0.2 * embedding)
    for rouge, bleu, f1, bert, embedding in zip(
        rouge_scores_normalized, bleu_scores_normalized, f1_scores_normalized, bert_scores_normalized, embedding_similarities_normalized
    )
]

In [25]:
# Print each result
for index in range(len(data)):
    data.at[index, "RAG 1"] = predicted_answers[index]
    
    print(f"[Processing question {index + 1}]:\n")
    print(f"ROUGE-L: {rouge_scores[index]}")
    print(f"BLEU: {bleu_scores[index]}")
    print(f"F1: {f1_scores[index]}")
    print(f"BERT Score: {bert_scores[index]}")
    print(f"Embedding Similarity: {embedding_similarities[index]}")
    print(f"Hybrid Score: {hybrid_scores[index]}")
    print("-" * 80)

# Save the DataFrame with predictions and hybrid scores to a new CSV file
data.to_csv("../data/answers_rag1.csv", index=False)

[Processing question 1]:

ROUGE-L: 0.1797752808988764
BLEU: 4.429078935197404
F1: 0.2545454545454545
BERT Score: 0.869399905204773
Embedding Similarity: 0.7339451313018799
Hybrid Score: 0.43890744814352534
--------------------------------------------------------------------------------
[Processing question 2]:

ROUGE-L: 0.18269230769230768
BLEU: 0.332584821468322
F1: 0.2095238095238095
BERT Score: 0.8366906642913818
Embedding Similarity: 0.6116490364074707
Hybrid Score: 0.31272268586339136
--------------------------------------------------------------------------------
[Processing question 3]:

ROUGE-L: 0.10526315789473684
BLEU: 0.5495915670435931
F1: 0.0737327188940092
BERT Score: 0.815432608127594
Embedding Similarity: 0.557623565196991
Hybrid Score: 0.20511961411155882
--------------------------------------------------------------------------------
[Processing question 4]:

ROUGE-L: 0.2352941176470588
BLEU: 0.6192869128422068
F1: 0.21621621621621623
BERT Score: 0.8714306950569153
Em

In [26]:
# Save the evaluation metrics to a CSV file
evaluation_metrics = {
    "ROUGE-L": rouge_scores,
    "BLEU": bleu_scores,
    "F1": f1_scores,
    "BERT Score": bert_scores,
    "Embedding Similarity": embedding_similarities,
    "Hybrid Score": hybrid_scores
}
evaluation_df = pd.DataFrame(evaluation_metrics)

evaluation_df.to_csv("../data/rag1_evaluation_metrics.csv", index=False)

# Print evaluation summary
print("=== Evaluation Summary ===")
print(f"Average ROUGE-L: {sum(rouge_scores) / len(rouge_scores):.4f}")
print(f"Average BLEU: {sum(bleu_scores) / len(bleu_scores):.2f}")
print(f"Average F1: {sum(f1_scores) / len(f1_scores):.4f}")
print(f"Average BERT Score: {sum(bert_scores) / len(bert_scores):.4f}")
print(f"Average Embedding Similarity: {sum(embedding_similarities) / len(embedding_similarities):.4f}")
print(f"Average Hybrid Score: {sum(hybrid_scores) / len(hybrid_scores):.4f}")

=== Evaluation Summary ===
Average ROUGE-L: 0.2345
Average BLEU: 5.47
Average F1: 0.2430
Average BERT Score: 0.8596
Average Embedding Similarity: 0.5305
Average Hybrid Score: 0.4032
