In [None]:
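# Packages imported by the cells below: transformers, torch, tensorflow, pandas, datasets.
# %pip install transformers torch tensorflow pandas datasets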

In [1]:
import pandas as pd
from datasets import Dataset
from transformers import TFBertForQuestionAnswering, BertTokenizerFast
import tensorflow as tf

# Read the question/context table that the lookup below searches.
DATASET_PATH = '../data/final_dataset.csv'
df = pd.read_csv(DATASET_PATH)

# Define a function to find context based on the user's question
def find_context_for_question(question, dataset):
    """Return the context stored for the first row whose question matches.

    Matching is exact after stripping surrounding whitespace and lowercasing
    both sides.

    Args:
        question: The user's question text.
        dataset: A DataFrame with 'question' and 'context' columns.

    Returns:
        The 'context' value of the first matching row, or None when no row
        matches (or when `question` is not a string).
    """
    if not isinstance(question, str):
        return None

    # Normalise the needle once instead of on every row.
    wanted = question.strip().lower()

    # Walk the two columns directly; this avoids building a Series per row.
    for candidate, context in zip(dataset['question'], dataset['context']):
        # Empty CSV cells load as NaN (a float), which has no .strip();
        # skip them so one bad row cannot abort the whole lookup.
        if isinstance(candidate, str) and candidate.strip().lower() == wanted:
            return context
    return None

# Hugging Face checkpoint id shared by the QA model and its tokenizer.
model_name = "Rifky/Indobert-QA"
# The two loads are independent of each other; the model is fetched first here.
model = TFBertForQuestionAnswering.from_pretrained(model_name)
tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Function to answer a question using the model and found context
def answer_question(question):
    """Answer `question` by extractive QA over the context stored for it in `df`.

    The context is looked up with `find_context_for_question`; the model then
    picks the most likely start and end token and the tokens in between are
    joined back into a string.

    Args:
        question: The user's question text.

    Returns:
        The extracted answer string (empty when the predicted end token lies
        before the predicted start token), or the fixed message
        "Pertanyaan tidak ditemukan dalam dataset." when no usable context
        exists for the question.
    """
    context = find_context_for_question(question, df)
    # None means "no matching row"; a non-string (e.g. NaN from an empty CSV
    # cell) cannot be tokenized, so it is reported the same way.
    if not isinstance(context, str):
        return "Pertanyaan tidak ditemukan dalam dataset."

    # Truncate only the context so question + context never exceed the number
    # of positions the model has embeddings for; without this, an over-long
    # context fails inside the model.
    inputs = tokenizer(
        question,
        context,
        return_tensors="tf",
        truncation="only_second",
        max_length=model.config.max_position_embeddings,
    )
    outputs = model(inputs)

    # Highest-scoring start/end token positions; +1 makes the end exclusive.
    answer_start = int(tf.argmax(outputs.start_logits, axis=1).numpy()[0])
    answer_end = int(tf.argmax(outputs.end_logits, axis=1).numpy()[0]) + 1

    # Hand the tokenizer plain Python ints rather than a tensor slice.
    span_ids = inputs["input_ids"][0][answer_start:answer_end].numpy().tolist()
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(span_ids))

    return answer

# Example usage: run one sample question through answer_question and print the Q/A pair.
# answer_question returns a fixed fallback message when the question has no match in the dataset.
user_question = "cara membuat uang?"
answer = answer_question(user_question)
print(f"Q: {user_question}\nA: {answer}")










Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForQuestionAnswering: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForQuestionAnswering from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForQuestionAnswering from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertForQuestionAnswering were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.


Q: cara membuat uang?
A: Pertanyaan tidak ditemukan dalam dataset.


In [3]:
from transformers import pipeline

# Build a question-answering pipeline; model and tokenizer come from the same checkpoint.
qa_pipeline = pipeline(
    task="question-answering",
    model="Rifky/Indobert-QA",
    tokenizer="Rifky/Indobert-QA",
)

# Sample passage and question used to exercise the pipeline.
context = """
Pangeran Harya Dipanegara (atau biasa dikenal dengan nama Pangeran Diponegoro, 
lahir di Ngayogyakarta Hadiningrat, 11 November 1785 – meninggal di Makassar, 
Hindia Belanda, 8 Januari 1855 pada umur 69 tahun) adalah salah seorang pahlawan 
nasional Republik Indonesia, yang memimpin Perang Diponegoro atau Perang Jawa selama 
periode tahun 1825 hingga 1830 melawan pemerintah Hindia Belanda. Sejarah mencatat, 
Perang Diponegoro atau Perang Jawa dikenal sebagai perang yang menelan korban terbanyak 
dalam sejarah Indonesia, yakni 8.000 korban serdadu Hindia Belanda, 7.000 pribumi, 
dan 200 ribu orang Jawa serta kerugian materi 25 juta Gulden.
"""
question = "kapan pangeran diponegoro meninggal?"

# Ask the pipeline for the answer span within the passage.
result = qa_pipeline(question=question, context=context)

# Show the raw result dictionary returned by the pipeline.
print(result)


{'score': 0.9597133994102478, 'start': 176, 'end': 190, 'answer': '8 Januari 1855'}
