In [None]:
import os
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
import torch

# 1. Load the FLAN-T5 model
def load_flan_model(model_name="google/flan-t5-base"):
    """Load a T5 checkpoint and its tokenizer onto the best available device.

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for
            both the model and the tokenizer, so the two always match.

    Returns:
        A ``(model, tokenizer, device)`` tuple, where ``device`` is the string
        ``"cuda"`` when ``torch.cuda.is_available()`` is true and ``"cpu"``
        otherwise. If anything in the loading sequence raises, the error is
        printed and ``(None, None, None)`` is returned instead.
    """
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        return model, tokenizer, device
    except Exception as e:
        # Deliberately broad: callers rely on the None triple as the single
        # failure signal, whatever the underlying cause was.
        print(f"[ERROR] Gagal load model: {e}")
        return None, None, None

# 2. Guard against token overflow
def safe_generate(pipeline, question, context, *, max_context_chars=100, max_new_tokens=100):
    """Ask ``question`` about a bounded prefix of ``context`` and return the answer text.

    Args:
        pipeline: Callable invoked as ``pipeline(text, max_new_tokens=...)``;
            its result is indexed as ``[0]["generated_text"]``.
        question: The question string; surrounding whitespace is stripped.
        context: Source text. ``None`` is treated as an empty context.
        max_context_chars: Maximum number of context characters placed in the
            prompt (keyword-only).
        max_new_tokens: Generation budget forwarded to ``pipeline``
            (keyword-only).

    Returns:
        The generated text, or the string ``"[ERROR] <message>"`` when the
        pipeline call (or indexing its result) raises.
    """
    # Strip BEFORE truncating so leading whitespace in the file does not eat
    # the character budget; strip again in case the cut lands on whitespace.
    context = (context or "").strip()[:max_context_chars].strip()
    input_text = f"question: {question.strip()} context: {context}"
    try:
        output = pipeline(
            input_text,
            max_new_tokens=max_new_tokens  # only this one; do not also set max_length
        )[0]["generated_text"]
        return output
    except Exception as e:
        # Best-effort: report the failure as the answer text instead of raising.
        return f"[ERROR] {e}"

# 3. Main program
def main():
    """Run an interactive Q&A session over every ``.txt`` file in ``processed``.

    Loads the model through ``load_flan_model``, wraps it in a
    ``text2text-generation`` pipeline, and then, for each text file (in sorted
    name order), prompts for one or more ``;``-separated questions and prints
    the answer ``safe_generate`` returns for each.

    Returns early when the ``processed`` folder does not exist or when
    ``load_flan_model`` yields a ``None`` model.
    """
    processed_folder = "processed"
    # Check the folder before the slow model load so a bad path fails fast
    # with a readable message instead of an uncaught FileNotFoundError.
    if not os.path.isdir(processed_folder):
        print(f"[ERROR] Folder tidak ditemukan: {processed_folder}")
        return

    model, tokenizer, device = load_flan_model()
    if model is None:
        return

    qa_pipeline = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        device=0 if device == "cuda" else -1,  # pipeline takes a GPU index, -1 for CPU
    )

    # os.listdir order is arbitrary; sort for a reproducible session.
    for file_name in sorted(os.listdir(processed_folder)):
        if not file_name.endswith(".txt"):
            continue

        file_path = os.path.join(processed_folder, file_name)
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()

        print(f"\n📄 File: {file_name}")
        user_input = input("💬 Masukkan pertanyaan (pisahkan dengan ; jika lebih dari satu):\n")
        # Empty fragments (e.g. from a trailing ';') are dropped; an empty
        # reply therefore skips the file.
        questions = [q.strip() for q in user_input.split(";") if q.strip()]

        for question in questions:
            answer = safe_generate(qa_pipeline, question, text)
            print(f"\nQ: {question}\nA: {answer}")

# Start the session only when run as a script or notebook cell, not on import.
if __name__ == "__main__":
    main()



📄 File: translated_translated_الأربعون الشبابية.txt


Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Q: ما هو الإسلام
A: 

📄 File: translated_translated_اللآليء المكية من كلام خير البرية.txt


Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



Q: ما هو الإسلام
A: 

📄 File: translated_translated_بداية القاري في ختم صحيح البخاري -.txt

📄 File: translated_translated_تيسير اللطيف المنان في خلاصة تفسير القرآن - ط الأوقاف السعودية.txt

📄 File: translated_الأربعون الشبابية.txt

📄 File: translated_اللآليء المكية من كلام خير البرية.txt

📄 File: translated_بداية القاري في ختم صحيح البخاري -.txt

📄 File: translated_تيسير اللطيف المنان في خلاصة تفسير القرآن - ط الأوقاف السعودية.txt

📄 File: translated_معالم السنة النبوية -.txt
