In [1]:
pip install -U transformers datasets peft accelerate bitsandbytes sentence-transformers faiss-cpu datasets


Collecting transformers
  Downloading transformers-4.52.3-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  D

In [11]:
!pip install flask-ngrok flask flask-cors pyngrok



In [10]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok

# ✅ Set the auth token directly
ngrok.set_auth_token("2xcrtHjXG07zQEF07geBg5Wi6ZM_2JQbpLDKkHDZNvBiSwSNx")

# Create Flask app
app = Flask(__name__)
CORS(app)

# Start ngrok tunnel
public_url = ngrok.connect(5000)
print(" * ngrok tunnel URL:", public_url)

@app.route('/generate', methods=['POST'])
def generate():
    data = request.get_json()
    prompt = data.get('prompt', '')
    return jsonify({'response': f'You said: {prompt}'})

# Run the app
app.run()


 * ngrok tunnel URL: NgrokTunnel: "https://7810-34-125-255-247.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [12]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import requests
from bs4 import BeautifulSoup
import json
import torch
from transformers import (
    T5ForConditionalGeneration, T5Tokenizer,
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    DataCollatorForLanguageModeling,
)
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
from transformers import BitsAndBytesConfig
from pyngrok import ngrok  # import ngrok

app = Flask(__name__)
CORS(app)  # Allow CORS for React frontend

# Load question generation model once globally
qg_model_name = "valhalla/t5-small-e2e-qg"
qg_tokenizer = T5Tokenizer.from_pretrained(qg_model_name)
qg_model = T5ForConditionalGeneration.from_pretrained(qg_model_name)

# LoRA / LLaMA3 Model variables (init as None)
llama_tokenizer = None
llama_model = None

############ Scraping & Q&A Generation #############

def scrape_generic(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except Exception as e:
        print(f"Error fetching {url}: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    body = soup.body
    if not body:
        return ""

    allowed_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li']
    content = []

    for tag in allowed_tags:
        for element in body.find_all(tag):
            text = element.get_text(separator=' ', strip=True)
            if text:
                content.append(text)

    return '\n'.join(content)

def split_into_chunks(text, max_words=80):
    words = text.split()
    chunks = []
    for i in range(0, len(words), max_words):
        chunk = ' '.join(words[i:i+max_words])
        chunks.append(chunk)
    return chunks

def generate_questions(text):
    inputs = f"generate questions: {text}"
    input_ids = qg_tokenizer.encode(inputs, return_tensors="pt", truncation=True, max_length=512)
    outputs = qg_model.generate(input_ids=input_ids, max_length=64, num_return_sequences=1)
    questions = [qg_tokenizer.decode(o, skip_special_tokens=True) for o in outputs]
    return questions

def generate_qas(chunks):
    qa_pairs = []
    for chunk in chunks:
        questions = generate_questions(chunk)
        for q in questions:
            qa_pairs.append({
                "question": q,
                "answer": chunk.strip()
            })
    return qa_pairs

def prepare_falcon_dataset(json_data, output_txt_path):
    with open(output_txt_path, 'w', encoding='utf-8') as f:
        for item in json_data:
            question = item['question'].strip()
            answer = item.get('answer', item.get('context', '')).strip()
            formatted = f"User: {question}\nAssistant: {answer}\n"
            f.write(formatted + "\n")

############# LoRA Fine-tuning Setup ###############

def load_falcon_formatted(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read().split("User:")

    inputs, outputs = [], []
    for chunk in text:
        if "Assistant:" in chunk:
            q, a = chunk.split("Assistant:")
            q = q.strip().replace("<sep>", " ")
            a = a.strip()
            if q and a:
                inputs.append(q)
                outputs.append(a)
    return Dataset.from_dict({"input": inputs, "output": outputs})

def preprocess(example):
    prompt = f"<s> [INST] {example['input']} [/INST] {example['output']} </s>"
    tokenized = llama_tokenizer(prompt, padding="max_length", truncation=True, max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

def fine_tune_lora(json_data):
    global llama_tokenizer, llama_model

    # Save Q&A JSON temporarily
    with open("qa_dataset.json", "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=2)

    prepare_falcon_dataset(json_data, "falcon_formatted.txt")

    # Load model/tokenizer if not already loaded
    if llama_tokenizer is None or llama_model is None:
        model_name = "unsloth/Llama-3.2-3B-Instruct"
        llama_tokenizer = AutoTokenizer.from_pretrained(model_name)
        llama_tokenizer.pad_token = llama_tokenizer.eos_token

        quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype="float16")
        llama_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quant_config,
            device_map="auto"
        )
        peft_config = LoraConfig(
            r=8,
            lora_alpha=32,
            lora_dropout=0.1,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )
        llama_model = get_peft_model(llama_model, peft_config)

    dataset = load_falcon_formatted("falcon_formatted.txt")
    tokenized_dataset = dataset.map(preprocess, remove_columns=["input", "output"])

    data_collator = DataCollatorForLanguageModeling(tokenizer=llama_tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir="./llama3-qna-lora",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
        report_to="none",
        fp16=True,
    )

    trainer = Trainer(
        model=llama_model,
        args=training_args,
        train_dataset=tokenized_dataset,
        tokenizer=llama_tokenizer,
        data_collator=data_collator,
    )

    print("Starting fine-tuning LoRA model...")
    trainer.train()
    print("Fine-tuning complete!")

    # Save LoRA weights & tokenizer
    llama_model.save_pretrained("./llama3-qna-lora")
    llama_tokenizer.save_pretrained("./llama3-qna-lora")

############# Inference ###############

@app.route("/generate", methods=["POST"])
def generate():
    data = request.get_json()
    url = data.get("url")
    if not url:
        return jsonify({"error": "Missing URL"}), 400

    print(f"Received URL: {url}")

    scraped_text = scrape_generic(url)
    if not scraped_text:
        return jsonify({"error": "Failed to scrape content"}), 500

    chunks = split_into_chunks(scraped_text)
    qa_pairs = generate_qas(chunks)

    fine_tune_lora(qa_pairs)

    return jsonify({"message": "Fine-tuning completed successfully", "qa_count": len(qa_pairs)})

@app.route("/ask", methods=["POST"])
def ask():
    data = request.get_json()
    user_question = data.get("question")
    if not user_question:
        return jsonify({"error": "Missing question"}), 400

    global llama_tokenizer, llama_model
    if llama_tokenizer is None or llama_model is None:
        return jsonify({"error": "Model not loaded"}), 500

    prompt = f"<s> [INST] {user_question} [/INST]"

    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    with torch.no_grad():
        outputs = llama_model.generate(**inputs, max_new_tokens=100)

    answer = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = answer.replace(prompt, "").strip()

    return jsonify({"answer": answer})

if __name__ == "__main__":
    # Start ngrok tunnel
    public_url = ngrok.connect(5000)
    print(f" * ngrok tunnel URL: {public_url}")

    app.run(port=5000)


 * ngrok tunnel URL: NgrokTunnel: "https://4158-34-125-255-247.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [26/May/2025 17:19:48] "OPTIONS /generate HTTP/1.1" 200 -


Map:   0%|          | 0/44 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting fine-tuning LoRA model...


Step,Training Loss
10,2.6488
20,2.4881
30,2.3746


Fine-tuning complete!


Map:   0%|          | 0/44 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting fine-tuning LoRA model...


Step,Training Loss
10,2.2695
20,2.1269
30,2.0704


Fine-tuning complete!


ERROR:__main__:Exception on /generate [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/flask/app.py", line 919, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/flask_cors/extension.py", line 176, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
                                                ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/flask/app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/flask/app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_ar

In [13]:
pip install evaluate rouge_score



NameError: name 'scores' is not defined