<a href="https://colab.research.google.com/github/RicardoPoleo/DeepLearning_FactChecker/blob/main/notebooks/Agents/WebService_Agent_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Install dependencies
# Installs Unsloth, xFormers (Flash Attention), the training/quantization packages, and the localtunnel CLI used to expose the web service.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes
!npm install -g localtunnel

In [None]:
#@title Manual imports
from unsloth import FastLanguageModel
import torch

In [None]:
#@title Hugging Face login helper and fine-tuner class
def login_huggingface():
    """Log in to the Hugging Face Hub with the token stored in Colab user data.

    The token is read from the Colab secret named ``'hg_token'``. The imports
    are local so that merely defining this function does not require the
    Colab runtime.
    """
    from google.colab import userdata
    from huggingface_hub import login

    login(token=userdata.get('hg_token'))

import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import FastLanguageModel, is_bfloat16_supported

class OurFineTuner:
    """Wrap an Unsloth language model for QLoRA fine-tuning and prompt-based inference.

    Typical order of calls: ``pick_model`` -> ``set_instructions_format`` ->
    (``load_dataset`` -> ``format_data`` -> ``prepare_trainer`` for training)
    or ``perform_inference`` for generation.
    """

    def __init__(self, dataset_filepath, dataset_type="csv"):
        """Store the dataset location; nothing is loaded until the other methods run.

        Args:
            dataset_filepath: Path/URL of a CSV file, or a dataset identifier
                when ``dataset_type`` is ``"HuggingFace"``.
            dataset_type: Either ``"csv"`` or ``"HuggingFace"``.
        """
        self.training_stats = None
        self.trainer = None
        self.max_seq_length = 2048
        # Prompt template with three positional ``{}`` slots:
        # instruction, input, response. Filled by set_instructions_format().
        self.instructions_format = ""
        self.dataset_filepath = dataset_filepath
        self.dataset_type = dataset_type
        self.model = None
        self.tokenizer = None
        self.dataset = None
        self.train_dataset = None
        self.validation_dataset = None

    def pick_model(self, model_name):
        """Load ``model_name`` in 4-bit through Unsloth and attach LoRA adapters.

        Args:
            model_name: Model identifier passed to
                ``FastLanguageModel.from_pretrained``.
        """
        dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
        load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=self.max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )
        self.add_qlora()

    def add_qlora(self):
        """Replace ``self.model`` with its PEFT (LoRA) wrapped version."""
        self.model = FastLanguageModel.get_peft_model(
            self.model,
            r=16,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            lora_alpha=16,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing="unsloth",
            random_state=3407,
            use_rslora=False,
            loftq_config=None,
        )

    def load_dataset(self):
        """Load the dataset described by ``dataset_filepath`` / ``dataset_type``.

        Raises:
            ValueError: If ``dataset_type`` is neither ``"csv"`` nor
                ``"HuggingFace"``.
        """
        if self.dataset_type == "csv":
            self.dataset = load_dataset("csv", data_files=self.dataset_filepath, split="train")
        elif self.dataset_type == "HuggingFace":
            # Request a concrete split, as the CSV branch does: without it
            # load_dataset returns a DatasetDict, which has no
            # train_test_split() and would make format_data() fail.
            self.dataset = load_dataset(self.dataset_filepath, split="train")
        else:
            raise ValueError(f"Unsupported dataset type: {self.dataset_type}")

    def format_data(self, test_size=0.2):
        """Split the loaded dataset and add a ``"text"`` prompt column to both parts.

        Args:
            test_size: Passed to ``train_test_split``; the held-out part is
                stored as ``validation_dataset``.
        """
        split_dataset = self.dataset.train_test_split(test_size=test_size)
        self.train_dataset = split_dataset['train'].map(self.formatting_prompts_func, batched=True)
        self.validation_dataset = split_dataset['test'].map(self.formatting_prompts_func, batched=True)

    def formatting_prompts_func(self, examples):
        """Render one batch of rows into full training prompts.

        Args:
            examples: Mapping of column name to a list of values (the batched
                ``map`` format); must have ``instruction``, ``input`` and
                ``output`` columns.

        Returns:
            ``{"text": [...]}`` with one formatted prompt per row, each
            terminated by the tokenizer's EOS token.
        """
        eos_token = self.tokenizer.eos_token
        rows = zip(examples['instruction'], examples['input'], examples['output'])
        # zip() yields plain tuples, so unpack them positionally; indexing the
        # tuple with string keys raises TypeError.
        texts = [
            self.instructions_format.format(instruction, input_text, output) + eos_token
            for instruction, input_text, output in rows
        ]
        return {"text": texts}

    def prepare_trainer(self, max_steps=60):
        """Build the ``SFTTrainer`` over ``train_dataset`` and store it in ``self.trainer``.

        Args:
            max_steps: Total number of optimizer steps to train for.
        """
        self.trainer = SFTTrainer(
            model=self.model,
            tokenizer=self.tokenizer,
            train_dataset=self.train_dataset,
            dataset_text_field="text",
            max_seq_length=self.max_seq_length,
            dataset_num_proc=2,
            packing=False,
            args=TrainingArguments(
                per_device_train_batch_size=2,
                gradient_accumulation_steps=4,
                warmup_steps=5,
                max_steps=max_steps,
                learning_rate=2e-4,
                # Exactly one of fp16/bf16 is enabled, depending on hardware support.
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                logging_steps=1,
                optim="adamw_8bit",
                weight_decay=0.01,
                lr_scheduler_type="linear",
                seed=3407,
                output_dir="outputs",
            ),
        )

    def perform_inference(self, instruction, claim, explanation):
        """Generate a response for one claim/explanation pair.

        Args:
            instruction: Task description placed in the first template slot.
            claim: Claim text to evaluate.
            explanation: Supporting explanation for the claim.

        Returns:
            The list of decoded sequences from ``batch_decode`` (presumably
            including the prompt text, since the raw ``generate`` output is
            decoded as-is -- confirm against the model's behavior).
        """
        input_text = f"Claim: {claim}. Explanation: {explanation}."
        # The response slot is left empty so the model completes it.
        prompt = self.instructions_format.format(instruction, input_text, "")
        inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = self.model.generate(**inputs, max_new_tokens=64, use_cache=True)
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

    def set_instructions_format(self, instructions_format=""):
        """Set the prompt template, falling back to the built-in Alpaca-style one.

        Args:
            instructions_format: Template with three positional ``{}`` slots
                (instruction, input, response). An empty string or ``None``
                selects the default template.
        """
        if not instructions_format:
            self.instructions_format = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
        else:
            self.instructions_format = instructions_format


In [None]:
#@title Create the instance of the model for fast inference
dataset_path = "https://github.com/RicardoPoleo/DeepLearning_FactChecker/raw/main/datasets/3rd-attempt-input-instruction-claim-veredict-output-veredict.csv"
# The checkpoint has to be named in this cell: no earlier cell defines
# `model_name`, so pick_model(model_name) would raise NameError without it.
model_name = "unsloth/llama-3-8b-bnb-4bit"
finetuner = OurFineTuner(dataset_filepath=dataset_path, dataset_type="csv")  # Assuming OurFineTuner class is already defined/imported
finetuner.pick_model(model_name)
# No argument selects the built-in instruction/input/response template.
finetuner.set_instructions_format()

In [None]:
import json

def do_inference(model_name, instruction, claim, explanation):
    """Run the notebook-level ``finetuner`` on one claim and echo the result.

    Args:
        model_name: Only used in the log line; the model itself is whatever
            ``finetuner`` already has loaded.
        instruction: Task description forwarded to the model prompt.
        claim: Claim text to evaluate.
        explanation: Explanation accompanying the claim.

    Returns:
        Whatever ``finetuner.perform_inference`` returns.
    """
    print(f"=== Inference with the model: {model_name}")
    answer = finetuner.perform_inference(instruction, claim, explanation)
    print(answer)
    return answer

In [None]:
#@title Start the Web Service
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import subprocess
import threading

app = FastAPI()

class RequestModel(BaseModel):
    """JSON body accepted by the ``/inference`` endpoint."""
    # Claim to be fact-checked.
    claim: str
    # Explanation/evidence accompanying the claim.
    explanation: str

# Route paths must begin with "/" to be matched against incoming request paths.
@app.post("/inference")
def inference(request: RequestModel):
    """Fact-check one claim and return the model output.

    Args:
        request: Validated body carrying ``claim`` and ``explanation``.

    Returns:
        ``{"response": ...}`` where the value is what ``do_inference`` returns.
    """
    model_name = "unsloth/llama-3-8b-bnb-4bit"
    instruction = "You are a fact-checker AI. Evaluate the following claim with its explanation and, based on the provided information, determine whether or not the claim is true or not, followed by the explanation of why."
    # Read the validated fields directly instead of serializing the model to
    # JSON and parsing it back into a dict.
    response = do_inference(model_name, instruction, request.claim, request.explanation)
    return {"response": response}



def start_uvicorn():
    """Serve the FastAPI ``app`` with uvicorn on all interfaces, port 8000.

    Intended as a thread target: the call does not return while the server
    is running.
    """
    uvicorn.run(app, port=8000, host="0.0.0.0")

# Free the port before starting the server: kill whatever process is still
# using TCP port 8000 (e.g. a previous run of this cell).
!fuser -k 8000/tcp

# Run uvicorn on a separate thread so the rest of this cell keeps executing.
thread = threading.Thread(target=start_uvicorn)
thread.start()

# Launch the localtunnel CLI against the same port and echo every line it
# prints (presumably the public tunnel URL -- confirm against its output).
# The loop runs until the `lt` process closes its stdout.
process = subprocess.Popen(["lt", "--port", "8000"], stdout=subprocess.PIPE)
for line in process.stdout:
    print(line.decode().strip())