In [None]:
pip install torch transformers accelerate bitsandbytes peft pdfplumber datasets


In [None]:
import pdfplumber
import os

def extract_text_from_pdfs(pdf_folder):
    """Extract the text of every PDF located directly inside ``pdf_folder``.

    Args:
        pdf_folder: Path of the directory to scan (not recursive).

    Returns:
        A list with one string per PDF, in sorted file-name order. Each string
        is the text of the PDF's pages joined with newlines; pages that yield
        no text are skipped, and a PDF with no extractable text contributes an
        empty string.
    """
    all_texts = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # resulting dataset identical from one run to the next.
    for pdf_file in sorted(os.listdir(pdf_folder)):
        # Compare the extension case-insensitively so "REPORT.PDF" is not skipped.
        if not pdf_file.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(pdf_folder, pdf_file)
        with pdfplumber.open(pdf_path) as pdf:
            # Extract each page exactly once, then drop pages without text
            # (extract_text() may return None or an empty string).
            page_texts = [page.extract_text() for page in pdf.pages]
            text = "\n".join(page_text for page_text in page_texts if page_text)
        all_texts.append(text)
    return all_texts

# Set your folder path
# Placeholder value: replace it with the directory that holds the PDFs to extract.
pdf_folder = "path/to/your/pdf/folder"
# List with one extracted text string per PDF found in pdf_folder.
dataset_texts = extract_text_from_pdfs(pdf_folder)


In [None]:
from datasets import Dataset

def format_dataset(texts, prompt="Summarize this document:", max_chars=1000):
    """Build a prompt/response ``Dataset`` from raw document texts.

    Args:
        texts: Iterable of document strings.
        prompt: Instruction stored in the "prompt" column of every row.
        max_chars: Each document is cut to at most this many characters
            before being stored in the "response" column.

    Returns:
        A ``Dataset`` with two columns, "prompt" and "response", and one row
        per input text (in input order).
    """
    # Truncate long texts so every example stays within a bounded size.
    responses = [text[:max_chars] for text in texts]
    return Dataset.from_dict(
        {"prompt": [prompt] * len(responses), "response": responses}
    )

# Wrap the extracted PDF texts in a prompt/response Dataset.
dataset = format_dataset(dataset_texts)


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

# Load Llama 3 base model.
# The Hugging Face Hub repository id for the Llama 3 8B base model is
# "meta-llama/Meta-Llama-3-8B"; "meta-llama/Llama-3-8B" does not resolve to a repo.
# NOTE: the repository is gated, so access must be granted and the environment
# authenticated with the Hub before the download succeeds.
model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The Llama tokenizer defines no padding token, and padding a batch without one
# raises an error. Reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype="auto"
)

# Apply LoRA: wrap the base model so that only the low-rank adapter weights are trained.
lora_config = LoraConfig(
    r=8, lora_alpha=32, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Tokenize dataset
def tokenize_function(examples):
    """Tokenize a batch of prompt/response pairs for causal-LM fine-tuning.

    Each training text is the prompt, a newline, the response and the
    tokenizer's end-of-sequence token, so the model is actually trained on the
    document content rather than on the constant prompt alone.

    Args:
        examples: Batch mapping with "prompt" and "response" lists of strings.

    Returns:
        The tokenizer output (padded/truncated to 512 tokens) with an added
        "labels" entry: a copy of "input_ids" in which padded positions are
        replaced by -100 so they are ignored by the loss.

    Reads the module-level ``tokenizer``.
    """
    eos = tokenizer.eos_token or ""
    texts = [
        f"{prompt}\n{response}{eos}"
        for prompt, response in zip(examples["prompt"], examples["response"])
    ]
    encoded = tokenizer(texts, truncation=True, padding="max_length", max_length=512)
    # Without labels the Trainer has no loss to optimize. Mask padding via the
    # attention mask (not the token id) because the pad token may equal EOS.
    encoded["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Training setup
# No evaluation split exists in this notebook, so no evaluation strategy is set
# (the default is no evaluation). Requesting per-epoch evaluation without
# passing an eval_dataset makes the Trainer raise.
training_args = TrainingArguments(
    output_dir="./llama3_finetuned",
    save_strategy="epoch",
    logging_steps=10,
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model, args=training_args, train_dataset=tokenized_dataset, tokenizer=tokenizer
)
trainer.train()

# The per-epoch checkpoints live in checkpoint-* subfolders and contain only the
# LoRA adapter weights. Fold the adapter into the base weights and write the
# full model plus tokenizer to the output directory itself, so that later steps
# reading ./llama3_finetuned find a complete Hugging Face model.
merged_model = model.merge_and_unload()
merged_model.save_pretrained(training_args.output_dir)
tokenizer.save_pretrained(training_args.output_dir)


In [None]:
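# pip install llama-cpp-python
# Convert the fine-tuned model to GGUF with llama.cpp, then quantize it to q4_0.
# ./llama3_finetuned must contain a full (merged) Hugging Face model; the converter
# itself cannot emit q4_0, so quantization is a separate step:
#   python convert_hf_to_gguf.py ./llama3_finetuned --outfile llama3-f16.gguf --outtype f16
#   llama-quantize llama3-f16.gguf llama3.gguf Q4_0
# Run these commands in a shell (bash) from a llama.cpp checkout, not in the notebook kernel.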

In [None]:
from llama_cpp import Llama

# Load the quantized GGUF model from the working directory.
llm = Llama(model_path="./llama3.gguf")
# The completion call defaults to max_tokens=16, which cuts the answer off after
# a few words; request a longer completion explicitly.
response = llm("Summarize this document: ...", max_tokens=256)
# `response` is a completion dict; print only the generated text of the first choice.
print(response["choices"][0]["text"])


In [None]:
from fastapi import FastAPI
from llama_cpp import Llama

# Application object; this is the `app` that the uvicorn command line refers to.
app = FastAPI()
# Load the GGUF model once at module import; the request handler below reuses this instance.
llm = Llama(model_path="./llama3.gguf")

@app.post("/generate")
async def generate(prompt: str):
    """Complete ``prompt`` with the module-level ``llm`` and return the text.

    Generation is capped at 100 tokens. The returned dict has a single key,
    "response", holding the text of the first completion choice.
    """
    completion = llm(prompt, max_tokens=100)
    first_choice = completion["choices"][0]
    return {"response": first_choice["text"]}

# Run server from a shell (replace `filename` with this module's file name, without ".py"): uvicorn filename:app --reload
