# Generate Dataset

Using the DeepSeek API, convert the extracted text into Q&A pairs. The text of each chapter is sent to the model exactly as extracted, so that its content is preserved when the Q&A pairs are generated.

In [None]:
!pip install requests dotenv




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import time
import json
import requests
from dotenv import load_dotenv

load_dotenv()

# DeepSeek API key, read from the DEEPSEEK_API_KEY environment variable
# (os.getenv returns None when the variable is not set). Do not hardcode it here.
API_KEY = os.getenv("DEEPSEEK_API_KEY")

# DeepSeek chat-completions endpoint that generate_qa_pairs() posts to
API_URL = "https://api.deepseek.com/v1/chat/completions"

# Input directory (.txt files to read) and output directory (one .json file per input)
INPUT_DIR = "./data/text_extraction_data"
OUTPUT_DIR = "./data/json_dataset"

# Pause between consecutive files, in seconds (passed to time.sleep)
WAIT_TIME = 10  # Adjust based on API rate limits

# Function to generate Q&A pairs
def generate_qa_pairs(text, timeout=120):
    """Ask the DeepSeek chat API for question-answer pairs covering ``text``.

    Args:
        text (str): Source text to turn into Q&A pairs; embedded verbatim in the prompt.
        timeout (float): Seconds to wait for the HTTP response before giving up.

    Returns:
        list | None: The parsed list of ``{"question": ..., "answer": ...}``
        dictionaries, or ``None`` when the API key is missing, the request fails,
        the API answers with a non-200 status, or the reply is not a JSON list.
        Every failure is reported with ``print`` rather than raised.
    """
    if not API_KEY:
        print("Error: DEEPSEEK_API_KEY is not set.")
        return None

    # Define the prompt for the API
    prompt = f"""
    Generate question-answer pairs in JSON format from the following text. 
    Ensure the questions are clear and the answers are concise and accurate.
    The output should be a list of dictionaries, where each dictionary has two keys: "question" and "answer".

    Example Output:
    [
      {{
        "question": "What is the purpose of DeepSeek's open-source initiative?",
        "answer": "DeepSeek's open-source initiative aims to share production-ready tools and frameworks to accelerate AGI exploration. By open-sourcing their code, DeepSeek fosters collaboration, transparency, and innovation within the AI community."
      }},
      {{
        "question": "What is FlashMLA, and how is it optimized for Hopper GPUs?",
        "answer": "FlashMLA is an efficient Multi-Head Latent Attention (MLA) decoding kernel optimized for Hopper GPUs. It supports BF16, uses a paged KV cache with a block size of 64, and achieves 3000 GB/s memory-bound performance and 580 TFLOPS compute-bound performance on H800 GPUs."
      }}
    ]

    Text:
    {text}
    """

    # Prepare the request payload
    payload = {
        "model": "deepseek-chat",  # Specify the model
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that generates question-answer pairs in JSON format."},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": 2000,  # Adjust based on the length of the text
        "temperature": 0.7,  # Controls creativity (0.7 is a good balance)
    }

    # Set up headers with the API key
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    # Send the request; a timeout keeps a stalled connection from hanging the notebook.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: request to the DeepSeek API failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        print("Error: Unexpected response structure from the API.")
        return None

    # Chat models frequently wrap JSON in a Markdown code fence (```json ... ```);
    # strip the fence so json.loads sees only the payload.
    cleaned = (content or "").strip()
    if cleaned.startswith("```"):
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

    # Convert the response to a Python list of dictionaries
    try:
        qa_pairs = json.loads(cleaned)
    except json.JSONDecodeError:
        print("Error: The response is not valid JSON.")
        return None

    # Downstream code extends a list with this value, so anything else is rejected.
    if not isinstance(qa_pairs, list):
        print("Error: The response is not a JSON list of Q&A pairs.")
        return None
    return qa_pairs

# Function to process all input files
def process_files(input_dir, output_dir, wait_time):
    """Generate a Q&A JSON file for every ``.txt`` file in ``input_dir``.

    Files are processed one at a time in sorted name order. Each input
    ``<name>.txt`` produces ``<name>.json`` in ``output_dir`` when
    ``generate_qa_pairs`` returns a non-empty result; failures are reported
    and skipped.

    Args:
        input_dir (str): Directory containing the source ``.txt`` files.
        output_dir (str): Directory for the generated ``.json`` files (created if missing).
        wait_time (float): Seconds to pause between two consecutive files.
    """
    # Ensure the output directory exists (no error if it already does)
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the file system
    input_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".txt"))
    last_index = len(input_files) - 1

    # Process each file sequentially
    for index, file_name in enumerate(input_files):
        # Only the final extension is swapped; a ".txt" elsewhere in the name is kept.
        input_path = os.path.join(input_dir, file_name)
        output_path = os.path.join(output_dir, os.path.splitext(file_name)[0] + ".json")

        # Read the input file
        with open(input_path, "r", encoding="utf-8") as f:
            text = f.read()

        # Generate Q&A pairs
        print(f"Processing {file_name}...")
        qa_pairs = generate_qa_pairs(text)

        # Save the result as a JSON file
        if qa_pairs:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(qa_pairs, f, indent=4)
            print(f"Saved Q&A pairs to {output_path}")
        else:
            print(f"Failed to generate Q&A pairs for {file_name}")

        # Pause only when another file follows; there is nothing to wait for after the last one.
        if index < last_index:
            print(f"Waiting for {wait_time} seconds before the next file...")
            time.sleep(wait_time)

# Run the script
process_files(INPUT_DIR, OUTPUT_DIR, WAIT_TIME)

Processing data_1.txt...
Error: 402 - {"error":{"message":"Insufficient Balance","type":"unknown_error","param":null,"code":"invalid_request_error"}}
Failed to generate Q&A pairs for data_1.txt
Waiting for 10 seconds before the next file...
Processing data_10.txt...
Error: 402 - {"error":{"message":"Insufficient Balance","type":"unknown_error","param":null,"code":"invalid_request_error"}}
Failed to generate Q&A pairs for data_10.txt
Waiting for 10 seconds before the next file...
Processing data_11.txt...
Error: 402 - {"error":{"message":"Insufficient Balance","type":"unknown_error","param":null,"code":"invalid_request_error"}}
Failed to generate Q&A pairs for data_11.txt
Waiting for 10 seconds before the next file...
Processing data_2.txt...
Error: 402 - {"error":{"message":"Insufficient Balance","type":"unknown_error","param":null,"code":"invalid_request_error"}}
Failed to generate Q&A pairs for data_2.txt
Waiting for 10 seconds before the next file...
Processing data_3.txt...
Error: 

## Combine all json data to single file

In [4]:
import os
import json

# Output directory containing individual JSON files
OUTPUT_DIR = "./data/json_dataset"

# Output file for the combined dataset
COMBINED_OUTPUT_FILE = "./data/dataset.json"

# Function to combine JSON files
def combine_json_files(output_dir, combined_output_file):
    """Merge every ``.json`` file in ``output_dir`` into one JSON list.

    Files are read in sorted name order. A file holding a list contributes
    all of its items; a file holding a single object contributes that object;
    any other top-level JSON value is reported and skipped.

    Args:
        output_dir (str): Directory containing the per-file Q&A JSON files.
        combined_output_file (str): Path of the combined JSON file to write.
    """
    # Initialize an empty list to store all Q&A pairs
    combined_data = []
    combined_abs = os.path.abspath(combined_output_file)

    # Sorted so the combined dataset is reproducible across runs and platforms
    json_files = sorted(f for f in os.listdir(output_dir) if f.endswith(".json"))

    # Process each JSON file
    for file_name in json_files:
        file_path = os.path.join(output_dir, file_name)

        # Never feed a previous combined output back into itself.
        if os.path.abspath(file_path) == combined_abs:
            continue

        # Read the JSON file
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if isinstance(data, list):
            combined_data.extend(data)
        elif isinstance(data, dict):
            # list.extend() on a dict would add its keys, not the record itself.
            combined_data.append(data)
        else:
            print(f"Skipped {file_name}: expected a list of Q&A pairs, got {type(data).__name__}.")
            continue
        print(f"Added {file_name} to the combined dataset.")

    # Save the combined data to a single JSON file
    with open(combined_output_file, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=4)
    print(f"Combined dataset saved to {combined_output_file}")

# Run the script
combine_json_files(OUTPUT_DIR, COMBINED_OUTPUT_FILE)

Added data_1.json to the combined dataset.
Added data_10.json to the combined dataset.
Added data_11.json to the combined dataset.
Added data_12.json to the combined dataset.
Added data_13.json to the combined dataset.
Added data_14.json to the combined dataset.
Added data_15.json to the combined dataset.
Added data_2.json to the combined dataset.
Added data_3.json to the combined dataset.
Added data_4.json to the combined dataset.
Added data_5.json to the combined dataset.
Added data_6.json to the combined dataset.
Added data_7.json to the combined dataset.
Added data_8.json to the combined dataset.
Added data_9.json to the combined dataset.
Combined dataset saved to ./data/dataset.json


## Fine-tune the model

In [5]:
!pip install torch torchvision torchaudio
!pip install transformers
!pip install datasets
!pip install peft

Collecting torch
  Using cached torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Using cached torchvision-0.21.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading torchaudio-2.6.0-cp312-cp312-win_amd64.whl.metadata (6.7 kB)
Collecting networkx (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting setuptools (from torch)
  Using cached setuptools-75.8.2-py3-none-any.whl.metadata (6.7 kB)
Collecting sympy==1.13.1 (from torch)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Using cached pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting MarkupSafe>=2.0 (from jinja2->torch)
  Using cached MarkupSafe-3


[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting transformers
  Using cached transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl.metadata (3.9 kB)
Using cached transformers-4.49.0-py3-none-any.whl (10.0 MB)
Downloading safetensors-0.5.3-cp38-abi3-win_amd64.whl (308 kB)
Using cached tokenizers-0.21.0-cp39-abi3-win_amd64.whl (2.4 MB)
Installing collected packages: safetensors, tokenizers, transformers
Successfully installed safetensors-0.5.3 tokenizers-0.21.0 transformers-4.49.0



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting peft
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-1.4.0-py3-none-any.whl.metadata (19 kB)
Downloading peft-0.14.0-py3-none-any.whl (374 kB)
Downloading accelerate-1.4.0-py3-none-any.whl (342 kB)
Installing collected packages: accelerate, peft
Successfully installed accelerate-1.4.0 peft-0.14.0



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
!pip install -U bitsandbytes




[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Generate the gguf File

In [None]:
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
make

In [None]:
!pip install transformers torch

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# The fine-tuning step saves only LoRA adapter weights, so the adapter has to be
# applied to the base model and merged before llama.cpp can convert the result.
base_model_name = "Qwen/Qwen2.5-3B-Instruct"    # Base model the adapter was trained on
adapter_dir = "./qwen2.5-3b-research-qa-lora"   # Path to your fine-tuned LoRA adapter
merged_dir = "./qwen-gguf"                      # Full merged model, input for the GGUF converter

# LoRA training does not modify the tokenizer, so the base tokenizer is the right one.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float16)

# Fold the adapter deltas into the base weights and drop the PEFT wrappers.
model = PeftModel.from_pretrained(base_model, adapter_dir).merge_and_unload()

# Save the merged model in Hugging Face format, which llama.cpp can convert
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

# Convert to gguf format using llama.cpp
# Run this in your terminal after saving the model
# python llama.cpp/convert_hf_to_gguf.py ./qwen-gguf --outfile ./qwen-gguf/model.gguf --outtype f16

## Build a Chatbot with In-Memory Database

In [None]:
!pip install redis

In [None]:
import torch
from transformers import AutoTokenizer
from llama_cpp import Llama
import redis

# Load the gguf model
model_path = "./qwen-gguf/model.gguf"
# llama-cpp-python defaults to a small context window; generate_response() asks
# for up to 512 new tokens on top of the accumulated chat history, so request a
# larger window explicitly.
llm = Llama(model_path=model_path, n_ctx=4096)

# Initialize Redis for chat history (local server, default port, database 0)
redis_client = redis.Redis(host="localhost", port=6379, db=0)

# Function to generate a response
def generate_response(prompt, chat_history):
    """Generate the chatbot's reply to ``prompt`` given the earlier turns.

    Args:
        prompt (str): The new user message.
        chat_history (iterable of str): Earlier turns, each already prefixed
            with ``"User: "`` or ``"Chatbot: "``. It is not modified.

    Returns:
        str: The generated reply with surrounding whitespace removed.
    """
    # End the prompt with the "Chatbot:" cue so the model completes the
    # assistant's turn instead of continuing the user's message.
    turns = list(chat_history) + [f"User: {prompt}", "Chatbot:"]
    full_prompt = "\n".join(turns)
    # Stop as soon as the model starts writing the next user turn.
    output = llm(full_prompt, max_tokens=512, stop=["User:"], echo=False)
    return output["choices"][0]["text"].strip()

# Chat loop
def chat(chat_id="user_123"):
    """Run an interactive console chat whose history is persisted in Redis.

    Earlier turns stored in the Redis list ``chat_id`` are loaded first, and
    every new user/chatbot exchange is appended to that list. The loop ends
    when the user types ``exit`` or ``quit``, or when input is closed or
    interrupted (EOF / Ctrl-C).

    Args:
        chat_id (str): Redis key identifying the chat session.
    """
    # Load chat history; entries are bytes unless the client decodes responses itself.
    stored_turns = redis_client.lrange(chat_id, 0, -1)
    chat_history = [
        turn.decode("utf-8") if isinstance(turn, bytes) else turn
        for turn in stored_turns
    ]

    print("Chatbot: Hello! How can I assist you today?")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly.
            print("\nChatbot: Goodbye!")
            break

        command = user_input.strip()
        if command.lower() in ["exit", "quit"]:
            print("Chatbot: Goodbye!")
            break
        if not command:
            # Nothing to answer; ask again instead of sending an empty prompt.
            continue

        # Generate response
        response = generate_response(user_input, chat_history)
        print(f"Chatbot: {response}")

        # Update the in-memory history and persist just the new exchange
        new_turns = [f"User: {user_input}", f"Chatbot: {response}"]
        chat_history.extend(new_turns)
        redis_client.rpush(chat_id, *new_turns)

# Start the chat
if __name__ == "__main__":
    chat()

In [16]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

def finetune_qwen(train_file, model_name="Qwen/Qwen2.5-3B-Instruct", output_dir="./qwen2.5-3b-research-qa-lora",
                  max_length=512):
    """
    Fine-tunes a Qwen causal language model with LoRA adapters on a Q&A dataset.

    Each record is rendered as ``"Question: <q> Answer: <a><eos>"``. The loss is
    computed on the answer tokens only; prompt and padding positions are masked.

    Args:
        train_file (str): Path to the JSON file containing the training data
            (records with "question" and "answer" fields).
        model_name (str): Name of the Qwen model to fine-tune.
        output_dir (str): Output directory for saving the fine-tuned LoRA adapters
            and the tokenizer.
        max_length (int): Fixed sequence length; longer examples are truncated and
            shorter ones are padded to this length.

    Returns:
        None. Dataset loading problems are printed and abort the run early.
    """

    # 1. Load Model and Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token_id is None:
        # Padding needs a token id; reuse EOS when the tokenizer defines no pad token.
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        offload_folder="offload",  # Offload to CPU
        offload_state_dict=True,   # Offload state dict to CPU
    )

    # 2. Prepare Model for LoRA
    # NOTE(review): no quantization config is passed above, so this is plain LoRA
    # rather than QLoRA - confirm whether 4-bit loading was intended.
    model = prepare_model_for_kbit_training(model)

    config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "v_proj"]  # Adjust for Qwen if needed
    )
    model = get_peft_model(model, config)

    # 3. Load and Tokenize Dataset
    def preprocess_function(examples):
        # A causal LM needs labels aligned position-by-position with input_ids.
        # Tokenizing the answer separately via text_target (seq2seq style) would
        # pair prompt positions with unrelated answer tokens.
        eos = tokenizer.eos_token or ""
        batch = {"input_ids": [], "attention_mask": [], "labels": []}
        for question, answer in zip(examples["question"], examples["answer"]):
            prompt_ids = tokenizer(f"Question: {question} Answer: ", add_special_tokens=False)["input_ids"]
            answer_ids = tokenizer(f"{answer}{eos}", add_special_tokens=False)["input_ids"]

            input_ids = (prompt_ids + answer_ids)[:max_length]
            # -100 is the label value the loss ignores: no loss on the prompt part.
            labels = ([-100] * len(prompt_ids) + answer_ids)[:max_length]

            pad_count = max_length - len(input_ids)
            batch["input_ids"].append(input_ids + [tokenizer.pad_token_id] * pad_count)
            batch["attention_mask"].append([1] * len(input_ids) + [0] * pad_count)
            batch["labels"].append(labels + [-100] * pad_count)
        return batch

    try:
        dataset = load_dataset("json", data_files=train_file, split="train")
        # Drop the raw text columns so only model inputs reach the trainer.
        tokenized_train_dataset = dataset.map(
            preprocess_function,
            batched=True,
            remove_columns=dataset.column_names,
        )
    except FileNotFoundError:
        print(f"Error: Training data file not found at {train_file}")
        return
    except Exception as e:
        print(f"Error: An error occured reading the JSON file {e}")
        return

    # 4. Set up Training Arguments
    # The evaluation strategy is left at its default ("no"); the explicit
    # `evaluation_strategy` keyword is deprecated in recent transformers releases.
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        num_train_epochs=3,
        logging_steps=50,
        save_steps=500,
        save_total_limit=2,
        fp16=True,  # Or bf16=True if your GPU supports it
        optim="paged_adamw_8bit",
        lr_scheduler_type="cosine",
        warmup_ratio=0.05,
        report_to="none",
        push_to_hub=False,
    )

    # 5. Create Trainer and Train
    # Examples are already padded to a fixed length, so the default collator is
    # enough and the deprecated `tokenizer=` argument is not needed.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
    )

    trainer.train()

    # 6. Save Trained Model (LoRA Adapters) together with the tokenizer, so the
    # output directory can be loaded on its own later.
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"Fine-tuning complete! LoRA adapters saved to {output_dir}")


if __name__ == "__main__":
    train_data_file = "./data/dataset.json"  # Path to your JSON training data file
    finetune_qwen(train_data_file)  # run model.

Downloading shards:   0%|          | 0/2 [11:59<?, ?it/s]


KeyboardInterrupt: 