# 🚀 Product Hunt RAG Analyzer - Fine-Tuning with Unsloth + QLoRA

This notebook fine-tunes a 3B parameter LLM for Product Hunt competitive analysis tasks:
- Product Q&A
- Product Comparison
- Product Analysis
- Feature Gap Detection
- Sentiment Analysis

**Framework:** Unsloth (2x faster training, 60% less memory)
**Method:** QLoRA (4-bit quantization + LoRA adapters)
**Model:** Qwen2.5-3B-Instruct

## Step 0: Install Dependencies

In [None]:
# Install Unsloth straight from its GitHub repository, using the "colab-new" extra.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install only the packages named here, without resolving their own dependencies.
!pip install --no-deps xformers trl peft accelerate bitsandbytes

## Step 1: Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Base directory: every other path in this notebook is built from this Drive folder.
BASE_DIR = "/content/drive/MyDrive/ProductHuntRAG"
# Create the folder if needed (-p: no error when it already exists).
!mkdir -p "{BASE_DIR}"

## Step 2: Imports

In [None]:
import unsloth
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
import torch
import json
import os

## Step 3: Define File Paths

In [None]:
# Training shards, all expected directly under the project folder on Drive
TRAIN_FILES = [os.path.join(BASE_DIR, "train.jsonl")]

TEST_FILE = os.path.join(BASE_DIR, "test.jsonl")  # optional

# Fail fast on the first training file that is not on Drive yet
for train_path in TRAIN_FILES:
    if os.path.exists(train_path):
        continue
    print(f"Missing file: {train_path}")
    print("Please upload your .jsonl files to your ProductHuntRAG folder in Drive first!")
    raise FileNotFoundError(train_path)
print("All training files found ‚úì")

## Step 4: Load Qwen 2.5 3B Model

In [None]:
MODEL_NAME = "unsloth/Qwen2.5-3B-Instruct"
MAX_SEQ_LEN = 2048

print("Loading model...")
# Base checkpoint, requested with 4-bit weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    load_in_4bit=True,
)

# LoRA settings gathered in one mapping so the full adapter configuration
# can be read at a glance before it is applied to the model.
lora_settings = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0,
    "target_modules": [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    "use_gradient_checkpointing": "unsloth",
}
model = FastLanguageModel.get_peft_model(model, **lora_settings)

## Step 5: Format Dataset for Chat

In [None]:
# End-of-sequence marker of the loaded tokenizer; formatting_prompts_func appends it to every training text.
eos_token = tokenizer.eos_token
# System message placed first in every training conversation.
SYSTEM_PROMPT = "You are an expert Product Hunt analyst specializing in competitive analysis, feature gap detection, and market positioning insights."

def formatting_prompts_func(examples):
    """Render a batch of instruction/input/output rows as chat-formatted text.

    Args:
        examples: Column-oriented batch with parallel ``'instruction'``,
            ``'input'`` and ``'output'`` sequences (the shape handed over by
            ``Dataset.map(..., batched=True)``).

    Returns:
        A dict ``{"text": [...]}`` holding one rendered conversation per row,
        each followed by ``eos_token``.
    """
    # ``json`` must come from the module-level import. Importing it inside one
    # branch of this function would make the name function-local, so any row
    # that skipped that branch (non-dict input, dict output) would fail with
    # UnboundLocalError when ``json.dumps`` is reached for the output.
    texts = []
    rows = zip(examples['instruction'], examples['input'], examples['output'])
    for instruction, input_data, output_data in rows:
        # Dict inputs are pretty-printed as JSON; anything else is stringified.
        if isinstance(input_data, dict):
            context = json.dumps(input_data, indent=2, ensure_ascii=False)
        else:
            context = str(input_data)

        user_content = f"{instruction}\n\nProduct Data:\n{context}"

        # Prefer the record's 'final_answer'; when it is missing or None, fall
        # back to the whole record serialized as JSON.
        if isinstance(output_data, dict):
            assistant_content = output_data.get('final_answer')
            if assistant_content is None:
                assistant_content = json.dumps(output_data)
        else:
            assistant_content = str(output_data)

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": assistant_content},
        ]

        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False
        )
        # NOTE(review): some chat templates already close the assistant turn
        # with the end-of-turn token, in which case this appends a second one.
        # Confirm against the rendered text before changing it.
        texts.append(text + eos_token)

    return {"text": texts}

## Step 6: Load and Split Datasets

In [None]:
print("Loading datasets from Drive...")
full_train = load_dataset("json", data_files=TRAIN_FILES, split="train")

# Seeded 80/20 split; both halves go through the same chat formatting,
# training portion first and held-out portion second.
split = full_train.train_test_split(test_size=0.20, seed=3407)
train_dataset, val_dataset = (
    split[part].map(formatting_prompts_func, batched=True)
    for part in ("train", "test")
)

print(f"Train size: {len(train_dataset)}")
print(f"Val size: {len(val_dataset)}")

## Step 7: Training

In [None]:
# Hyper-parameters for the supervised fine-tuning run.
# NOTE(review): no save_strategy/save_steps is given, so checkpoint frequency is
# whatever the library defaults to — the loss-plot cell's fallback reads
# "checkpoint-*" folders under this output_dir, so confirm checkpoints get written.
# NOTE(review): `eval_strategy` here and `tokenizer` / `dataset_text_field` /
# `max_seq_length` on SFTTrainer are version-sensitive keyword names — confirm
# against the installed transformers / trl versions.
training_args = TrainingArguments(
    output_dir = os.path.join(BASE_DIR, "outputs"),
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,  # 2 x 4 = 8 sequences per optimizer step on each device
    num_train_epochs = 3,
    learning_rate = 2e-4,
    warmup_steps = 5,
    logging_steps = 10,  # training loss is logged every 10 steps
    eval_strategy = "steps",
    eval_steps = 25,  # validation loss is computed every 25 steps
    # Exactly one of fp16 / bf16 is enabled: bf16 when the GPU reports support, fp16 otherwise.
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    optim = "adamw_8bit",
    report_to = "none",
    seed = 3407,
)

# Trainer over the pre-rendered "text" column produced by formatting_prompts_func.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    dataset_text_field = "text",
    max_seq_length = MAX_SEQ_LEN,
    args = training_args,
)

print("\nStarting fine-tuning now...\n")
trainer.train()

## Step 8: Save Adapters

In [None]:
ADAPTER_DIR = os.path.join(BASE_DIR, "adapters_qwen3b")
# Model first, tokenizer second — both are written into the same folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained(ADAPTER_DIR)
print(f"\nDone! Adapters saved in {ADAPTER_DIR}")
print("You can now use this model with FastLanguageModel.from_pretrained(..., load_in_4bit=True)")

In [None]:
# Print the NVIDIA driver's report for the GPU(s) attached to this runtime.
!nvidia-smi

In [None]:
import torch

# Query CUDA availability once and reuse the answer for both the report line
# and the branch below.
cuda_ready = torch.cuda.is_available()

print("Torch version:", torch.__version__)
print("CUDA available:", cuda_ready)
if not cuda_ready:
    print("No GPU detected by PyTorch!")
else:
    print("GPU:", torch.cuda.get_device_name(0))
    print("CUDA version:", torch.version.cuda)

## Step 9: Evaluation & Loss Curve Plotting

In [None]:
import os
import unsloth
import matplotlib.pyplot as plt
import pandas as pd
import torch
import json
import glob
from unsloth import FastLanguageModel
from google.colab import drive

# 1. Mount Drive
# NOTE(review): Drive is mounted and the paths are redefined again here,
# presumably so this cell can run in a fresh session — confirm.
drive.mount('/content/drive')

# 2. Define Paths
BASE_DIR = "/content/drive/MyDrive/ProductHuntRAG"
# Same folder name the "Save Adapters" step writes to.
ADAPTER_DIR = os.path.join(BASE_DIR, "adapters_qwen3b")
# Same folder name passed as output_dir to TrainingArguments.
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")

# ==============================================================
# A. Plot Training vs Validation Loss
# ==============================================================
print("üìä Generating Training/Validation Loss Chart...")


def _checkpoint_step(path):
    """Return the step number encoded in a ``checkpoint-<step>`` path, or -1."""
    suffix = os.path.basename(os.path.normpath(path)).rpartition("-")[2]
    return int(suffix) if suffix.isdecimal() else -1


history = []

# Prefer the in-memory trainer, but only when it actually holds log entries;
# a trainer that exists with an empty history falls through to the saved logs.
if 'trainer' in globals() and getattr(trainer.state, 'log_history', None):
    history = trainer.state.log_history
    print("‚úÖ Loaded history from active trainer.")
else:
    print("üîç Searching for saved logs...")
    checkpoint_dirs = glob.glob(os.path.join(OUTPUTS_DIR, "checkpoint-*"))
    # Order by the step number in the folder name rather than by filesystem
    # timestamps, which do not have to follow training order. Walk from the
    # highest step down and stop at the first checkpoint that has a state file.
    for latest_checkpoint in sorted(checkpoint_dirs, key=_checkpoint_step, reverse=True):
        log_file = os.path.join(latest_checkpoint, "trainer_state.json")
        if not os.path.exists(log_file):
            continue
        with open(log_file, encoding="utf-8") as f:
            state = json.load(f)
        history = state.get('log_history', [])
        print(f"‚úÖ Loaded logs from: {log_file}")
        break

# Plot
if not history:
    print("‚ùå No training history found.")
else:
    # Separate training-loss records from evaluation-loss records. A record
    # without a 'step' is given its 1-based position within its own series.
    train_entries = [record for record in history if 'loss' in record]
    eval_entries = [record for record in history if 'eval_loss' in record]

    steps = [
        record.get('step', position)
        for position, record in enumerate(train_entries, start=1)
    ]
    train_loss = [record['loss'] for record in train_entries]
    val_steps = [
        record.get('step', position)
        for position, record in enumerate(eval_entries, start=1)
    ]
    val_loss = [record['eval_loss'] for record in eval_entries]

    plt.figure(figsize=(11, 6))
    plt.plot(steps, train_loss, label='Training Loss', color='blue', alpha=0.8)
    # The validation curve is drawn only when evaluation results were logged.
    if val_loss:
        plt.plot(
            val_steps,
            val_loss,
            label='Validation Loss',
            color='red',
            linewidth=2.5,
            marker='o',
        )

    plt.title('ProductHunt RAG Fine-Tuning Learning Curve (Qwen2.5-3B)', fontsize=15)
    plt.xlabel('Training Steps')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Save to Drive before showing the figure.
    chart_path = os.path.join(BASE_DIR, "producthunt_loss_curve_final.png")
    plt.savefig(chart_path, dpi=200, bbox_inches='tight')
    plt.show()
    print(f"‚úÖ Loss curve saved: {chart_path}")

## Step 10: Qualitative Inference Test

In [None]:
# Rebuild the inference model: 4-bit base checkpoint, a LoRA wrapper with the
# training-time settings, then the saved adapter weights from ADAPTER_DIR.
print("\nü§ñ Loading base model + ProductHunt RAG LoRA adapter...")

BASE_MODEL = "unsloth/Qwen2.5-3B-Instruct"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = BASE_MODEL,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

# Re-create LoRA structure (must match training)
# r, lora_alpha, lora_dropout and target_modules repeat the values from the
# training cell; gradient checkpointing is switched off here.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 16,
    lora_dropout = 0,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    use_gradient_checkpointing = False,
)

# Load saved adapter with a name and activate it
# NOTE(review): the adapter created by get_peft_model above presumably stays
# attached but inactive once "producthunt" is selected — confirm.
model.load_adapter(ADAPTER_DIR, adapter_name="producthunt")
model.set_adapter("producthunt")

# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)
print("‚úÖ ProductHunt RAG model fully loaded and ready!")

In [None]:
# Prompts for a quick qualitative check of the fine-tuned model
test_prompts = [
    "What are the key features of Notion and how does it compare to competitors?",
    "Analyze the sentiment of user reviews for a task management app.",
    "What feature gaps exist in current AI writing tools?",
    "Compare Figma vs Sketch for UI design workflows.",
    "What makes a Product Hunt launch successful?",
    "Identify potential competitors for a new AI-powered code review tool.",
]

# Constants hoisted out of the loop: the system message text and the chat
# markers used to cut the assistant turn out of the decoded sequence.
system_text = "You are an expert Product Hunt analyst specializing in competitive analysis, feature gap detection, and market positioning insights."
assistant_tag = "<|im_start|>assistant"
end_tag = "<|im_end|>"

results = []
print("\nüß† Running inference...\n")

for prompt in test_prompts:
    messages = [
        {"role": "system", "content": system_text},
        {"role": "user", "content": prompt},
    ]

    prompt_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    inputs = prompt_ids.to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        use_cache=True,
    )

    # Decode with special tokens kept so the chat markers can be located.
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    full_text = decoded[0]

    # Reply = text after the LAST assistant marker, up to the first end
    # marker that follows it; without an assistant marker, keep everything.
    head, tag, tail = full_text.rpartition(assistant_tag)
    if tag:
        reply = tail.partition(end_tag)[0].strip()
    else:
        reply = full_text

    results.append({"Prompt": prompt, "Response": reply})
    print(f"Prompt: {prompt}")
    print(f"Response: {reply}")
    print("-" * 100)

# Persist the prompt/response pairs as a CSV report on Drive
csv_path = os.path.join(BASE_DIR, "producthunt_final_evaluation.csv")
df = pd.DataFrame(results)
df.to_csv(csv_path, index=False)
print(f"\n‚úÖ Final evaluation report saved: {csv_path}")

print("\nüéâ Evaluation complete! Your ProductHunt RAG model is ready!")

## Step 11: Merge LoRA into Full FP16 Model

In [None]:
from unsloth import FastLanguageModel
import torch
import os
from google.colab import drive

# Merge step: load the base model in float16 (not 4-bit), attach the trained
# adapter, fold it into the base weights and save the result under MERGED_DIR.
drive.mount('/content/drive')

BASE_DIR = "/content/drive/MyDrive/ProductHuntRAG"
ADAPTER_DIR = os.path.join(BASE_DIR, "adapters_qwen3b")
# Folder the GGUF conversion step later reads as its input.
MERGED_DIR = os.path.join(BASE_DIR, "merged_producthunt_16bit")

os.makedirs(MERGED_DIR, exist_ok=True)

print("Loading base model + LoRA adapter...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-3B-Instruct",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = False,
)

# LoRA wrapper with the same r / alpha / dropout / target modules as training.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 16,
    lora_dropout = 0,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
)

# Load fine-tuned weights
model.load_adapter(ADAPTER_DIR, adapter_name="producthunt")
model.set_adapter("producthunt")

print("Merging LoRA into full model (this takes 5-10 minutes)...")
# NOTE(review): presumably only the active "producthunt" adapter is merged and
# the adapter created by get_peft_model is dropped — confirm.
model = model.merge_and_unload()

print("Saving full merged FP16 model...")
model.save_pretrained(MERGED_DIR, safe_serialization=True)
tokenizer.save_pretrained(MERGED_DIR)

print(f"‚úÖ Full merged model saved to: {MERGED_DIR}")
print("Now you can run your GGUF conversion script on this folder!")

## Step 12: Convert to GGUF for Ollama

In [None]:
import os
import shutil
from google.colab import drive

# ==============================================================
#  1. Setup & Mount Drive
# ==============================================================
# Mount only when the mount point is absent.
# NOTE(review): this tests for the directory, not for an active mount — confirm
# that is sufficient in a fresh runtime.
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# Define Paths
project_dir = "/content/drive/MyDrive/ProductHuntRAG"
# Merged FP16 model folder written by the merge step.
INPUT_DIR   = os.path.join(project_dir, "merged_producthunt_16bit")
# Destination of the final quantized model.
FINAL_GGUF  = os.path.join(project_dir, "GGUF_Models", "producthunt_rag_3b_q4_k_m.gguf")

# Ensure output directory exists
os.makedirs(os.path.dirname(FINAL_GGUF), exist_ok=True)

print(f"üìÇ Input Model: {INPUT_DIR}")
print(f"üìÇ Output GGUF: {FINAL_GGUF}")

In [None]:
# ==============================================================
#  2. Build llama.cpp (New CMake Method)
# ==============================================================
# Clean up any old build to avoid conflicts
# The path is relative, so this only finds a checkout in the current working directory.
if os.path.exists("llama.cpp"):
    print("üßπ Removing old llama.cpp directory...")
    shutil.rmtree("llama.cpp")

print("‚¨áÔ∏è Cloning llama.cpp...")
!git clone https://github.com/ggerganov/llama.cpp

# Install dependencies for the conversion script
print("üì¶ Installing Python dependencies...")
!pip install -r llama.cpp/requirements.txt

# Enter directory
# NOTE: the working directory stays inside llama.cpp from here on; the
# conversion and quantization cells use paths relative to it, and the final
# cleanup cell changes back with os.chdir("..").
os.chdir("llama.cpp")

print("üõ†Ô∏è Building with CMake...")
# Configure and Build
!cmake -B build
!cmake --build build --config Release -j 4

# Verify the binary exists in the new location (build/bin/)
QUANTIZE_BIN = "build/bin/llama-quantize"

if not os.path.exists(QUANTIZE_BIN):
    # Fallback check for some environments
    if os.path.exists("build/bin/Release/llama-quantize"):
        QUANTIZE_BIN = "build/bin/Release/llama-quantize"
    else:
        # Neither location holds the binary: stop before the later cells try to run it.
        raise FileNotFoundError(f"‚ùå Build failed! Could not find '{QUANTIZE_BIN}'.")

print(f"‚úÖ Build complete. Binary found at: {QUANTIZE_BIN}")

In [None]:
# ==============================================================
#  3. Convert HF to GGUF (FP16)
# ==============================================================
print("\n--- üì¶ Converting HF to GGUF (16-bit) ---")

# Relative path: with the working directory inside llama.cpp (set by the build
# cell), this places the intermediate file in llama.cpp's parent directory.
TEMP_GGUF = "../temp_f16.gguf"

# Runs llama.cpp's converter on the merged model folder, writing an f16 GGUF.
!python3 convert_hf_to_gguf.py "{INPUT_DIR}" --outfile "{TEMP_GGUF}" --outtype f16

# The existence check below is what turns a missing output file into a hard stop.
if not os.path.exists(TEMP_GGUF):
    raise FileNotFoundError("‚ùå Conversion failed! Intermediate F16 GGUF was not created.")

print("‚úÖ Intermediate F16 GGUF created.")

In [None]:
# ==============================================================
#  4. Quantize to q4_k_m
# ==============================================================
print("\n--- üìâ Quantizing to q4_k_m ---")

# Arguments: input F16 GGUF, output path on Drive, quantization type.
# QUANTIZE_BIN is relative to the llama.cpp working directory, hence the "./" prefix.
!./{QUANTIZE_BIN} "{TEMP_GGUF}" "{FINAL_GGUF}" q4_k_m

In [None]:
# ==============================================================
#  5. Cleanup & Verify
# ==============================================================
# Success is judged by the presence of the final quantized file on Drive.
if os.path.exists(FINAL_GGUF):
    print("\n--- üßπ Cleaning up temp file ---")
    # The intermediate file may already be gone when this cell is re-run;
    # a missing temp file is not an error for a cleanup step.
    try:
        os.remove(TEMP_GGUF)
    except FileNotFoundError:
        pass

    file_size = os.path.getsize(FINAL_GGUF) / (1024 * 1024 * 1024)  # Size in GB
    print(f"üéâ SUCCESS! Final model saved to: {FINAL_GGUF}")
    print(f"üìä File Size: {file_size:.2f} GB")
else:
    print("‚ùå Error: Quantization failed. Final file not created.")

# Return to root directory — but only while still inside the llama.cpp
# checkout, so re-running this cell does not climb one level further each time.
if os.path.basename(os.getcwd()) == "llama.cpp":
    os.chdir("..")

## 🎉 Congratulations!

You've successfully fine-tuned a 3B model for Product Hunt competitive analysis!

**Next Steps:**
1. Download the LoRA adapters or GGUF model from Google Drive
2. For Ollama: Create a Modelfile and run `ollama create producthunt-rag -f Modelfile`
3. Integrate with your Product Hunt RAG Analyzer
4. Test with real product queries