In [3]:
# === STEP 1: SETUP ===
from google.colab import drive
import os

# 1. Mount Google Drive (You will be asked to click a link and authorize)
print("üìÇ Connecting to Google Drive...")
drive.mount('/content/drive')

# 2. Install AI Libraries
print("‚è≥ Installing libraries...")
os.system('pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --quiet')
os.system('pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes --quiet')
os.system('pip install langchain langchain-community langchain-huggingface chromadb flask pyngrok duckduckgo-search --quiet')

print("‚úÖ Step 1 Complete: Drive mounted and libraries installed.")


üìÇ Connecting to Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚è≥ Installing libraries...
‚úÖ Step 1 Complete: Drive mounted and libraries installed.


In [None]:
# === STEP 2: SMART DATASET LOADING ===
import json
import os

# 1. Define the path in Google Drive
dataset_path = "/content/drive/My Drive/my_training_data.jsonl"

# 2. Check if you already have a custom dataset
if os.path.exists(dataset_path):
    print(f"üìÇ Found custom dataset at: {dataset_path}")
    print("   Using your personal data for training.")

else:
    print("‚ö†Ô∏è No custom dataset found. Creating a default one for you...")

    # --- DEFAULT DATA (Generic Template) ---
    default_data = [
        {"instruction": "Who are you?", "output": "I am an AI assistant trained to help you."},
        {"instruction": "What can you do?", "output": "I can answer questions, write code, and assist with tasks."},
        {"instruction": "Explain AI.", "output": "AI stands for Artificial Intelligence, which enables computers to mimic human logic."},
        {"instruction": "Write a hello world in Python.", "output": "print('Hello World')"}
    ]

    # Save this default data to Drive so you can edit it later
    with open(dataset_path, 'w') as f:
        for entry in default_data:
            json.dump(entry, f)
            f.write('\n')

    print(f"‚úÖ Created default dataset at: {dataset_path}")
    print("   (To use your own data, just edit this file in your Google Drive!)")

print("‚úÖ Step 2 Complete: Dataset is ready.")

‚úÖ Step 2 Complete: Expanded Dataset created with 144 rich examples.


In [None]:
# === STEP 3: TRAIN MODEL ===
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# 1. Load Base Model
print("‚è≥ Loading Llama-3.1-8B Model...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

# 2. Add LoRA Adapters
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# 3. Define Format
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

def formatting_func(examples):
    texts = []
    for instruction, output in zip(examples["instruction"], examples["output"]):
        texts.append(alpaca_prompt.format(instruction, output) + tokenizer.eos_token)
    return texts

# 4. Load Dataset (FROM GOOGLE DRIVE NOW)
dataset_file = "/content/drive/My Drive/my_training_data.jsonl"
print(f"üìÇ Loading dataset from: {dataset_file}")

dataset = load_dataset("json", data_files=dataset_file, split="train")

# 5. Start Training
print("üöÄ Starting Fine-Tuning...")
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 1,
    formatting_func = formatting_func,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
trainer.train()

# 6. Save Locally (Temporary)
print("üíæ Saving temporary model...")
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")
print("‚úÖ Step 3 Complete: Training finished.")

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!
‚è≥ Loading Llama-3.1-8B Model...
==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2026.1.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Generating train split: 0 examples [00:00, ? examples/s]

üöÄ Starting Fine-Tuning (Operation: CrownClown)...


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/144 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 144 | Num Epochs = 4 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)
wandb: (1) Create a W&B account
wandb: (2) Use an existing W&B account
wandb: (3) Don't visualize my results
wandb: Enter your choice:

 3


wandb: You chose "Don't visualize my results"


wandb: Detected [huggingface_hub.inference, openai] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.3458
2,2.3561
3,2.1907
4,2.5927
5,2.1475
6,2.1275
7,1.8211
8,1.7583
9,1.5955
10,1.1872




0,1
train/epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà
train/global_step,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà
train/grad_norm,‚ñá‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñà‚ñÖ‚ñÖ‚ñÑ‚ñÖ‚ñá‚ñà‚ñá‚ñá‚ñà‚ñá‚ñà‚ñÜ‚ñà‚ñÜ‚ñÇ‚ñÉ‚ñÇ‚ñÉ‚ñÇ‚ñÅ‚ñÑ‚ñÅ‚ñÉ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train/learning_rate,‚ñÅ‚ñÇ‚ñÖ‚ñá‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ
train/loss,‚ñá‚ñá‚ñá‚ñà‚ñá‚ñÜ‚ñÑ‚ñÖ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ

0,1
total_flos,2668240360783872.0
train/epoch,3.33333
train/global_step,60.0
train/grad_norm,0.15214
train/learning_rate,0.0
train/loss,0.0315
train_loss,0.51531
train_runtime,424.1665
train_samples_per_second,1.132
train_steps_per_second,0.141


üíæ Saving model locally...
‚úÖ Step 3 Complete: CrownClown is trained and saved.


In [2]:
# === STEP 4: CREATE KNOWLEDGE BASE (RAG) ===
import os

# 1. Define the path in Google Drive
rag_path = "/content/drive/My Drive/my_knowledge.txt"

# 2. Check if it already exists
if os.path.exists(rag_path):
    print(f"üìÇ Found existing knowledge base at: {rag_path}")
    print("   (The bot will read facts from this file)")

else:
    print("‚ö†Ô∏è Creating a new knowledge base...")

    # --- DEFAULT FACTS (The "Cheat Sheet") ---
    # You can edit this file in your Google Drive anytime!
    knowledge_text = """
Identity:
- Name: CrownClown
- Creator: Joshua
- Creation Year: 2026
- Architecture: Llama 3.1 8B (Fine-tuned)

User Preferences:
- The user is a developer.
- Prefers code examples over long explanations.
"""

    with open(rag_path, "w") as f:
        f.write(knowledge_text)

    print(f"‚úÖ Created default knowledge file at: {rag_path}")

print("‚úÖ Step 4 Complete: Knowledge Base is ready.")

‚ö†Ô∏è Creating a new knowledge base...
‚úÖ Created default knowledge file at: /content/drive/My Drive/my_knowledge.txt
‚úÖ Step 4 Complete: Knowledge Base is ready.


In [None]:
# === STEP 5: PERMANENT SAVE TO DRIVE ===
import shutil
import os

# Define paths
source_folder = "lora_model"
destination_folder = "/content/drive/My Drive/CrownClown_Model"

print(f"üíæ Copying '{source_folder}' to Google Drive...")

# 1. Clean up old version in Drive (if exists)
if os.path.exists(destination_folder):
    shutil.rmtree(destination_folder)

# 2. Copy the new model
shutil.copytree(source_folder, destination_folder)

print(f"‚úÖ Step 5 Complete: Model successfully saved to {destination_folder}")

üíæ Copying 'lora_model' to Google Drive...


In [None]:
# === STEP 6: START SERVER (STATELESS & DRIVE-CONNECTED) ===
# üî¥ PASTE YOUR NGROK TOKEN BELOW üî¥
NGROK_AUTH_TOKEN = "387ajRwhcZVYCEMbS8PYwjrR04v_86Ws1H64EPTQx7c5bM25F"

import os
import torch
from flask import Flask, request, jsonify
from pyngrok import ngrok, conf
from unsloth import FastLanguageModel
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from duckduckgo_search import DDGS

# 1. Load Model from Drive
drive_model_path = "/content/drive/My Drive/CrownClown_Model"
print(f"‚è≥ Loading CrownClown from: {drive_model_path}...")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = drive_model_path,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)

# 2. Setup RAG (Knowledge Base from Drive)
# We point to the file we created in Step 4
rag_path = "/content/drive/My Drive/my_knowledge.txt"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def get_rag_context(query):
    if not os.path.exists(rag_path):
        return "" # If no file in Drive, return nothing

    with open(rag_path, "r") as f:
        doc_text = f.read()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = [Document(page_content=x) for x in text_splitter.split_text(doc_text)]

    if not docs: return ""

    db = Chroma.from_documents(docs, embedding_model)
    results = db.similarity_search(query, k=1)
    return results[0].page_content if results else ""

def search_web(query):
    try:
        results = DDGS().text(query, max_results=3)
        return "\n".join([f"- {r['title']}: {r['body']}" for r in results]) if results else ""
    except: return ""

app = Flask(__name__)

@app.route('/chat', methods=['POST'])
def chat():
    data = request.json
    user_message = data.get("message", "")
    chat_history = data.get("history", []) # Frontend sends the history!
    use_web = data.get("use_web", False)

    # 1. Get Context
    rag_context = get_rag_context(user_message)
    web_context = ""
    if use_web or any(k in user_message.lower() for k in ["plan", "price", "news", "trip", "weather"]):
        web_context = search_web(user_message)

    # 2. Format History
    history_text = "\n".join(chat_history[-6:])

    # 3. Construct Prompt
    input_text = f"""You are CrownClown.
Context: {rag_context}
Web Info: {web_context}

PREVIOUS CHAT:
{history_text}

CURRENT USER QUESTION: {user_message}
System Note: Do not introduce yourself if you already did in the previous chat. Answer directly.
"""

    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{input_text}

### Response:
"""

    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        use_cache=True,
        stop_strings=["### Instruction"],
        tokenizer=tokenizer
    )

    response = tokenizer.batch_decode(outputs)[0]
    final_answer = response.split("### Response:")[-1].split("### Instruction")[0].replace("<|eot_id|>", "").strip()
    is_code = "```" in final_answer

    return jsonify({"response": final_answer, "is_code": is_code})

# Run
conf.get_default().auth_token = NGROK_AUTH_TOKEN
ngrok.kill()
public_url = ngrok.connect(5000).public_url
print(f"\nüöÄ SMART SERVER LIVE: {public_url}")
app.run(port=5000)

‚è≥ Loading CrownClown from: /content/drive/My Drive/CrownClown_Model...
==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!

üöÄ SMART SERVER LIVE: https://forgivingly-ungloomy-joesph.ngrok-free.dev
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
