In [1]:
pip install torch transformers accelerate sentence-transformers faiss-cpu pandas datasets peft trl bitsandbytes


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import random
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df = pd.read_csv("./data/kjv.csv")

In [4]:
themes = {
    "love": ["love", "charity"],
    "faith": ["faith", "believe", "trust"],
    "sin": ["sin", "iniquity", "transgression", "forbidden"],
    "creation": ["create", "made", "beginning"],
    "wisdom": ["wisdom", "understanding", "knowledge"],
    "forgiveness": ["forgive", "forgiveness", "pardon", "mercy"],
    "prayer": ["pray", "prayer", "ask", "supplication"],
    "hope": ["hope", "promise", "wait", "salvation"],
    "justice": ["justice", "correct", "righteous"],
    "unity": ["unity", "community", "choosen"]
}

In [5]:
def collect_theme_examples(theme, keywords, max_verses=3):
    matches = df[df["Text"].str.contains("|".join(keywords), case=False)]
    if len(matches) == 0:
        return None
    matches = matches.sample(min(max_verses, len(matches)))  # random sample
    verses = [f"{row['Book Name']} {row['Chapter']}:{row['Verse']} - {row['Text']}"
              for _, row in matches.iterrows()]
    question = f"What does the Bible say about {theme}?"
    answer = " ".join(verses)
    return {"prompt": question, "completion": answer}

In [6]:
qa_thematic = []
for theme, keywords in themes.items():
    for _ in range(100):  # 30 examples per theme
        example = collect_theme_examples(theme, keywords)
        if example:
            qa_thematic.append(example)

In [None]:
qa_lookup = [
    {
        "prompt": f"What does {row['Book Name']} {row['Chapter']}:{row['Verse']} say?",
        "completion": row["Text"]
    }
    for _, row in df.sample(15000, random_state=42).iterrows()
]

qa_all = qa_lookup + qa_thematic
random.shuffle(qa_all)

In [8]:
dataset = Dataset.from_list(qa_all)
dataset = dataset.train_test_split(test_size=0.1)

def format_example(example):
    return {
        "text": f"### Question:\n{example['prompt']}\n\n### Answer (in KJV style):\n{example['completion']}"
    }

train_dataset = dataset["train"].map(format_example)
eval_dataset = dataset["test"].map(format_example)

Map: 100%|██████████| 14400/14400 [00:00<00:00, 17564.46 examples/s]
Map: 100%|██████████| 1600/1600 [00:00<00:00, 17919.02 examples/s]


In [9]:
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch.float16
)


tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config = bnb_config,
    offload_folder="offload",
    torch_dtype=torch.float16
)

In [10]:
peft_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [11]:
model.gradient_checkpointing_enable()
model.config.use_cache = False
model.enable_input_require_grads()


In [12]:
model = get_peft_model(model, peft_config)

for name, param in model.named_parameters():
    if "lora_" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

In [13]:
training_args = TrainingArguments(
    output_dir="./christAIn",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    logging_steps=50,
    num_train_epochs=3,
    save_strategy="epoch",
    fp16=True,
    push_to_hub=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset.shuffle().select(range(5000)),
    eval_dataset=eval_dataset.shuffle().select(range(500)),
    processing_class=tokenizer,
)

Adding EOS to train dataset: 100%|██████████| 5000/5000 [00:00<00:00, 16453.17 examples/s]
Tokenizing train dataset: 100%|██████████| 5000/5000 [00:01<00:00, 4163.43 examples/s]
Truncating train dataset: 100%|██████████| 5000/5000 [00:00<00:00, 988942.75 examples/s]
Adding EOS to eval dataset: 100%|██████████| 500/500 [00:00<00:00, 15900.40 examples/s]
Tokenizing eval dataset: 100%|██████████| 500/500 [00:00<00:00, 4013.47 examples/s]
Truncating eval dataset: 100%|██████████| 500/500 [00:00<00:00, 242810.24 examples/s]


In [14]:
for name, param in model.named_parameters():
    param.requires_grad = "lora_" in name

# quick check
trainable = [n for n, p in model.named_parameters() if p.requires_grad]
print("Trainable parameters:", trainable)


Trainable parameters: ['base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight', 'base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight', 'base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight', 'base_model.mod

In [15]:
trainer.train()

# Save model
trainer.save_model("./christAIn")

Step,Training Loss
10,3.3974
20,3.2626
30,3.3435
40,3.1959
50,3.2567
60,3.2816
70,3.0842
80,3.1041
90,3.0005
100,3.0824


In [16]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

model_id = "Qwen/Qwen2.5-0.5B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
model = PeftModel.from_pretrained(base_model, "./christAIn")
tokenizer = AutoTokenizer.from_pretrained(model_id)

def ask(question):
    inputs = tokenizer(f"### Question:\n{question}\n\n### Answer (in KJV style):", return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.7)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(ask("What does the Bible say about love?"))
print(ask("What was the original sin?"))

### Question:
What does the Bible say about love?

### Answer (in KJV style):

**Proverbs 12:8** And when a man loveth, he is not as one that liveth; for his heart is more than flesh. **Proverbs 13:5** And the way of the wicked is to destroy the poor in their time, and to destroy the widow's daughter; but the way of the righteous shall be good, and the ways of the upright shall be blessed. **Proverbs 14:7** The love of money is a root of all evil. **Proverbs 16:29** The wicked will do great wrong, and the wicked will cause great damage. **Proverbs 17:16** Love is the greatest pleasure, and the best thing that ever was seen. **Proverbs 19:10** A friend without a friend is a stranger, and a friend without a friend is a stranger indeed. **Proverbs 24:10** The wise are full of knowledge, and the prudent have understanding. **Proverbs 26:12** Let not your love be so heavy as to make you sad, nor let it be so light as to make you angry. **Proverbs 27:11** He that loves others will love himse

In [21]:
def ask_kjv(question, max_tokens=500):
    inputs = tokenizer(
        f"### Question:\n{question}\n\n### Answer (in KJV style):",
        return_tensors="pt"
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=0.5,
        do_sample=True,
        top_p=0.5
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(ask_kjv("What does the Bible say about love?"))
print(ask_kjv("What was the original sin?"))

### Question:
What does the Bible say about love?

### Answer (in KJV style):

And the Lord said unto Moses, I have heard thy voice in the wilderness: and behold, I will send thee a cloud to overshadow thee from afar off; and thou shalt see a fire upon the face of the land. And when the fire shall come down, and cover all the land of Egypt, so that there shall be no more water for thee to drink, then shall the people go out of the land of Egypt. And they shall eat the fat of the flock and of the herd, and the flesh of the ox, and the fat of the sheep, and the fat of the venison, and the fat of the fowl of the field, and the fat of the fat of the fowl of the sea, and the fat of the fat of the fowl of the mountains, and the fat of the fat of the fowl of the nest, and the fat of the fat of the fowl of the cave, and the fat of the fat of the fowl of the orchard, and the fat of the fat of the fowl of the vineyard, and the fat of the fat of the fowl of the olive tree, and the fat of the fat 

In [20]:
print(ask_kjv("Are there dinosaurs?"))

### Question:
Are there dinosaurs?

### Answer (in KJV style):

**Thou art a great and mighty God, O Jehovah; the earth is thy dwelling place. Thou art a king in the midst of the earth: thou art a prince upon the face of the waters. The earth is full of thy works, and all that thou hast done. Thy footprints are upon every mountain, and on every hill, and on every plain, and on every river, and on every good land. Thou art a great and powerful God, O Jehovah, and art a mighty warrior. Thou art a king upon the face of the earth, and art a prince upon the face of the waters. Thou art a great and mighty God, O Jehovah, and art a mighty warrior. Thou art a king upon the face of the earth, and art a prince upon the face of the waters. Thou art a great and mighty God, O Jehovah, and art a mighty warrior. Thou art a king upon the face of the earth, and art a prince upon the face of the waters. Thou art a great and mighty God, O Jehovah, and art a mighty warrior. Thou art a king upon the face o

In [None]:
base_model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # Original base model
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load LoRA adapter
lora_model = PeftModel.from_pretrained(model, "./christAIn/")

# Merge and save
merged_model = lora_model.merge_and_unload()
merged_model.save_pretrained("./christAIn-merged/")
tokenizer.save_pretrained("./christAIn-merged/")

print("Model merged and saved!")

Model merged and saved!


: 