In [1]:
%cd /content/drive/MyDrive/ai_gf

/content/drive/MyDrive/ai_gf


In [2]:
!pip install -q bitsandbytes

In [3]:
!pip install -U bitsandbytes



In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset, Dataset
import torch
import json

# ✅ Load and preprocess the dataset
with open("/content/drive/MyDrive/ai_gf/girlfriend_question_answer.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

# Filter or clean if needed
conversations = [
    {
        "text": f"### Boyfriend: {item['question'].strip()}\n### Girlfriend: {item['answer'].strip()}"
    }
    for item in raw_data
    if item["question"].strip() and item["answer"].strip()
]

# Convert to Hugging Face Dataset
dataset = Dataset.from_list(conversations)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)

# ✅ Load tokenizer and model
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# ✅ Enable QLoRA
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # common for Qwen/DeepSeek
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# ✅ Tokenization
def tokenize(example):
    return tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

tokenized_data = dataset.map(tokenize, batched=True, remove_columns=["text"])

# ✅ Training args
training_args = TrainingArguments(
    output_dir="deepseek-gf-qlora",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    eval_strategy="no",
    save_total_limit=2,
    report_to="none"
)

# ✅ Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# ✅ Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# ✅ Start training
trainer.train()

# ✅ Save
model.save_pretrained("deepseek-gf-qlora")
tokenizer.save_pretrained("deepseek-gf-qlora")


Map:   0%|          | 0/480 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,4.7415
20,3.8764
30,3.4136
40,3.1673
50,3.0403
60,2.9241
70,2.9514
80,2.9084
90,2.7341
100,2.7929


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


('deepseek-gf-qlora/tokenizer_config.json',
 'deepseek-gf-qlora/special_tokens_map.json',
 'deepseek-gf-qlora/chat_template.jinja',
 'deepseek-gf-qlora/tokenizer.json')

In [6]:
!zip -r /content/file.zip /content/drive/MyDrive/ai_gf/deepseek-gf-qlora

  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/ (stored 0%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/ (stored 0%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/README.md (deflated 65%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/adapter_model.safetensors (deflated 8%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/adapter_config.json (deflated 55%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/chat_template.jinja (deflated 75%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/tokenizer_config.json (deflated 88%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/special_tokens_map.json (deflated 73%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/tokenizer.json (deflated 81%)
  adding: content/drive/MyDrive/ai_gf/deepseek-gf-qlora/checkpoint-120/training_args.bin (deflated 51%)
  add

In [7]:
from google.colab import files
files.download("/content/file.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# ✅ Load tokenizer and model
model_path = "/content/drive/MyDrive/ai_gf/deepseek-gf-qlora"  # your trained model directory
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32).to("cpu")

# ✅ Get user input
while True:
    user_input = input("You (Boyfriend): ")
    if user_input.lower() in ["exit", "quit"]:
        break

    # ✅ Create the prompt
    prompt = f"### Boyfriend: {user_input}\n### Girlfriend:"

    # ✅ Tokenize input
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cpu")

    # ✅ Generate response
    output = model.generate(
        input_ids,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.75,
        top_p=0.9,
        repetition_penalty=1.2
    )

    # ✅ Decode and print response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    response_lines = response.split("\n")
    for line in response_lines:
        if line.strip().lower().startswith("### girlfriend:"):
            print("Her:", line.replace("### Girlfriend:", "").strip())
            break
    else:
        print("Her:", response.strip())


config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

You (Boyfriend): I missed you so much today.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Her: Oh, really? Did you miss me just now before leaving home or something? Yeah, seriously. That's why we call it 'missed'... Or is that a joke? Anyway, whatever the reason was for missing each other, let's try to make up some reasons and see if there's anything else we can do about this thing of ours!
You (Boyfriend): exit
