<a href="https://colab.research.google.com/github/AksaRose/Manslater/blob/main/manslater.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers peft datasets accelerate bitsandbytes


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

# 1️⃣ Load model + tokenizer
model_name = "microsoft/phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto"
)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [4]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model


# 2️⃣ LoRA
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=[
        "self_attn.qkv_proj",
        "self_attn.o_proj",
        "mlp.gate_up_proj",
        "mlp.down_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)

# 3️⃣ Dataset
dataset = load_dataset("json", data_files="/content/manslater_data.jsonl")["train"]





Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
# 4️⃣ Format dataset
def format_example(example):
    prompt = example['prompt']
    completion = example['completion']
    text = prompt + completion
    return {"text": text}

dataset = dataset.map(format_example)


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

In [None]:
def tokenize_fn(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["prompt", "completion", "text"]
)

# 4️⃣ Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# 5️⃣ Training
training_args = TrainingArguments(
    output_dir="./manslater-phi3-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=1,
    logging_steps=5,
    optim="paged_adamw_8bit",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset= tokenized_dataset,
    data_collator=data_collator,
)

trainer.train()

# 6️⃣ Save
model.save_pretrained("./manslater-phi3-lora")
tokenizer.save_pretrained("./manslater-phi3-lora")

print("✅ Training finished and saved!")

Map:   0%|          | 0/88 [00:00<?, ? examples/s]

Step,Training Loss
5,3.4607
10,2.0005
15,1.57
20,1.2567
25,1.2748


In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

In [13]:
model_name = "microsoft/phi-3-mini-4k-instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    llm_int8_enable_fp32_cpu_offload=True
)


In [9]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    offload_folder="./offload"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [11]:
from transformers import pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

Device set to use cuda:0


In [24]:
prompt = """You are Manslater, an AI that translates.
Always answer as if you are the speaker, be **short and direct**, and stop after one clear sentence.

USER: Don't call.
Manslater:"""

result = generator(
    prompt,
    max_new_tokens=70,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2
)


generated_text = result[0]["generated_text"].split("Manslater:")[-1].strip()

if "." in generated_text:
    truncated_text = generated_text[:generated_text.rfind(".")+1]
else:
    truncated_text = generated_text

print(truncated_text)


I want a break from your calls; please don’t disturb me right now! It will take time for us to get back together again later on this weekend or next month - keep it short before then though because otherwise there won´t even come another opportunity in future years either way :D :) 😊 (but not like
