# Installing Dependencies

In [None]:
%%capture
!pip install unsloth evaluate transformers nltk trl bitsandbytes peft
!pip install sympy --upgrade
!pip install llama-cpp-python
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
# Import all necessary dependencies
from unsloth import FastLanguageModel
import torch
import pandas as pd
from datasets import load_dataset, Dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from llama_cpp import Llama

In [None]:
# Log in to the Hugging Face Hub so models can be downloaded and pushed from this notebook
from huggingface_hub import login
from google.colab import userdata
# Read the token stored under "hf_token" in Colab's userdata and authenticate with it.
# hf_token stays defined as a notebook global after this cell runs.
hf_token = userdata.get("hf_token")
login(token=hf_token)

# Model training

In [None]:
# Load the pre-trained 4-bit base model and its tokenizer with Unsloth's FastLanguageModel
# (same loading pattern as the Unsloth example notebooks)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-bnb-4bit",
    max_seq_length = 512,  # the same value is passed to SFTTrainer further down
    dtype = None,          # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = True,   # Use 4bit quantization to reduce memory usage. Can be False.
)

In [None]:
# Configure and apply PEFT (Parameter-Efficient Fine-Tuning) to the model.
# `model` is rebound to the LoRA-wrapped model returned by get_peft_model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # LoRA rank. Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
                      # LoRA is applied only to these modules; more can be added depending on your model
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # Rank-stabilized LoRA is supported but not used here
    loftq_config = None, # LoftQ is supported but not used here
)

In [None]:
# Prompt template for Chinese -> English translation.
translation_prompt = """Below is a Chinese text that needs to be translated into English.

### Chinese Text:
{}

### English Translation:
{}"""
# The first '{}' receives the Chinese text, the second the English translation.

EOS_TOKEN = tokenizer.eos_token  # appended to every training example so generation learns to stop


def formatting_prompts_func(examples):
    """Build training and inference prompts for a batch of examples.

    Args:
        examples: Batch mapping with "Chinese" and "English" columns
            (parallel sequences of source texts and their translations).

    Returns:
        dict: {"text": prompts filled with both the Chinese text and the
        English translation, followed by EOS_TOKEN (used for training),
        "wtext": prompts filled with the Chinese text only (used for
        inference and evaluation)}.
    """
    sentence_pairs = list(zip(examples["Chinese"], examples["English"]))
    return {
        "text": [
            translation_prompt.format(source, target) + EOS_TOKEN
            for source, target in sentence_pairs
        ],
        "wtext": [
            translation_prompt.format(source, "")
            for source, _ in sentence_pairs
        ],
    }


In [None]:
from sklearn.model_selection import train_test_split

# Load the custom dataset (the formatting function expects "Chinese" and "English" columns)
dataset = pd.read_csv("/content/translations_dataset.csv")

# Sample at most 45000 rows for processing (adjust as needed for system capacity).
# DataFrame.sample raises ValueError when n exceeds the number of rows, so n is
# capped at the dataset size to keep the cell usable with smaller CSV files.
dataset = dataset.sample(n=min(45000, len(dataset)), random_state=42)

# Hold out 1% of the sampled rows for evaluation; the remaining 99% is used for training.
# The fixed random_state makes the split reproducible across runs.
train_df, eval_df = train_test_split(dataset, test_size=0.01, random_state=42)

# Convert to Hugging Face Dataset format
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)

# Add the "text" (training) and "wtext" (inference) prompt columns to both datasets
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
eval_dataset = eval_dataset.map(formatting_prompts_func, batched=True)

# Output dataset sizes
print(f"Training dataset size: {len(train_dataset)}")
print(f"Evaluation dataset size: {len(eval_dataset)}")


In [None]:
# Display the training dataset as the cell output
train_dataset

In [None]:
# Build the supervised fine-tuning trainer over the "text" column of the training set
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",  # column produced by formatting_prompts_func (prompt + translation + EOS)
    max_seq_length=512,         # same value as used when loading the model
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 40,
        gradient_accumulation_steps = 3,  # 40 x 3 = 120 sequences per optimizer step on each device
        warmup_steps = 5,
        #num_train_epochs = 2, # Set this for 1 full training run.
        max_steps = 170,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),  # use bf16 when supported, otherwise fall back to fp16
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Change this to enable WandB etc
    ),
)

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats
trainer_stats = trainer.train()

In [None]:
# Quick sanity check: translate one sentence with the fine-tuned model.
# Uses translation_prompt from the prompt-definition cell above.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    translation_prompt.format(
        "尤其，这是孟浩在第七命下展开的七婴图腾，威力之强，轰天撼地。", # chinese
        "", # english - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True)
# Decode the generated token ids back to text; shown as the cell output
tokenizer.batch_decode(outputs)

# Save and Upload the Model

To save the final model as LoRA adapters, use Hugging Face's `push_to_hub` for an online save or `save_pretrained` for a local save.

[NOTE] This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [None]:
# Save the LoRA adapters and tokenizer locally, then push both to the Hugging Face Hub
# Update the name/version suffix below to match the model being saved
# NOTE(review): the name says "llama3.1" but the base model loaded above is Llama-3.2-3B — confirm the intended name
model.save_pretrained("llama3.1_zhtoen_translation_v4") # Local saving
tokenizer.save_pretrained("llama3.1_zhtoen_translation_v4")

model.push_to_hub("Rawsand/llama3.1_zhtoen_translation_v4") # Online saving
tokenizer.push_to_hub("Rawsand/llama3.1_zhtoen_translation_v4") # Online saving

In [None]:
# Save locally and on hub as a merged 16bit model
# NOTE(review): "model_name", "username/model_name" and the empty token look like template
# placeholders — replace them with a real repo id and token before running this cell.
model.save_pretrained_merged("model_name", tokenizer, save_method = "merged_16bit",)
model.push_to_hub_merged("username/model_name", tokenizer, save_method = "merged_16bit", token = "")

In [None]:
# Save locally and on hub as GGUF (8-bit Q8_0 quantization).
# The llama-cpp inference section at the end of the notebook loads from this Hub repo.
model.save_pretrained_gguf("llama3.1_zhtoen_translation_v4_gguf", tokenizer)
model.push_to_hub_gguf("Rawsand/llama3.1_zhtoen_translation_v4_gguf", tokenizer)

# Evaluation

In [None]:
# Reload the fine-tuned model from the Hub. This lets you run the evaluation
# without executing the training cells above.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Rawsand/llama3.1_zhtoen_translation_v4",
    max_seq_length = 512,
    dtype = None,
    load_in_4bit = True,
)
# for_inference configures the model in place (it is called that way in the
# training section too), so `model` is not rebound to its return value.
# Assigning to `_` keeps the cell from echoing the result.
_ = FastLanguageModel.for_inference(model)

In [None]:
# Prompt template for Chinese -> English translation.
translation_prompt = """Below is a Chinese text that needs to be translated into English.

### Chinese Text:
{}

### English Translation:
{}"""
# The first '{}' receives the Chinese text, the second the English translation.

EOS_TOKEN = tokenizer.eos_token  # appended to every full (training-style) example


def formatting_prompts_func(examples):
    """Build full and source-only prompts for a batch of examples.

    Args:
        examples: Batch mapping with "Chinese" and "English" columns
            (parallel sequences of source texts and their translations).

    Returns:
        dict: {"text": prompts filled with both the Chinese text and the
        English translation, followed by EOS_TOKEN, "wtext": prompts filled
        with the Chinese text only (used for inference and evaluation)}.
    """
    sentence_pairs = list(zip(examples["Chinese"], examples["English"]))
    return {
        "text": [
            translation_prompt.format(source, target) + EOS_TOKEN
            for source, target in sentence_pairs
        ],
        "wtext": [
            translation_prompt.format(source, "")
            for source, _ in sentence_pairs
        ],
    }

In [None]:
from sklearn.model_selection import train_test_split

# Load the custom dataset (the formatting function expects "Chinese" and "English" columns)
dataset = pd.read_csv("/content/translations_dataset.csv")

# Sample at most 45000 rows for processing (adjust as needed for system capacity).
# DataFrame.sample raises ValueError when n exceeds the number of rows, so n is
# capped at the dataset size to keep the cell usable with smaller CSV files.
dataset = dataset.sample(n=min(45000, len(dataset)), random_state=42)

# Hold out 1% of the sampled rows for evaluation; the remaining 99% is the training split.
# The same random_state values as in the training section reproduce the same split.
train_df, eval_df = train_test_split(dataset, test_size=0.01, random_state=42)

# Convert to Hugging Face Dataset format
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)

# Add the "text" (full) and "wtext" (source-only) prompt columns to both datasets
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
eval_dataset = eval_dataset.map(formatting_prompts_func, batched=True)

# Output dataset sizes
print(f"Training dataset size: {len(train_dataset)}")
print(f"Evaluation dataset size: {len(eval_dataset)}")


In [None]:
# Build the evaluation inputs and references from eval_dataset.
eval_prompts = eval_dataset["wtext"]     # prompts that contain only the Chinese text
# Score against the gold English translations only. Using the full "text" prompt as
# the reference would let the shared instruction/Chinese text count as matching
# n-grams and inflate BLEU.
references = eval_dataset["English"]

# Generate predictions (one single-element list per example, so pred[0] is the translation)
predictions = []
for prompt_text in eval_prompts:
    # Tokenize the prompt and move it to the GPU. A separate name is used so the
    # list being iterated is not rebound inside the loop.
    encoded = tokenizer(prompt_text, return_tensors="pt").to("cuda")
    generated = model.generate(**encoded, max_new_tokens = 512, use_cache = True)
    # generate() returns the prompt tokens followed by the continuation; keep only
    # the newly generated tokens so the prediction is just the translation.
    prompt_length = encoded["input_ids"].shape[1]
    new_tokens = generated[:, prompt_length:]
    # Drop special tokens (BOS/EOS) so they are not scored as words.
    prediction = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
    predictions.append(prediction)

In [None]:
import nltk
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
nltk.download('punkt_tab')  # tokenizer data required by nltk.word_tokenize

# Sentence-level BLEU drops to 0 (with a warning) whenever any n-gram order has no
# match, which is common for short sentences. Smoothing keeps such scores meaningful.
smoothing = SmoothingFunction().method1

bleu_scores = []
for ref, pred in zip(references, predictions):
    # Each prediction is a single-element list produced by batch_decode
    reference_tokens = nltk.word_tokenize(ref)
    prediction_tokens = nltk.word_tokenize(pred[0])

    bleu_scores.append(
        sentence_bleu([reference_tokens], prediction_tokens, smoothing_function=smoothing)
    )

# Print results
for bleu in bleu_scores:
    print(f"BLEU Score: {bleu:.4f}")

# Calculate average BLEU score; guard against an empty evaluation set
average_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0.0
print(f"Average BLEU Score: {average_bleu:.4f}")

# Model Inference Using llama-cpp-python

In [None]:
# Load the GGUF export of the fine-tuned model from the Hugging Face Hub with llama-cpp-python
llm = Llama.from_pretrained(
    repo_id="Rawsand/llama3.1_zhtoen_translation_v4_gguf",  # Hub repository the GGUF model was pushed to
    filename="unsloth.Q8_0.gguf",  # The filename of the quantized model file
)

In [None]:
# Prompt template for translating Chinese text to English
translation_prompt = """Below is a Chinese text that needs to be translated into English.

### Chinese Text:
{}

### English Translation:
{}"""  # first '{}' = Chinese text, second '{}' = English translation


def formatting_prompts_func_without_english(chinese_texts):
    """Fill the translation prompt template with each Chinese text.

    The English slot is left empty so the model generates the translation.

    Args:
        chinese_texts (list): Chinese text strings to be translated.

    Returns:
        list: One formatted prompt string per input text, in input order.
    """
    return [translation_prompt.format(source_text, "") for source_text in chinese_texts]

In [None]:
# Sample Chinese texts to be translated (one short question and one long narrative sentence)
texts = [
    "你的模型可以翻译中文吗？",  # "Can your model translate Chinese?"
    "在这面具出现的刹那，忽然的，皮冻那里猛地睁大了眼，拍打着翅膀飞起，绕着孟浩飞了几圈，口中传出聒噪之声。"  # A more complex Chinese sentence
]

In [None]:
# Use the formatting function to prepare one prompt per sample text
prompt = formatting_prompts_func_without_english(texts)

# Generate the output for the first prompt only, using the GGUF model loaded above
output = llm(
    prompt[0],
    max_tokens=512,  # Maximum number of tokens to generate in the output
    echo=True  # Include the prompt in the output for reference
)

# Show the text of the first choice as the cell output
output['choices'][0]['text']