In [2]:
import torch

# Single configuration dict for the whole run.
# In the cells visible here only "max_seq_length", "dtype", "load_in_4bit" and
# "model_name" are read (by the model-loading cell); the LoRA and training entries are
# presumably consumed by later cells — TODO confirm against the rest of the notebook.
hyper_params = {
    # Model hyperparameters
    "max_seq_length": 4096, # 8192 | Choose any! We auto support RoPE Scaling internally!
    "dtype": None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    "load_in_4bit": True, # Use 4bit quantization to reduce memory usage. Can be False.
    "model_name": "unsloth/gemma-2b-it-bnb-4bit",
    "r": 8, # Choose any number > 0! Suggested 8, 16, 32, 64, 128
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",], # Add more to target more modules
    "lora_alpha": 16,
    "lora_dropout": 0, # Supports any, but = 0 is optimized
    "lora_bias": "none", # Supports any, but = "none" is optimized
    "lora_use_gradient_checkpointing": "unsloth", # True or "unsloth" for very long context
    "lora_random_state": 3407,
    "lora_use_rslora": False, # We support rank stabilized LoRA
    "lora_loftq_config": None, # And LoftQ
    # Training hyperparameters
    "dataset_train_path": "./data/gemma_chat_train",
    "dataset_eval_path": "./data/gemma_chat_eval",
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 1,
    "warmup_steps": 15, # will replace num_warmup_steps in lr_scheduler_kwargs
    # "num_train_epochs": 1, # choose between num_train_epochs and max_steps
    "max_steps": 60, # choose between num_train_epochs and max_steps
    "learning_rate": 1e-4,
    # Exactly one of fp16 / bf16 is True: bf16 when torch reports bf16 support on the
    # current CUDA device, fp16 otherwise.
    "fp16": not torch.cuda.is_bf16_supported(),
    "bf16": torch.cuda.is_bf16_supported(),
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "cosine_with_restarts",
    "lr_scheduler_kwargs": {"num_cycles": 2}, # "num_warmup_steps" and "num_training_steps" will be added automatically
    "seed": 3407,
}

In [3]:
from unsloth import FastLanguageModel

# Loader settings come from the shared hyper_params dict. They are bound to
# notebook-level names so that other cells can refer to them directly.
#   max_seq_length: 8192 | Choose any! RoPE scaling is handled internally by the loader.
#   dtype:          None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+.
#   load_in_4bit:   use 4bit quantization to reduce memory usage. Can be False.
max_seq_length, dtype, load_in_4bit = (
    hyper_params[key] for key in ("max_seq_length", "dtype", "load_in_4bit")
)

# Reference list of 4bit pre-quantized checkpoints (faster download, lower memory).
# It is informational only: the checkpoint actually loaded is hyper_params["model_name"].
# More models at https://huggingface.co/unsloth
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
    "unsloth/gemma-7b-it-bnb-4bit",  # Instruct version of Gemma 7b
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit",  # Instruct version of Gemma 2b
]

# Load the model together with its tokenizer.
# For gated models (e.g. meta-llama/Llama-2-7b-hf) additionally pass token="hf_...".
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=hyper_params["model_name"],
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

==((====))==  Unsloth: Fast Gemma patching release 2024.4
   \\   /|    GPU: Tesla T4. Max memory: 14.581 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.2. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


In [4]:
# load meta movies
import os
import json

# Read ./data/asins_small.json into asins_small.
# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on the
# platform's locale encoding; otherwise non-ASCII characters in the file could fail to
# decode (or decode incorrectly) on systems whose default encoding is not UTF-8.
with open('./data/asins_small.json', 'r', encoding='utf-8') as f:
    asins_small = json.load(f)

In [5]:
# Tokenize every movie title into a plain Python list of token ids.
# Each asins_small[asin] entry holds the title at index 0 (index 1 appears to be the
# description as a list of strings; it is not used here).
movie_titles = []
for asin in asins_small:
    # Ask the tokenizer for plain lists: requesting return_tensors="pt" would build a
    # tensor per title only to convert it straight back to a list.
    tokens = tokenizer(asins_small[asin][0])
    # Drop the first id — presumably the BOS token the tokenizer prepends; verify if the
    # tokenizer is swapped for one that does not add it.
    movie_titles.append(tokens.input_ids[1:])

In [53]:
# Spot-check two tokenized titles (the indices are arbitrary picks): each entry is a
# list of token ids with the first tokenizer-emitted id already removed.
movie_titles[2834], movie_titles[1234]

([62631, 576, 37295],
 [56170,
  235303,
  235256,
  3855,
  235248,
  235284,
  591,
  235274,
  235315,
  235315,
  235304,
  235275,
  591,
  9739,
  235275,
  892,
  41330,
  235290,
  1040,
  235307])

In [6]:
# Prefix tree (trie) over the tokenized movie titles.
# - The root stands for the start-of-sentence token.
# - Every child is a token that directly follows its parent's token in some title.
# - The input, movie_titles, is a list of token lists.
class TrieNode:
    """One trie node: the token it represents plus its outgoing edges."""

    def __init__(self, token):
        # Maps a following token to the TrieNode for that continuation.
        self.children = {}
        self.token = token

def construct_token_tree(movie_titles, bos_token):
    """Build a trie from token sequences and return its root.

    Args:
        movie_titles: iterable of token sequences (one sequence per title).
        bos_token: value stored on the root node (the start-of-sentence token).

    Returns:
        The root TrieNode. Sequences sharing a prefix share the nodes for it.
    """
    root = TrieNode(bos_token)

    for token_sequence in movie_titles:
        node = root
        for tok in token_sequence:
            child = node.children.get(tok)
            if child is None:
                # First time this continuation is seen under the current prefix.
                child = TrieNode(tok)
                node.children[tok] = child
            node = child

    return root

# Root the trie at the BOS token *id*. Every other node stores an integer token id
# (movie_titles holds lists of ids), so passing the BOS string here would make the root
# the only node whose .token is a str.
token_tree = construct_token_tree(movie_titles, tokenizer.bos_token_id)

In [10]:
# Sanity check: a first-level child stores the same token id it is keyed by
# (36911 is presumably the first token id of some title — any existing key works).
token_tree.children[36911].token

36911