In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    Trainer,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import os
import json
import pandas as pd
import gc

In [2]:
from accelerate import PartialState

# Index of this process on the current machine. It is used further down as the
# device index in device_map, so it has to be the per-machine index: the global
# process_index can exceed the number of local GPUs on multi-node launches.
# On a single machine both values are identical.
device_string = PartialState().local_process_index

In [3]:
################################################################################
# QLoRA parameters
#
# NOTE(review): no cell visible in this notebook reads these three values
# (no LoraConfig is constructed here) -- presumably left over from a
# fine-tuning template; confirm before relying on them.
################################################################################

# LoRA attention dimension (the rank of the adapter matrices)
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
#
# These four values are passed to BitsAndBytesConfig in a later cell.
################################################################################

# Activate 4-bit precision base model loading (passed as load_in_4bit)
use_4bit = True

# Compute dtype for 4-bit base models.
# Must be the name of a torch attribute: it is resolved with getattr(torch, ...).
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
#
# NOTE(review): no cell visible in this notebook builds a TrainingArguments
# object or starts a training run, so these values appear to be unused here --
# confirm before removing or relying on them.
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save a checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Place the entire model (the empty key '' stands for the root module) on the
# device whose index is device_string, i.e. the process index obtained from
# accelerate's PartialState in an earlier cell. This is GPU 0 only when that
# index is 0.
device_map = {'':device_string}

In [4]:
# Turn the dtype name chosen in the configuration cell (e.g. "float16") into
# the attribute of the same name on the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings; get_response passes this object to from_pretrained as
# quantization_config.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_use_double_quant=use_nested_quant,
    load_in_4bit=use_4bit,
)

In [5]:
# Checkpoint name that get_response passes to both
# AutoModelForCausalLM.from_pretrained and AutoTokenizer.from_pretrained.
model_name = "NingLab/eCeLLM-L"

In [6]:
def get_response(prompt):
    """Generate a completion for ``prompt`` with the ``model_name`` checkpoint.

    The quantized model and its tokenizer are loaded on every call, used for a
    single text-generation run, and released again before returning (also when
    loading or generation raises).

    Args:
        prompt: Instruction text. The suffix ``'### Output':`` is appended to
            it before it is handed to the text-generation pipeline.

    Returns:
        The ``generated_text`` field of the first pipeline result.
        NOTE(review): with the pipeline defaults this presumably contains the
        input text followed by the continuation -- the callers in this
        notebook split the result on "Output"; confirm if defaults change.
    """
    # Bind the names up front so the cleanup in ``finally`` is valid even when
    # loading fails half-way.
    model = tokenizer = pipe = None
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
            device_map=device_map
        )
        # This function only generates text, so keep the key/value cache on.
        # Disabling it is a training-time setting and forces every decoding
        # step to recompute attention over the whole prefix.
        model.config.use_cache = True
        model.config.pretraining_tp = 1

        # Load LLaMA tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        # Padding side kept from the original setup; only one prompt is passed
        # per call.
        tokenizer.padding_side = "right"

        # max_length is forwarded to the pipeline unchanged.
        pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=4000)
        outputs = pipe(f"{prompt}'### Output':")
        return outputs[0]['generated_text']
    finally:
        # The pipeline references the model and tokenizer too, so all three
        # names must be dropped before the garbage collector can reclaim the
        # weights.
        model = tokenizer = pipe = None
        gc.collect()
        # Hand cached CUDA blocks back so the next call can load the model
        # again without running out of GPU memory.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

In [7]:
import json


def _load_json(path):
    """Read the file at ``path`` as UTF-8 text and return the parsed JSON."""
    # Explicit encoding: the platform default is not UTF-8 everywhere, which
    # would mis-decode non-ASCII characters in the data files.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# The prompt-building cell reads clustering[<key>]["tags"] and the keys of
# Preferences and Insurance.
clustering = _load_json("./data/Clustering.json")
Preferences = _load_json("./data/Preferences.json")
Insurance = _load_json("./data/Insurance.json")

In [8]:
import random

# Create a prompt
keys_cluster = list(clustering.keys())

# Draw up to three distinct clusters. min() keeps random.sample from raising
# ValueError when fewer than three clusters are available.
# NOTE(review): the draw is unseeded, so the prompts differ between runs; call
# random.seed(...) beforehand if the files written later must be reproducible.
get_random_keys = random.sample(keys_cluster, min(3, len(keys_cluster)))

# One list of tags per sampled cluster ...
values = [clustering[cluster_key]["tags"] for cluster_key in get_random_keys]

# ... flattened into a single comma-separated string of user interests.
final = ', '.join(tag for tag_list in values for tag in tag_list)

# Candidate answers are the top-level keys of the two option files.
preferences = ', '.join(Preferences.keys())
insurances = ', '.join(Insurance.keys())

# Shared question text; the three prompts differ only in the option list.
question = f"What is the next product that the user would be interested in? The user's interests are '{final}'."

prompt3 = f"{question} The possible options are '{insurances}'"
prompt2 = question
prompt = f"{question} The possible options are '{preferences}'"


In [9]:
# Make sure the target directory exists before the expensive generation.
os.makedirs("./output", exist_ok=True)

# Generate before opening the file, so a failure while loading the model or
# generating does not leave a truncated prompt.txt behind.
# split("Output")[1] keeps the piece between the first and the second
# occurrence of "Output" in the returned text.
response_text = get_response(prompt).split("Output")[1]

# Explicit UTF-8 so non-ASCII characters in the model output can be written on
# every platform.
with open("./output/prompt.txt", "w", encoding="utf-8") as f:
    f.write(prompt)
    f.write("\n\n")
    f.write("Output: \n" + response_text)



Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [10]:
# Make sure the target directory exists before the expensive generation.
os.makedirs("./output", exist_ok=True)

# Generate before opening the file, so a failure while loading the model or
# generating does not leave a truncated prompt2.txt behind.
# split("Output")[1] keeps the piece between the first and the second
# occurrence of "Output" in the returned text.
response_text = get_response(prompt2).split("Output")[1]

# Explicit UTF-8 so non-ASCII characters in the model output can be written on
# every platform.
with open("./output/prompt2.txt", "w", encoding="utf-8") as f:
    # Record the prompt that was actually sent (prompt2, not prompt), so the
    # file header matches the response below it.
    f.write(prompt2)
    f.write("\n\n")
    f.write("Output: \n" + response_text)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [11]:
# Make sure the target directory exists before the expensive generation.
os.makedirs("./output", exist_ok=True)

# Generate before opening the file, so a failure while loading the model or
# generating does not leave a truncated prompt3.txt behind.
# split("Output")[1] keeps the piece between the first and the second
# occurrence of "Output" in the returned text.
response_text = get_response(prompt3).split("Output")[1]

# Explicit UTF-8 so non-ASCII characters in the model output can be written on
# every platform.
with open("./output/prompt3.txt", "w", encoding="utf-8") as f:
    # Record the prompt that was actually sent (prompt3, not prompt), so the
    # file header matches the response below it.
    f.write(prompt3)
    f.write("\n\n")
    f.write("Output: \n" + response_text)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]