In [None]:
# !pip install pynvml
import pynvml

pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # GPU index (0 for the first GPU)

# GPU memory usage
memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
total_memory = memory_info.total / 1024 / 1024  # Convert to MB
used_memory = memory_info.used / 1024 / 1024  # Convert to MB
free_memory = memory_info.free / 1024 / 1024  # Convert to MB

total_percentage = (total_memory / total_memory) * 100
used_percentage = (used_memory / total_memory) * 100
free_percentage = (free_memory / total_memory) * 100

print("GPU memory total: {} MB".format(total_memory))
print("GPU memory used: {} MB".format(used_memory))
print("GPU memory free: {} MB".format(free_memory))
print("GPU memory total percentage: {:.2f}%".format(total_percentage))
print("GPU memory used percentage: {:.2f}%".format(used_percentage))
print("GPU memory free percentage: {:.2f}%".format(free_percentage))

pynvml.nvmlShutdown()


In [1]:
# !pip install -Uqqq pip --progress-bar off
# !pip install -qqq bitsandbytes==0.39.0 --progress-bar off
# !pip install -qqq torch==2.0.1 --progress-bar off
# !pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
# !pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
# !pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
# !pip install -qqq datasets==2.12.0 --progress-bar off
# !pip install -qqq loralib==0.1.1 --progress-bar off
# !pip install -qqq einops==0.6.1 --progress-bar off




!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git 
!pip install -q -U git+https://github.com/huggingface/peft.git
# pip install -q -U git+https://github.com/huggingface/accelerate.git
#current version of Accelerate on GitHub breaks QLoRa
#Using standard pip instead
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install scipy

[0m

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [5]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import pandas as pd 
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
# from transformers import (
#     AutoConfig,
#     AutoModelForCausalLM,
#     AutoTokenizer,
#     BitsAndBytesConfig,
# )

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [None]:
with open("all_merged.json", "r") as file:
    QA = json.load(file)
    
data_set = [{"question":key, "answer":value} for key,value in QA.items() ]

with open("data_set.json", "w") as f:
    json.dump(data_set, f)


In [None]:
MODEL_NAME = "timdettmers/guanaco-33b-merged"

bnb_config = BitsAndBytesConfig(
    
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_4bit=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
            
    print(f"trainable params: {trainable_params} || all params {all_param} ||  trainable%: {100 * trainable_params / all_param}"
         )

In [None]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
config = LoraConfig(
    r=8, 
    lora_alpha=32, 
    target_modules=["query_key_value"], 
    lora_dropout=0.05, 
    bias="none", 
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
import pandas as pd
from datasets import DatasetDict, Dataset

feature_order = ['question', 'answer']

df = pd.DataFrame(data_set)
df = df[feature_order]

# print(df.head())

dataset_dict = DatasetDict({
    'train': Dataset.from_pandas(df),
})

print(dataset_dict)


In [None]:
def generate_prompt(data_point):
    return f"""
<human>: {data_point["question"]}
<assistant>: {data_point["answer"]}
""".strip()

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt

In [None]:
dataset_dict = dataset_dict["train"].shuffle().map(generate_and_tokenize_prompt)

In [None]:
print(dataset_dict)

In [None]:
OUTPUT_DIR = "experiments"
# !pip install tensorboardX

In [None]:
!pip install tensorboard
# import tensorboard
# %load_ext tensorboard
# %tensoboard --logdir experiments/runs

In [None]:
# from bitsandbytes.optimizer import str2optimizer8bit_blockwise
training_args = transformers.TrainingArguments(
per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    num_train_epochs = 2,
    learning_rate = 2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=1,
    output_dir=OUTPUT_DIR,
    max_steps=8000,
    optim="adagrad",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
#     report_to="tensorboard",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset_dict,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()

In [None]:
model.save_pretrained("trained_model")

In [None]:
['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_anyprecision', 'sgd', 'adagrad', 'adamw_bnb_8bit', 'adamw_8bit', 'lion_8bit', 'lion_32bit', 'paged_adamw_32bit', 'paged_adamw_8bit', 'paged_lion_32bit', 'paged_lion_8bit']


In [None]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset_dict,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=8,
        gradient_accumulation_steps=8,
        warmup_steps=2,
        max_steps=8000,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir=OUTPUT_DIR,
        optim="adagrad"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()

In [6]:
bnb_config = BitsAndBytesConfig(
    
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

Inference on Trained Model

In [7]:
PEFT_MODEL = "trained_model"

config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

model = PeftModel.from_pretrained(model, PEFT_MODEL)

Loading checkpoint shards: 100%|██████████████████| 2/2 [00:13<00:00,  6.85s/it]


In [18]:
generation_config = model.generation_config
generation_config.max_new_tokens=300
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [9]:
DEVICE = "cuda:0"

In [19]:
%%time 

prompt = f"""
<human>: guide me in a step by step process that how to deploy a smart contract on etherscan?
<assistant>:
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.inference_mode():
    
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

<human>: guide me in a step by step process that how to deploy a smart contract on etherscan?
<assistant>: As an AI language model, I cannot provide specific instructions on how to deploy a smart contract on Etherscan. However, generally speaking, the process may involve the following steps:

1. Create an account on Etherscan.
2. Create a smart contract on a blockchain platform such as Ethereum or Binance Smart Chain.
3. Deploy the smart contract on the blockchain platform.
4. Connect your wallet to Etherscan and provide the necessary information to view the smart contract.
5. Verify the smart contract and its functionality.
6. Test the smart contract and make any necessary changes.
7. Submit the smart contract for approval on the blockchain platform.
8. Once the smart contract is approved, it will be live on the blockchain network.
9. Monitor the smart contract for any issues or bugs that may arise.

Overall, the process of deploying a smart contract on Etherscan can be complex and ma

In [None]:
def generate_response(question: str) -> str:
    prompt = f"""
    <human>: {question}
    <assistant>:
    """.strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_confi=generation_config
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    assistant_start = "<assistant>:"
    response_start = response.find(assistant_start)
    return response[response_start + len(assistant_start) :].strip()