In [None]:
#!/usr/bin/env python

"""Fine-tune an 8-bit LLaMA-7B checkpoint with LoRA adapters (peft) on local text files, then sample text from the result."""

# Notebook metadata.
# NOTE(review): the maintainer is spelled "Andrew Damico" below but
# "Andrew D'Amico" in __credits__ — confirm which spelling is intended.
__author__ = "Christoper Alexander"
__copyright__ = "Copyright 2023"
__credits__ = ["Andrew D'Amico", "Christoper Alexander", "Katya Nosulko", "Vivek Chamala", "Matthew Conger"]
__license__ = ""
__version__ = "0.0.1"
__maintainer__ = "Andrew Damico"
__email__ = "andrew.damico@u.northwestern.edu"

In [1]:
import torch
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
)
from transformers import LlamaTokenizer, LlamaForCausalLM, pipeline

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/ubuntu/miniconda3/envs/transformers/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 116
CUDA SETUP: Loading binary /home/ubuntu/miniconda3/envs/transformers/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda116.so...


  warn(msg)


In [2]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = LlamaTokenizer.from_pretrained("elinas/llama-7b-hf-transformers-4.29")

# Pad with id 0 (unk) so the padding token stays distinct from the EOS token.
tokenizer.pad_token_id = 0

In [3]:
# Loading options for the base checkpoint: 8-bit weights, fp16 as the torch
# dtype, and automatic device placement.
base_model_load_options = {
    "load_in_8bit": True,
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = LlamaForCausalLM.from_pretrained(
    "elinas/llama-7b-hf-transformers-4.29", **base_model_load_options
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:38<00:00, 19.01s/it]


In [4]:
# Run peft's int8-training preparation on the 8-bit model and rebind `model`
# to the returned object. Because `model` is overwritten, re-running this cell
# applies the preparation to an already prepared model.
model = prepare_model_for_int8_training(model)

In [5]:
# LoRA adapter settings: rank 8 with alpha 16, applied to the "q_proj" and
# "v_proj" modules, 5% adapter dropout, no bias terms, causal-LM task type.
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "target_modules": ["q_proj", "v_proj"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
config = LoraConfig(**lora_settings)

In [6]:
# Wrap the prepared model with the LoRA adapters described by `config`.
# NOTE(review): `model` is rebound in place, so re-running this cell would
# presumably wrap an already wrapped model — restart the kernel instead.
model = get_peft_model(model, config)

In [7]:
# Sanity check: print trainable vs. total parameter counts and the trainable
# percentage for the wrapped model.
model.print_trainable_parameters()

trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199


In [8]:
# Load the train/test splits from local plain-text files using the "text"
# loader; each record exposes a single "text" field.
# The paths are relative to the notebook's working directory.
dataset = load_dataset("text", data_files={"train": "train.txt", "test": "test.txt"})

Downloading and preparing dataset text/default to /home/ubuntu/.cache/huggingface/datasets/text/default-d2a13576f1036f22/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2...


Downloading data files: 100%|██████████| 2/2 [00:00<00:00, 8380.23it/s]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 1429.06it/s]
                                                                   

Dataset text downloaded and prepared to /home/ubuntu/.cache/huggingface/datasets/text/default-d2a13576f1036f22/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2. Subsequent calls will reuse this data.


100%|██████████| 2/2 [00:00<00:00, 25.80it/s]


In [9]:
# Tokenize the dataset
# Every example is truncated / padded to this many tokens.
# NOTE(review): presumably chosen to match the model's context window — confirm.
MAX_SEQ_LENGTH = 2048


def tokenize_batch(examples):
    """Tokenize a batch of records from the text dataset.

    Args:
        examples: Batch mapping handed over by ``Dataset.map(..., batched=True)``;
            only its ``"text"`` entry is read.

    Returns:
        The tokenizer output for the batch, truncated and padded to
        ``MAX_SEQ_LENGTH`` tokens per example.
    """
    return tokenizer(
        examples["text"],
        max_length=MAX_SEQ_LENGTH,
        truncation=True,
        padding="max_length",
    )


# Both splits go through the same helper so their preprocessing cannot drift apart.
train_dataset = dataset["train"].map(tokenize_batch, batched=True)
test_dataset = dataset["test"].map(tokenize_batch, batched=True)

                                                                  

In [10]:
# Batch collator for language modelling; `mlm=False` turns off the
# masked-language-modelling mode of the collator.
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [11]:
# Hyper-parameters for the LoRA fine-tuning run.
training_args = transformers.TrainingArguments(
    output_dir="test_llama_7b_2",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # `eval_steps` only takes effect with a step-based evaluation strategy; the
    # default strategy is "no", which silently skips evaluation even though an
    # `eval_dataset` is handed to the Trainer.
    # NOTE(review): newer transformers releases name this argument
    # `eval_strategy` — adjust if the environment is upgraded.
    evaluation_strategy="steps",
    eval_steps=100,
    save_steps=100,
    warmup_steps=50,
    prediction_loss_only=True,
    logging_dir="logs",
    fp16=True,  # Enable mixed precision training
    half_precision_backend="auto",  # Set the backend for mixed precision training
)

In [12]:
# Assemble the Trainer from the pieces built in the previous cells: the
# LoRA-wrapped model, the training arguments, the collator and both splits.
trainer_inputs = {
    "model": model,
    "args": training_args,
    "data_collator": data_collator,
    "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
}
trainer = transformers.Trainer(**trainer_inputs)

In [13]:
# Switch off the model's cache flag before training starts.
# NOTE(review): presumably because the generation cache is not needed while
# training — confirm, and consider re-enabling it before inference.
model.config.use_cache = False

In [14]:
# Monkey-patch `model.state_dict` so that it returns the result of
# `get_peft_model_state_dict` instead of the full state dict.
# `old_state_dict` keeps the original bound method; the lambda is bound to this
# model instance through the descriptor protocol (`__get__`) and ignores any
# positional/keyword arguments passed by callers.
# Presumably the goal is for checkpoints written during training to contain
# only the adapter weights — verify against the checkpoint files.
# NOTE(review): `model.save_pretrained` may itself go through
# `get_peft_model_state_dict`; with some peft versions this patch reportedly
# leads to an (almost) empty adapter file. Check the size of the saved adapter
# weights after training.
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(
        self, old_state_dict()
    )
).__get__(model, type(model))

In [None]:
# Start fine-tuning with the Trainer configured above.
# NOTE(review): this cell carries no execution count in the saved notebook and
# its logged loss table is empty, while later cells were executed — confirm
# that the results further down come from a completed training run.
trainer.train()



Step,Training Loss


In [16]:
# Display the tokenized training split (features and row count).
train_dataset

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 12477
})

In [17]:
# Persist the fine-tuned model to the "llama_7b_finetuned" directory
# (relative to the notebook's working directory).
model.save_pretrained("llama_7b_finetuned")

In [19]:
# Build a text-generation pipeline around the in-memory fine-tuned model.
# The pipeline logs that 'PeftModelForCausalLM' is not in its list of supported
# model classes; the generation cells below still produce output.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'Speech2

In [35]:
# Generate two sampled continuations of the prompt "Tumor discovered"
# (sampling enabled, 2 beams, max_length of 300). No seed is set in this
# notebook, so the sampled text will differ between runs.
test = generator("Tumor discovered", max_length=300, num_return_sequences=2, do_sample=True, num_beams=2)

In [36]:
# Show the first generated sequence.
print(test[0]["generated_text"])

Tumor discovered on the surface of the oesophagus. The patient is a 60-year-old man who complains of a 2-year history of a progressive dysphagia. The patient underwent an endoscopic examination, which revealed a dysplastic lesion in the oesophagus. The patient underwent an oesophageal resection. The pathological examination revealed an oesophageal adenocarcinoma with a high-grade dysplasia. The patient was treated with adjuvant chemotherapy and radiotherapy. The prognosis of oesophageal adenocarcinoma is poor due to the fact that it is usually diagnosed at a late stage. The 5-year survival rate is only 10-20%. Oesophageal adenocarcinoma is a neoplasm of the oesophageal mucosa. It is the most common type of oesophageal cancer, accounting for 50-70% of all oesophageal cancers. Oesophageal adenocarcinoma is most commonly diagnosed in the 6th and 7th decades of life. The most common symptoms of oesophageal adenocarcinoma are dysphagia


In [37]:
# Show the second generated sequence.
print(test[1]["generated_text"])

Tumor discovered in the liver. It was 1.5 cm in size. The liver was removed and sent to the pathology department. The pathology report showed metastasis of the colon adenocarcinoma to the liver. The patient was diagnosed with metastatic colon adenocarcinoma to the liver. The patient was treated with palliative chemotherapy. The patient died 2 months after the diagnosis of liver metastasis. Colorectal cancer is one of the most common cancers in the world. It is the third most common cause of cancer-related deaths. Colorectal cancer is one of the most common cancers in the world. It is the third most common cause of cancer-related deaths. Colorectal cancer is one of the most common cancers in the world. It is the third most common cause of cancer-related deaths. Colorectal cancer is one of the most common cancers in the world. It is the third most common cause of cancer-related deaths. Colorectal cancer is one of the most common cancers in the world. It is the third most common cause of 