In [None]:
!pip install -q accelerate==0.21.0 --progress-bar off
!pip install -q peft==0.4.0 --progress-bar off
!pip install -q bitsandbytes==0.40.2 --progress-bar off
!pip install -q transformers==4.31.0 --progress-bar off
!pip install -q trl==0.4.7 --progress-bar off

In [None]:
import os
from random import randrange
from functools import partial
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          HfArgumentParser,
                          Trainer,
                          TrainingArguments,
                          DataCollatorForLanguageModeling,
                          EarlyStoppingCallback,
                          pipeline,
                          logging,
                          set_seed)

import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel, AutoPeftModelForCausalLM
from trl import SFTTrainer
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Hugging Face Hub Login

Meta's family of Llama 2 models is gated. You will require approval to access it using the Hugging Face Hub.

Below are the steps to request permission for the Llama-2-7B model:
1. Get approval from Hugging Face (https://huggingface.co/meta-llama/Llama-2-7b-hf).
2. Get approval from Meta (https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
3. Create a WRITE access token on Hugging Face (https://huggingface.co/settings/tokens).
4. Execute `!huggingface-cli login` in Google Colab Notebook, enter the token, and enter "Y."

Note: Make sure your email address on your Hugging Face account is the same as the one you enter on Meta's website for approval.

If you don't want to perform the above steps, use an ungated copy of Llama-2-7B instead, such as https://huggingface.co/daryl149/llama-2-7b-chat-hf (this notebook uses `TinyPixel/Llama-2-7B-bf16-sharded`). In that case, set `use_auth_token` to `False` when loading both the model and its tokenizer.

In [None]:
def create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype):
    """
    Builds the bitsandbytes quantization configuration used when loading the model

    :param load_in_4bit: Load model in 4-bit precision mode
    :param bnb_4bit_use_double_quant: Nested quantization for 4-bit model
    :param bnb_4bit_quant_type: Quantization data type for 4-bit model
    :param bnb_4bit_compute_dtype: Computation data type for 4-bit model
    :return: `BitsAndBytesConfig` populated with the four settings above
    """

    # Collect the settings under the keyword names BitsAndBytesConfig expects
    quantization_options = {
        "load_in_4bit": load_in_4bit,
        "bnb_4bit_use_double_quant": bnb_4bit_use_double_quant,
        "bnb_4bit_quant_type": bnb_4bit_quant_type,
        "bnb_4bit_compute_dtype": bnb_4bit_compute_dtype,
    }

    return BitsAndBytesConfig(**quantization_options)

In [None]:
def load_model(model_name, bnb_config, max_memory_mb = 40960, use_auth_token = True):
    """
    Loads model and model tokenizer

    :param model_name: Hugging Face model name
    :param bnb_config: Bitsandbytes configuration
    :param max_memory_mb: Memory budget in MB assigned to each visible GPU (passed as `max_memory`)
    :param use_auth_token: Forwarded to BOTH the model and the tokenizer loader so the two
        calls authenticate the same way; set to False for ungated model copies
    :return: Tuple `(model, tokenizer)`
    """

    # Get number of GPU device and set maximum memory
    n_gpus = torch.cuda.device_count()
    max_memory = f'{max_memory_mb}MB'

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config = bnb_config,
        device_map = "auto", # dispatch the model efficiently on the available resources
        max_memory = {i: max_memory for i in range(n_gpus)},
        use_auth_token = use_auth_token,
    )

    # Load model tokenizer with the same authentication setting as the model
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token = use_auth_token)

    # Set padding token as EOS token
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

In [None]:
################################################################################
# transformers parameters
################################################################################

# The pre-trained model from the Hugging Face Hub to load and fine-tune
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
load_in_4bit = True

# Activate nested quantization for 4-bit base models (double quantization)
bnb_4bit_use_double_quant = True

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Compute data type for 4-bit base models
bnb_4bit_compute_dtype = torch.bfloat16

In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split

# Read the original dataset
df = pd.read_csv('/content/drive/MyDrive/alldata_1_for_kaggle.csv', encoding='ISO-8859-1')
df.head()



Unnamed: 0.1,Unnamed: 0,0,a
0,0,Thyroid_Cancer,Thyroid surgery in children in a single insti...
1,1,Thyroid_Cancer,""" The adopted strategy was the same as that us..."
2,2,Thyroid_Cancer,coronary arterybypass grafting thrombosis ï¬b...
3,3,Thyroid_Cancer,Solitary plasmacytoma SP of the skull is an u...
4,4,Thyroid_Cancer,This study aimed to investigate serum matrix ...


In [None]:
df= df.drop (columns=['Unnamed: 0'])
df.columns = ['label', 'text']
df.head()

Unnamed: 0,label,text
0,Thyroid_Cancer,Thyroid surgery in children in a single insti...
1,Thyroid_Cancer,""" The adopted strategy was the same as that us..."
2,Thyroid_Cancer,coronary arterybypass grafting thrombosis ï¬b...
3,Thyroid_Cancer,Solitary plasmacytoma SP of the skull is an u...
4,Thyroid_Cancer,This study aimed to investigate serum matrix ...


In [None]:
len(df)

7570

In [None]:
df.drop_duplicates(subset="text", keep="first", inplace=True)
len(df)

996

In [None]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)  # 80% training, 20% testing

# Save the splits into separate CSV files
train_df.to_csv('/content/drive/MyDrive/train_data_new.csv', index=False)
test_df.to_csv('/content/drive/MyDrive/test_data_new.csv', index=False)

In [None]:
import pandas as pd
# The split files were written by `DataFrame.to_csv`, which encodes as UTF-8 by default,
# so read them back as UTF-8; decoding them as ISO-8859-1 garbles every non-ASCII character.
train_df = pd.read_csv('/content/drive/MyDrive/train_data_new.csv', encoding='utf-8')
test_df = pd.read_csv('/content/drive/MyDrive/test_data_new.csv', encoding='utf-8')

In [None]:
train_df.head()

Unnamed: 0,label,text
0,Lung_Cancer,""" Despite previous investigations it remains u..."
1,Colon_Cancer,""" the influence of anastomotic leakage al on ..."
2,Lung_Cancer,"""About half (49%) of UNCeqRMETA mutations had ..."
3,Colon_Cancer,"""circadian clocks have important physiological..."
4,Lung_Cancer,"""Thymidylate synthase (TS) gene expression in ..."


In [None]:
len(train_df), len(test_df)

(796, 200)

In [None]:
import csv

train_texts = train_df['text'].tolist()
train_labels = train_df['label'].tolist()

# Every entry carries the same instruction; only the article text and its class name vary
dataset_data = [
    {
        "instruction": "Categorize the input article into one of the three categories\nColon_Cancer\nLung_Cancer\nThyroid_Cancer\n\n.",
        "input": text,
        "output": label,
    }
    for text, label in zip(train_texts, train_labels)
]

# Destination of the instruction dataset (relative to the current working directory)
csv_file = 'dataset.csv'
fieldnames = ['instruction', 'input', 'output']

# Write a header row followed by one row per entry
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(dataset_data)

print(f"CSV file '{csv_file}' has been created.")

#test_texts = test_df['text'].tolist()
#test_labels = test_df['label'].tolist()

CSV file 'dataset.csv' has been created.


In [None]:
import pandas as pd


In [None]:
import pandas as pd
# Absolute path of the instruction dataset (assumes the CSV written earlier landed in /content — confirm the working directory)
dataset_name='/content/dataset.csv'
# The file was written with encoding='utf-8', so it is read back with the same encoding
data= pd.read_csv(dataset_name, encoding='utf-8')


In [None]:
data.head()

Unnamed: 0,instruction,input,output
0,Categorize the input article into one of the t...,""" Despite previous investigations it remains u...",Lung_Cancer
1,Categorize the input article into one of the t...,""" the influence of anastomotic leakage al on ...",Colon_Cancer
2,Categorize the input article into one of the t...,"""About half (49%) of UNCeqRMETA mutations had ...",Lung_Cancer
3,Categorize the input article into one of the t...,"""circadian clocks have important physiological...",Colon_Cancer
4,Categorize the input article into one of the t...,"""Thymidylate synthase (TS) gene expression in ...",Lung_Cancer


In [None]:
# Load dataset
dataset = load_dataset("csv", data_files = dataset_name, split = "train")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
print(f'Number of prompts: {len(dataset)}')
print(f'Column names are: {dataset.column_names}')

Number of prompts: 796
Column names are: ['instruction', 'input', 'output']


In [None]:
def create_prompt_formats(sample):
    """
    Renders one instruction-dataset sample into a single prompt string

    The prompt is made of an intro blurb, the instruction, the optional input,
    the response and an end marker, separated by blank lines. The result is
    stored on the sample itself under the key "text".

    :param sample: Mapping with "instruction", "input" and "output" keys
    :return: The same sample object, with the "text" key added
    """

    # Sections that are always present come first
    sections = [
        "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
        f"### Instruction:\n{sample['instruction']}",
    ]

    # The input section is skipped entirely when the sample has no input
    if sample["input"]:
        sections.append(f"Input:\n{sample['input']}")

    sections.append(f"### Response:\n{sample['output']}")
    sections.append("### End")

    # Sections are separated by one blank line
    sample["text"] = "\n\n".join(sections)

    return sample

In [None]:
create_prompt_formats(dataset[randrange(len(dataset))])

{'instruction': 'Categorize the input article into one of the three categories\nColon_Cancer\nLung_Cancer\nThyroid_Cancer\n\n.',
 'input': '"The lower replication rate of adipose meQTLs in whole-blood samples6 might be explained by the heterogeneity of different cell types in whole blood and by their more liberal P-value threshold (8.6Â\x9710?4) which led to the identification of a large number of weak cis-meQTLs. Compared with cis-regulation trans-eQTL regulation is typically considered to be more complex has smaller effect sizes and is more difficult to be replicated even in the same tissue. However in our study the lung trans-meQTLs are highly reproducible in TCGA lung breast and kidney tissues. Notably this similarity allows mapping meQTLs with substantially improved power by borrowing strength across tissues51. meQTL SNPs are strongly associated with multiple epigenetic marks. Chromatin regulators play a role in maintaining genomic integrity and anization52. We found that meQTL SN

In [None]:
def get_max_length(model, default = 1024):
    """
    Extracts maximum token length from the model configuration

    The configuration attributes "n_positions", "max_position_embeddings" and
    "seq_length" are checked in that order; the first one holding a truthy value wins.

    :param model: Hugging Face model (only its `config` attribute is read)
    :param default: Value returned when none of the attributes is set
    :return: Maximum sequence length
    """

    # Pull model configuration
    conf = model.config

    # Return the first length setting that is present and non-zero
    for length_setting in ("n_positions", "max_position_embeddings", "seq_length"):
        max_length = getattr(conf, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            return max_length

    # None of the settings was found in the model configuration
    print(f"Using default max length: {default}")
    return default

In [None]:
def preprocess_batch(batch, tokenizer, max_length):
    """
    Runs the tokenizer over the "text" column of a dataset batch

    :param batch: Dataset batch holding the prompts under the key "text"
    :param tokenizer: Model tokenizer (called directly on the list of prompts)
    :param max_length: Maximum number of tokens to emit from the tokenizer
    :return: Whatever the tokenizer returns for the batch
    """

    prompts = batch["text"]

    # Truncation is always on, so no encoding is longer than max_length
    encode_options = {"max_length": max_length, "truncation": True}

    return tokenizer(prompts, **encode_options)

In [None]:
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int, seed: int, dataset: "Dataset") -> "Dataset":
    """
    Tokenizes dataset for fine-tuning

    :param tokenizer (AutoTokenizer): Model tokenizer
    :param max_length (int): Maximum number of tokens to emit from the tokenizer
    :param seed (int): Random seed used to shuffle the dataset
    :param dataset: Instruction dataset object (must support `map`, `filter` and `shuffle`)
        with "instruction", "input" and "output" columns
    :return: Shuffled dataset of tokenized prompts that fit within `max_length`
    """

    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)

    # Apply preprocessing to each batch of the dataset & and remove "instruction", "input", "output", and "text" fields
    _preprocessing_function = partial(preprocess_batch, max_length = max_length, tokenizer = tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched = True,
        remove_columns = ["instruction", "input", "output", "text"],
    )

    # The tokenizer truncates to "max_length", so a sample with exactly "max_length" tokens
    # is one that reached the limit. Those samples are dropped rather than trained on,
    # because the response sits at the end of the prompt and would have been cut off.
    num_before = len(dataset)
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

    # Report the drop explicitly: with long inputs it can remove most of the dataset
    print(f"Kept {len(dataset)} of {num_before} samples shorter than {max_length} tokens")

    # Shuffle dataset
    dataset = dataset.shuffle(seed = seed)

    return dataset

In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
[1m[31mCannot authenticate throu

In [None]:
# Load model from Hugging Face Hub with model name and bitsandbytes configuration

bnb_config = create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype)

model, tokenizer = load_model(model_name, bnb_config)

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]



In [None]:
# Random seed
seed = 33

max_length = get_max_length(model)
preprocessed_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset)

Found max lenth: 2048
Preprocessing dataset...


Map:   0%|          | 0/796 [00:00<?, ? examples/s]

Map:   0%|          | 0/796 [00:00<?, ? examples/s]

Filter:   0%|          | 0/796 [00:00<?, ? examples/s]

In [None]:
print(preprocessed_dataset)

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 111
})


In [None]:
print(preprocessed_dataset[0])

{'input_ids': [1, 13866, 338, 385, 15278, 393, 16612, 263, 3414, 29889, 14350, 263, 2933, 393, 7128, 2486, 1614, 2167, 278, 2009, 29889, 13, 13, 2277, 29937, 2799, 4080, 29901, 13, 29907, 20440, 675, 278, 1881, 4274, 964, 697, 310, 278, 2211, 13997, 13, 1625, 265, 29918, 6028, 2265, 13, 29931, 686, 29918, 6028, 2265, 13, 1349, 29891, 1007, 29918, 6028, 2265, 13, 13, 29889, 13, 13, 4290, 29901, 13, 29908, 23036, 26229, 310, 278, 7744, 573, 323, 3038, 2933, 9251, 756, 1063, 28585, 304, 19224, 2820, 2462, 29871, 29896, 29900, 30212, 30344, 29896, 29946, 1400, 325, 5753, 3381, 411, 2087, 29899, 29902, 29875, 29899, 19903, 470, 2087, 29899, 19903, 372, 471, 10579, 15668, 304, 1284, 393, 3517, 325, 5753, 3381, 1258, 451, 4556, 738, 4328, 297, 18530, 4603, 1090, 1438, 5855, 518, 29896, 29906, 29962, 518, 29906, 29896, 1822, 2398, 445, 3117, 9432, 29879, 393, 372, 4893, 777, 931, 363, 278, 325, 5753, 457, 29899, 19910, 1133, 3677, 2101, 29899, 14940, 323, 9101, 304, 18414, 5987, 297, 8002, 369

In [None]:
def create_peft_config(r, lora_alpha, target_modules, lora_dropout, bias, task_type):
    """
    Builds the LoRA configuration used for parameter-efficient fine-tuning

    :param r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param target_modules: Names of the modules to apply LoRA to
    :param lora_dropout: Dropout Probability for LoRA layers
    :param bias: Specifies if the bias parameters should be trained
    :param task_type: Task type forwarded to `LoraConfig`
    :return: `LoraConfig` populated with the settings above
    """

    # Every argument maps one-to-one onto the LoraConfig keyword of the same name
    lora_settings = dict(
        r = r,
        lora_alpha = lora_alpha,
        target_modules = target_modules,
        lora_dropout = lora_dropout,
        bias = bias,
        task_type = task_type,
    )

    return LoraConfig(**lora_settings)

In [None]:
def find_all_linear_names(model):
    """
    Collects the names of the 4-bit linear layers that LoRA should be applied to

    :param model: Model whose `named_modules()` are scanned for `bnb.nn.Linear4bit` layers
    :return: List of unique layer names (last component of the dotted module path),
        with 'lm_head' excluded
    """

    linear_4bit_cls = bnb.nn.Linear4bit

    # Keep only the last path component, e.g. "....self_attn.q_proj" -> "q_proj"
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, linear_4bit_cls)
    }

    # The output head is never a LoRA target
    leaf_names.discard('lm_head')

    target_names = list(leaf_names)
    print(f"LoRA module names: {target_names}")
    return target_names

In [None]:
def print_trainable_parameters(model, use_4bit = False):
    """
    Prints the number of trainable parameters in the model.

    :param model: Model exposing `named_parameters()`
    :param use_4bit: When True, the trainable parameter count is halved before printing
    :return: None (the summary is printed)
    """

    trainable_params = 0
    all_param = 0

    for _, param in model.named_parameters():
        num_params = param.numel()
        # Fall back to `ds_numel` when the parameter reports zero elements
        # (presumably a DeepSpeed-partitioned parameter — confirm)
        if num_params == 0 and hasattr(param, "ds_numel"):
            num_params = param.ds_numel
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params

    if use_4bit:
        # Integer division keeps the count an int: `/= 2` produced a float,
        # which the `,d` format below rejects with a ValueError
        trainable_params //= 2

    # Avoid a ZeroDivisionError for a model without parameters
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"All Parameters: {all_param:,d} || Trainable Parameters: {trainable_params:,d} || Trainable Parameters %: {trainable_pct}"
    )

In [None]:
!pip3 install langchain openai
!pip install openai==0.27.7 wandb
!pip3 install langchain openai



In [None]:
%env OPENAI_API_KEY=sk-

env: OPENAI_API_KEY=sk-


In [None]:
def fine_tune(
    model,
    tokenizer,
    dataset,
    lora_r,
    lora_alpha,
    lora_dropout,
    bias,
    task_type,
    per_device_train_batch_size,
    gradient_accumulation_steps,
    warmup_steps,
    max_steps,
    learning_rate,
    fp16,
    logging_steps,
    output_dir,
    optim,
):
    """
    Wraps the pre-trained model with LoRA adapters, trains it and saves the result.

    :param model: Pre-trained Hugging Face model
    :param tokenizer: Model tokenizer (used by the language-modeling data collator)
    :param dataset: Preprocessed training dataset
    :param lora_r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param lora_dropout: Dropout probability for LoRA layers
    :param bias: Specifies if the bias parameters should be trained
    :param task_type: Task type forwarded to the LoRA configuration
    :param per_device_train_batch_size: Forwarded to `TrainingArguments`
    :param gradient_accumulation_steps: Forwarded to `TrainingArguments`
    :param warmup_steps: Forwarded to `TrainingArguments`
    :param max_steps: Forwarded to `TrainingArguments`
    :param learning_rate: Forwarded to `TrainingArguments`
    :param fp16: Forwarded to `TrainingArguments`
    :param logging_steps: Forwarded to `TrainingArguments`
    :param output_dir: Directory for the trainer output and the saved model
    :param optim: Forwarded to `TrainingArguments`
    :return: None
    """

    # Enable gradient checkpointing to reduce memory usage, then prepare the model for k-bit training
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)

    # Apply LoRA to the 4-bit linear layers found in the prepared model
    lora_targets = find_all_linear_names(model)
    lora_config = create_peft_config(lora_r, lora_alpha, lora_targets, lora_dropout, bias, task_type)
    model = get_peft_model(model, lora_config)

    # Report how many parameters will be trained
    print_trainable_parameters(model)

    # Assemble the trainer from its parts
    training_args = TrainingArguments(
        per_device_train_batch_size = per_device_train_batch_size,
        gradient_accumulation_steps = gradient_accumulation_steps,
        warmup_steps = warmup_steps,
        max_steps = max_steps,
        learning_rate = learning_rate,
        fp16 = fp16,
        logging_steps = logging_steps,
        output_dir = output_dir,
        optim = optim,
    )
    collator = DataCollatorForLanguageModeling(tokenizer, mlm = False)
    trainer = Trainer(
        model = model,
        train_dataset = dataset,
        args = training_args,
        data_collator = collator,
    )

    # Turn off `use_cache` on the model configuration before training starts
    model.config.use_cache = False

    # Train, then log and persist the training metrics and trainer state
    print("Training...")
    outcome = trainer.train()
    train_metrics = outcome.metrics
    trainer.log_metrics("train", train_metrics)
    trainer.save_metrics("train", train_metrics)
    trainer.save_state()
    print(train_metrics)

    # Write the trained model to the output directory
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok = True)
    trainer.model.save_pretrained(output_dir)

    # Drop this function's references and release cached GPU memory for the later weight merge
    del model
    del trainer
    torch.cuda.empty_cache()

In [None]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 16

# Alpha parameter for LoRA scaling
lora_alpha = 64

# Dropout probability for LoRA layers
lora_dropout = 0.1

# Bias
bias = "none"

# Task type
task_type = "CAUSAL_LM"

In [None]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/content/drive/MyDrive/result_new_2"

# Batch size per GPU for training
per_device_train_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 4

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Optimizer to use
optim = "paged_adamw_32bit"

# Number of training steps (overrides num_train_epochs)
max_steps = 20

# Linear warmup steps from 0 to learning_rate
warmup_steps = 2

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = True

# Log every X updates steps
logging_steps = 1

In [None]:
fine_tune(model,
      tokenizer,
      preprocessed_dataset,
      lora_r,
      lora_alpha,
      lora_dropout,
      bias,
      task_type,
      per_device_train_batch_size,
      gradient_accumulation_steps,
      warmup_steps,
      max_steps,
      learning_rate,
      fp16,
      logging_steps,
      output_dir,
      optim)

LoRA module names: ['gate_proj', 'v_proj', 'down_proj', 'q_proj', 'up_proj', 'k_proj', 'o_proj']
All Parameters: 3,540,389,888 || Trainable Parameters: 39,976,960 || Trainable Parameters %: 1.1291682911958425
Training...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.0877
2,1.9346
3,1.6923
4,1.7351
5,1.502
6,1.5029
7,1.5141
8,1.9251
9,1.8892
10,1.5449


***** train metrics *****
  epoch                    =       0.72
  total_flos               =  2281077GF
  train_loss               =     1.6906
  train_runtime            = 0:15:57.62
  train_samples_per_second =      0.084
  train_steps_per_second   =      0.021
{'train_runtime': 957.621, 'train_samples_per_second': 0.084, 'train_steps_per_second': 0.021, 'total_flos': 2449288063918080.0, 'train_loss': 1.69057697057724, 'epoch': 0.72}
Saving last checkpoint of the model...


In [None]:
# Load fine-tuned weights
model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map = "auto", torch_dtype = torch.bfloat16)
# Merge the LoRA layers with the base model
model = model.merge_and_unload()

# Save fine-tuned model at a new location
output_merged_dir = "/content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint"
os.makedirs(output_merged_dir, exist_ok = True)
model.save_pretrained(output_merged_dir, safe_serialization = True)

# Save tokenizer for easy inference
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(output_merged_dir)

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

('/content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint/tokenizer_config.json',
 '/content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint/special_tokens_map.json',
 '/content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint/tokenizer.json')

In [None]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )


In [None]:
tokenizer

LlamaTokenizerFast(name_or_path='TinyPixel/Llama-2-7B-bf16-sharded', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True)}, clean_up_tokenization_spaces=False)

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Reload the merged checkpoint from Drive, this time wrapped with a sequence-classification head.
# NOTE(review): the checkpoint was fine-tuned and saved as a causal LM, so it holds no weights for
# the classification head; `score.weight` is newly initialized on load (see the warning this cell
# prints). Predictions from this model are presumably not meaningful until that head is trained —
# confirm whether generation with the causal LM was intended instead.
# NOTE(review): `num_labels` is not passed although the dataset has three categories — verify the head size.
output_merged_dir = "/content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint"
model = AutoModelForSequenceClassification.from_pretrained(output_merged_dir)
tokenizer = AutoTokenizer.from_pretrained(output_merged_dir)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /content/drive/MyDrive/merged_result/news_classification_llama2_7b/final_merged_checkpoint and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
test_texts=test_df['text'][1]
print(test_texts)

"confidence. If you choose not to take part in this survey it will not affect the care you receive from the NHS in any way. Please do not write your name and address anywhere on the questionnaire as this information is not required. No information you give in this questionnaire will be shared in a way that allows you to be identified. How to complete the survey and how long it will take. The questionnaire is short and will take 5Â10?min to complete. Please try to answer every question. Please return your questionnaire even if you have not answered every question. If English is not your first language or if you if you have difficulty understanding the questions then please ask a relative or carer to help you complete the questionnaire. Questions or help? If you have any questions please contact your local lung clinical nurse specialist team. Please select one answer to each question by placing a in the appropriate box. There is space at the end of the survey for you to write any commen

In [None]:
# Reuse the EOS token for padding (same choice as in `load_model`). Adding a brand-new '[PAD]'
# token would give it an id outside the model's 32000-row embedding matrix.
tokenizer.pad_token = tokenizer.eos_token

input_text = test_texts
# Pass an explicit max_length: the tokenizer has no usable default, so `truncation=True`
# alone does not truncate anything.
inputs = tokenizer(
    input_text,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=model.config.max_position_embeddings,
)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_idx = logits.argmax().item()
#predicted_class = idx2label[predicted_class_idx]
print("Predicted class:", predicted_class_idx)



In [None]:
test_df['label'][1]

2