In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m47.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [55]:
import pandas as pd
from datasets import Dataset

### Prepare the dataset

In [35]:
# The instruction dataset to use
dataset_name = "CShorten/ML-ArXiv-Papers"


In [None]:

# Load dataset (you can process it here)
dataset = load_dataset(dataset_name, split="train")

In [37]:
dataset.shape

(117592, 4)

In [39]:
type(dataset)

datasets.arrow_dataset.Dataset

In [40]:
first_1000_rows = dataset[:1000]


In [71]:
# Convert to DataFrame
df = pd.DataFrame(first_1000_rows)
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,title,abstract
0,0,0.0,Learning from compressed observations,The problem of statistical learning is to co...
1,1,1.0,Sensor Networks with Random Links: Topology De...,"In a sensor network, in practice, the commun..."
2,2,2.0,The on-line shortest path problem under partia...,The on-line shortest path problem is conside...
3,3,3.0,A neural network approach to ordinal regression,Ordinal regression is an important type of l...
4,4,4.0,Parametric Learning and Monte Carlo Optimization,This paper uncovers and explores the close r...


In [72]:
df['text'] = df.apply(lambda row: f"<s>[INST] {row['title']} [/INST] {row['abstract']}</s>", axis=1)


In [73]:
df = df[['text']]

In [74]:
df['text'][0]

'<s>[INST] Learning from compressed observations [/INST]   The problem of statistical learning is to construct a predictor of a random\nvariable $Y$ as a function of a related random variable $X$ on the basis of an\ni.i.d. training sample from the joint distribution of $(X,Y)$. Allowable\npredictors are drawn from some specified class, and the goal is to approach\nasymptotically the performance (expected loss) of the best predictor in the\nclass. We consider the setting in which one has perfect observation of the\n$X$-part of the sample, while the $Y$-part has to be communicated at some\nfinite bit rate. The encoding of the $Y$-values is allowed to depend on the\n$X$-values. Under suitable regularity conditions on the admissible predictors,\nthe underlying family of probability distributions and the loss function, we\ngive an information-theoretic characterization of achievable predictor\nperformance in terms of conditional distortion-rate functions. The ideas are\nillustrated on the e

In [75]:
dataset2 = Dataset.from_pandas(df)

### Prepare the Model

In [76]:
# The model that you want to train from the Hugging Face hub
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"


# Fine-tuned model name
new_model = "Tinyllama-1.1B-ML-ArXiv-Papers-1"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results2"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

### Model Training

In [77]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training





The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [78]:

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset2,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



{'loss': 2.486, 'learning_rate': 0.0001975746552556772, 'epoch': 0.1}
{'loss': 2.4308, 'learning_rate': 0.00018550053929480202, 'epoch': 0.2}
{'loss': 2.2466, 'learning_rate': 0.00016449948488669639, 'epoch': 0.3}
{'loss': 2.3234, 'learning_rate': 0.000136764169663272, 'epoch': 0.4}
{'loss': 2.1874, 'learning_rate': 0.00010519038181318999, 'epoch': 0.5}
{'loss': 2.2553, 'learning_rate': 7.307467669163655e-05, 'epoch': 0.6}
{'loss': 2.1855, 'learning_rate': 4.377019014049223e-05, 'epoch': 0.7}
{'loss': 2.2713, 'learning_rate': 2.03365443542764e-05, 'epoch': 0.8}
{'loss': 2.1713, 'learning_rate': 5.22039891260262e-06, 'epoch': 0.9}
{'loss': 2.2642, 'learning_rate': 0.0, 'epoch': 1.0}
{'train_runtime': 204.6009, 'train_samples_per_second': 4.888, 'train_steps_per_second': 1.222, 'train_loss': 2.282176528930664, 'epoch': 1.0}


In [79]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Reinforcement learning solution for solving mission critical problems?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])



[INST] Reinforcement learning solution for solving mission critical problems? [/INST] 
In the field of mission critical problems, the solution of the problem is critical for the success of the mission. The solution should be
solved in a timely and efficient manner. The solution should be robust and should be able to handle various types of
problems. The solution should be scalable and should be able to handle large volumes of data. The solution should be
efficient and should not consume too much computational resources. The solution should be able to handle various
environments and should be able to adapt to changing environments. The solution should be able to handle various
types of data and should be able to handle various types of problems. The solution should be able to handle various
types of data and should be able to handle various types of problems. The solution should be able to handle various
types of data and should be able to handle various types of problems. The solution 

In [81]:
import warnings
warnings.filterwarnings('ignore')
logging.set_verbosity(logging.CRITICAL)
# Run text generation pipeline with our next model
prompt = "Reinforcement learning solution for solving mission critical problems?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200,repetition_penalty=1.1)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] Reinforcement learning solution for solving mission critical problems? [/INST] 
In the field of mission critical problems, there are many situations where a decision-making process is required. For example, in the healthcare sector, decisions such as whether to perform an operation or not, whether to administer a drug or not, and whether to intervene in a patient's condition are examples of such situations. In these cases, the decision-maker must make a choice based on incomplete information about the problem. The decision-maker may have limited resources, such as time, money, or human resources, which can affect the decision-making process. Therefore, it is necessary to develop a solution that can solve such complex decision-making problems.

In this paper, we propose a reinforcement learning solution for solving mission critical problems. Our proposed solution uses a deep neural network (DNN) to learn the decision-making process from data. We use the


In [82]:

# Empty VRAM
del model
del pipe
del trainer
import gc
gc.collect()
gc.collect()

14443

In [83]:
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


In [84]:
###Use below code if not getting "UTF-8" on above print

import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

In [85]:
warnings.filterwarnings('ignore')
logging.set_verbosity(logging.CRITICAL)
# Run text generation pipeline with our next model
prompt = "Reinforcement learning solution for solving mission critical problems?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200,repetition_penalty=1.1)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] Reinforcement learning solution for solving mission critical problems? [/INST] 
The problem of finding the optimal solution to a complex and mission-critical problem is a challenging task. The complexity of the problem can be attributed to its high dimensionality, nonlinearity, and interdependence with other variables. In this paper, we propose a reinforcement learning (RL) approach to solve such complex problems. We use RL to learn a policy that optimizes the objective function of the problem. Our proposed approach is based on the Q-learning algorithm, which has been shown to be effective in solving many real-world problems. We demonstrate the effectiveness of our approach by simulating a complex problem and comparing it with a state-of-the-art solution. Our results show that our approach outperforms the state-of-the-art solution in terms of both efficiency and accuracy.

[/INST]



In [None]:
!huggingface-cli login

In [87]:
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

pytorch_model.bin:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/epsil/Tinyllama-1.1B-ML-ArXiv-Papers-1/commit/364cc53fbee53ce029c4c6a3ae593a37173127dd', commit_message='Upload tokenizer', commit_description='', oid='364cc53fbee53ce029c4c6a3ae593a37173127dd', pr_url=None, pr_revision=None, pr_num=None)