In [1]:
%%capture
# Install Unsloth from its GitHub repository, then xformers, trl, peft,
# accelerate and bitsandbytes. `%%capture` hides the pip output.
# NOTE(review): the Unsloth install tracks the repository's default branch and
# peft/accelerate/bitsandbytes are unpinned, so a re-run may resolve to
# different versions than the ones shown in the recorded outputs.

!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps: install exactly these packages without pulling their dependency trees.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [2]:
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
import torch
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login
import time

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [3]:
# Mount Google Drive at /content/drive (Colab only); the notebook later writes
# its output directory under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Show the Hugging Face login widget so the Hub token is entered interactively.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import os

max_seq_length = 512  # Maximum sequence length passed to the model and the trainer
dtype = torch.float16  # Compute dtype requested from the loader
load_in_4bit = True  # Load the 4-bit quantized weights

# Never commit access tokens to a notebook. Read the token from the HF_TOKEN
# environment variable instead; when it is unset, `None` is passed and the Hub
# client is expected to fall back to the credentials stored by notebook_login().
# NOTE(review): the token that used to be hardcoded here is exposed in the
# notebook history and should be revoked in the Hugging Face account settings.
token = os.environ.get("HF_TOKEN")

# Download (or reuse from cache) the pre-quantized base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = token,
)

==((====))==  Unsloth 2024.8: Fast Mistral patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/137k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Unsloth: Will load unsloth/mistral-7b-instruct-v0.3-bnb-4bit as a legacy tokenizer.


In [6]:
# Load the "train" split of the MattCoddity/dockerNLcommands dataset from the Hub.
dataset = load_dataset("MattCoddity/dockerNLcommands", split="train")
dataset  # bare last expression: display the dataset summary (features, row count)

Downloading readme:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/543k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2415 [00:00<?, ? examples/s]

Dataset({
    features: ['input', 'output', 'instruction'],
    num_rows: 2415
})

**Fine-tune on your own data**

In [57]:
# import pandas as pd

# data_dir = r".\data.Docker_dataset.csv"

# df = pd.DataFrame(dataset)
# df.to_csv(data_dir, index=False)
# df.head()

In [58]:
# dataset = Dataset.from_csv(data_dir)

In [8]:
# Wrap the loaded model with LoRA adapters (rank 16, alpha 16, no dropout, no
# bias terms) on the attention projections (q/k/v/o) and the MLP projections
# (gate/up/down). Note that this rebinds `model` to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth",
    # random_state is left commented out, so the library default applies.
    # random_state = 3407,
    use_rslora = False,
    loftq_config = None, # No LoftQ configuration supplied
)

Unsloth 2024.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


**Format the data for fine-tuning**

In [9]:
# Alpaca-style template with three positional slots, filled in this order:
# instruction, input, response.
alpaca_prompt = """You are an expert in Docker commands.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Appended to every training example; without it generation will go on forever.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of dataset rows into complete training prompts.

    Args:
        examples: Batched mapping with parallel "instruction", "input" and
            "output" sequences.

    Returns:
        A dict with a single "text" key holding one formatted prompt per row,
        each terminated by ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            alpaca_prompt.format(instruction, user_input, response) + EOS_TOKEN
            for instruction, user_input, response in rows
        ],
    }


# Add the rendered "text" column to the dataset, processing rows in batches.
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/2415 [00:00<?, ? examples/s]

In [10]:
# Preview one formatted training example. Indexing the row first avoids
# materialising the entire "text" column just to read a single element.
dataset[1]["text"]

"You are an expert in Docker commands.\n\n### Instruction:\ntranslate this sentence in docker command\n\n### Input:\nFetch the containers that have exited with a status code of 1.\n\n### Response:\ndocker ps -a --filter 'status=exited' --filter 'exited=1'</s>"

In [11]:
# Supervised fine-tuning setup: trains on the "text" column of `dataset` for
# two epochs using the hyperparameters below.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column holding the fully formatted prompts
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 examples per optimizer step on one device
        warmup_steps = 5,
        # max_steps = 60, # Set num_train_epochs = 1 for full training runs
        num_train_epochs = 2,
        learning_rate = 2e-4,
        # Exactly one of fp16/bf16 is enabled, depending on bfloat16 support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,  # log the training loss at every step
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 74,
        output_dir = "outputs",
    )
)

Map (num_proc=2):   0%|          | 0/2415 [00:00<?, ? examples/s]

In [12]:
# Run the fine-tuning loop and keep whatever train() returns for later inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,415 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 604
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.5297
2,2.405
3,2.2582
4,1.9916
5,1.5506
6,1.1788
7,0.8539
8,0.8716
9,0.7387
10,0.7367


In [22]:
def calculate_time(start_time, end_time):
    """Print the time between two timestamps as hours, minutes and seconds.

    Args:
        start_time: Timestamp in seconds taken before the measured work.
        end_time: Timestamp in seconds taken after the measured work.

    Returns:
        None. The result is printed as ``Elapsed time: <h>h <m>m <s.ss>s``.
    """
    elapsed = end_time - start_time

    # Split off whole hours first, then whole minutes from what remains.
    whole_hours, within_hour = divmod(elapsed, 3600)
    whole_minutes = within_hour // 60
    leftover_seconds = elapsed % 60

    print(f"Elapsed time: {int(whole_hours)}h {int(whole_minutes)}m {leftover_seconds:.2f}s")

In [59]:
# Time one generation for a sample request, using the alpaca_prompt template
# defined earlier in the notebook.
start_time = time.time()

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "translate this sentence in docker command.", # instruction
        "Fetch the containers that have exited with a status code of 1.", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
# `command` keeps the raw decode (special tokens included).
command = tokenizer.batch_decode(outputs)

# Keep only the text after the last response marker and remove the EOS marker
# when it is actually present. A fixed [:-4] slice would cut four real
# characters whenever generation stops at max_new_tokens without emitting EOS.
response = command[0].split("### Response:\n")[-1]
eos = tokenizer.eos_token
if eos and response.endswith(eos):
    response = response[:-len(eos)]
print(response)


end_time = time.time()

calculate_time(start_time, end_time)


docker ps -a --filter 'status=exited' --filter 'exited=1'
Elapsed time: 0h 0m 1.66s


In [60]:
# Print the most recent response again. The EOS marker is removed only when it
# is present, instead of unconditionally slicing off the last four characters.
response = command[0].split("### Response:\n")[-1]
eos = tokenizer.eos_token
if eos and response.endswith(eos):
    response = response[:-len(eos)]
print(response)

docker ps -a --filter 'status=exited' --filter 'exited=1'


In [61]:
# Time one generation while streaming the tokens to stdout as they are
# produced, using the alpaca_prompt template defined earlier in the notebook.
start_time = time.time()

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "translate this sentence in docker command.", # instruction
        "Fetch the containers that have exited with a status code of 1.", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# TextStreamer comes from the notebook's import cell; the streamer echoes the
# prompt as well because skip_prompt is not set.
text_streamer = TextStreamer(tokenizer)
outputs = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
# `command` keeps the raw decode (special tokens included).
command = tokenizer.batch_decode(outputs)


# Keep only the text after the last response marker and remove the EOS marker
# when it is actually present. A fixed [:-4] slice would cut four real
# characters whenever generation stops at max_new_tokens without emitting EOS.
response = command[0].split("### Response:\n")[-1]
eos = tokenizer.eos_token
if eos and response.endswith(eos):
    response = response[:-len(eos)]
print(response)


end_time = time.time()

# Print the time taken
calculate_time(start_time, end_time)

<s>You are an expert in Docker commands.

### Instruction:
translate this sentence in docker command.

### Input:
Fetch the containers that have exited with a status code of 1.

### Response:
docker ps -a --filter 'status=exited' --filter 'exited=1'</s>
docker ps -a --filter 'status=exited' --filter 'exited=1'
Elapsed time: 0h 0m 2.36s


In [18]:
# Destination folder on the mounted Google Drive.
out_dir = "/content/drive/MyDrive/Docker_lora_model"

# Write the fine-tuned model files and the tokenizer files into out_dir.
model.save_pretrained(out_dir) # Local saving
tokenizer.save_pretrained(out_dir)

('/content/drive/MyDrive/Docker_lora_model/tokenizer_config.json',
 '/content/drive/MyDrive/Docker_lora_model/special_tokens_map.json',
 '/content/drive/MyDrive/Docker_lora_model/tokenizer.model',
 '/content/drive/MyDrive/Docker_lora_model/added_tokens.json')

**Set the condition below to `True` to load your saved model**

In [19]:
import os

# Flip to True to reload the saved model from out_dir instead of reusing the
# model that is already in memory.
if False:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = out_dir, # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        # Never hardcode access tokens; read HF_TOKEN from the environment.
        # When unset, None is passed and the cached login is expected to be used.
        token = os.environ.get("HF_TOKEN"),
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Uses the alpaca_prompt template defined earlier in the notebook.
inputs = tokenizer(
[
    alpaca_prompt.format(
        "translate this sentence in docker command.", # instruction
        "Fetch the containers that have exited with a status code of 1.", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# skip_prompt = True: stream only the newly generated text, not the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)

# temperature / top_k / top_p only take effect when do_sample=True; without it
# they are ignored and only trigger warnings, so they are omitted and the
# deterministic decoding that was already in effect is made explicit.
_ = model.generate(input_ids = inputs.input_ids, attention_mask = inputs.attention_mask,
                   streamer = text_streamer, max_new_tokens = 128,
                   pad_token_id = tokenizer.eos_token_id, do_sample = False)

==((====))==  Unsloth 2024.8: Fast Mistral patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Will load unsloth/mistral-7b-instruct-v0.3-bnb-4bit as a legacy tokenizer.


docker ps -a --filter 'status=exited' --filter 'exited=1'</s>


In [48]:
import os

from transformers import TextStreamer, GenerationConfig
from unsloth import FastLanguageModel

out_dir = "/content/drive/MyDrive/Docker_lora_model"

# Reload the saved model and tokenizer from out_dir.
model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = out_dir, # YOUR MODEL YOU USED FOR TRAINING
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            # Never hardcode access tokens; read HF_TOKEN from the environment.
            # When unset, None is passed and the cached login is expected to be used.
            token = os.environ.get("HF_TOKEN"),
            )
FastLanguageModel.for_inference(model) # Enable native 2x faster inference


# Generation settings shared by get_command().
# The previous temperature / top_k / top_p values were never applied because
# do_sample was not enabled, and early_stopping only affects beam search
# (num_beams > 1). They are dropped so the configuration states what actually
# happens: deterministic decoding with a repetition penalty.
generation_config = GenerationConfig(
    max_new_tokens=128,      # Maximum number of new tokens to generate
    do_sample=False,         # Deterministic decoding (no sampling)
    repetition_penalty=1.2,  # Penalize repeated tokens
    num_beams=1,             # Single beam, i.e. no beam search
)

==((====))==  Unsloth 2024.8: Fast Mistral patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Will load unsloth/mistral-7b-instruct-v0.3-bnb-4bit as a legacy tokenizer.


In [62]:
def get_command(input_text):
    """Translate a natural-language request into a Docker command.

    Relies on the notebook-level ``alpaca_prompt``, ``tokenizer``, ``model``
    and ``generation_config`` objects.

    Args:
        input_text: The request to translate, placed in the prompt's input slot.

    Returns:
        The generated text only (prompt and special tokens removed), with
        surrounding whitespace stripped.
    """
    prompt = alpaca_prompt.format(
        "translate this sentence in docker command.",  # instruction
        input_text,  # input
        "",  # output - leave this blank for generation!
    )
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    outputs = model.generate(**inputs, generation_config=generation_config)

    # Decode only the tokens produced after the prompt and let the tokenizer
    # drop special tokens. Slicing a fixed [:-4] off the decoded string would
    # cut real characters whenever the output does not end with "</s>"
    # (for example when generation stops at max_new_tokens).
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


In [63]:
# Try the helper on the same request used in the inference cells above.
get_command("Fetch the containers that have exited with a status code of 1.")

"docker ps -a --filter 'status=exited' --filter 'exited=1'"

In [66]:
# Try the helper on a second request: list every container with its status.
get_command("Give me a list of all containers, indicating their status as well.")

'docker ps -a'