In [1]:
%%capture
# Install the notebook's dependencies (comment this cell out if they are already installed)
%pip install --upgrade accelerate peft bitsandbytes trl huggingface_hub
%pip install "transformers==4.38.2" # Bug occurred in v4.39.1 - AttributeError: 'torch.dtype' object has no attribute 'itemsize'
%pip install flash-attn --no-build-isolation #Nvidia download guide - https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2

In [2]:
from os import path, chdir
import sys

# Switch the working directory to the folder containing the entry point named
# in sys.argv[0], so that relative paths used later resolve from there.
# NOTE(review): inside a Jupyter kernel sys.argv[0] may be the kernel launcher
# rather than this notebook -- confirm this lands in the intended directory.
entry_point = path.realpath(sys.argv[0])
chdir(path.dirname(entry_point))

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from scripts.improve_result import improve_result
from scripts.jsonl_parser import read_jsonl, write_jsonl
from huggingface_hub import login

In [4]:
# Authenticate with the Hugging Face Hub (renders an interactive login widget in the notebook).
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Dtype used for computation while the weights are stored in 4 bits.
compute_dtype = torch.float16

# 4-bit NF4 quantisation with double quantisation enabled.
# NOTE: `attn_implementation` is an argument of `from_pretrained()`, not a
# BitsAndBytesConfig field. Passing it here was silently ignored, so it is
# intentionally not part of this config.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

# Hugging Face Hub id of the checkpoint used by this notebook.
model_name = "Tony177/codellama-13b-dockerfile-generation"

In [6]:
# Load the checkpoint with the quantisation settings defined in `quant_config`;
# device_map="auto" delegates layer placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    # FlashAttention-2 must be requested here: from_pretrained() is the call
    # that reads `attn_implementation` (requires the flash-attn package and a
    # supported GPU).
    attn_implementation="flash_attention_2",
)

# Keep the key/value cache enabled for generation.
model.config.use_cache = True

# Setting config.pretraining_tp to a value different than 1 will activate the
# more accurate but slower computation of the linear layers, which should
# better match the original logits. 1 keeps the regular (fast) path.
model.config.pretraining_tp = 1

# Kept from the original notebook, where it was added because of a warning
# about gradients during generation.
# NOTE(review): presumably unnecessary for pure inference -- confirm before removing.
model.enable_input_require_grads()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Load the tokenizer that belongs to the checkpoint from the Hugging Face Hub.
# `device_map` is a model-loading option and has no meaning for a tokenizer,
# so it is not passed here.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token as padding token and pad on the right-hand side
# (the original notebook set padding_side to "right" to fix an issue with fp16).
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [8]:
# Words the generation is forced to contain. Each inner list is a group of
# alternative spellings: the output must contain one spelling of the code
# fence and one spelling of the FROM instruction.
forced_words_dockerfile = ["```dockerfile", "```Dockerfile"]
forced_words_from = ["FROM", "from"]

# One list of token-id sequences per group, tokenised without BOS/EOS so only
# the words themselves are constrained.
forced_words_ids = [
    tokenizer(forced_words_dockerfile, add_special_tokens=False).input_ids,
    tokenizer(forced_words_from, add_special_tokens=False).input_ids,
]

# Words that must never be generated.
# Raw strings keep the backslashes literal: the previous "\begin(code)" was
# parsed as a backspace character ("\b") followed by "egin(code)", so the
# intended text was never actually banned.
bad_words = ["apk", r"\begin(code)", r"\end(code)", "EOF", "exit", "ONBUILD"]
bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids

In [9]:
def generate_text(tokenizer, model, prompt: str) -> str:
    """Generate a completion for ``prompt`` using constrained beam search.

    The instruction is wrapped in the ``<s>[INST] ... [/INST]`` template,
    decoded with 5 beams for at most 512 new tokens and then post-processed
    by ``improve_result``.

    Relies on the notebook-level ``bad_words_ids`` (token sequences that must
    not be generated) and ``forced_words_ids`` (token sequences that must be
    generated).

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Plain instruction text, without the ``[INST]`` wrapper.

    Returns:
        The value returned by ``improve_result`` for the wrapped prompt and
        the decoded generation.
    """
    prompt = "<s>[INST] " + prompt + " [/INST]"
    # NOTE(review): the template already starts with "<s>" and the tokenizer
    # may add its own BOS token as well -- confirm this matches the format the
    # model was fine-tuned on.
    # Move the inputs to the model's device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # `do_sample` is not passed, so unless the model's generation config
    # enables it this is plain (constrained) beam search, not sampling.
    # The tensors are passed by keyword: handing the whole tokenizer output to
    # generate() positionally crashed with KeyError: 'shape'.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        # Explicit mask: pad and EOS share the same id here.
        attention_mask=encoded["attention_mask"],
        bad_words_ids=bad_words_ids,
        force_words_ids=forced_words_ids,
        max_new_tokens=512,
        num_beams=5,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Only the first decoded sequence is used.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [10]:
def generate_text_INST(tokenizer, model, prompt: str) -> str:
    """Generate a completion for ``prompt`` using unconstrained beam search.

    The instruction is wrapped in the ``<s>[INST] ... [/INST]`` template,
    decoded with 5 beams for at most 512 new tokens with
    ``no_repeat_ngram_size=2``, and then post-processed by ``improve_result``.
    No forced or banned words are applied.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Plain instruction text, without the ``[INST]`` wrapper.

    Returns:
        The value returned by ``improve_result`` for the wrapped prompt and
        the decoded generation.
    """
    prompt = "<s>[INST] " + prompt + " [/INST]"
    # Move the inputs to the model's device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # `do_sample` is not passed, so unless the model's generation config
    # enables it this is plain beam search, not sampling.
    # The tensors are passed by keyword: handing the whole tokenizer output to
    # generate() positionally crashed with KeyError: 'shape'.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        # Explicit mask: pad and EOS share the same id here.
        attention_mask=encoded["attention_mask"],
        max_new_tokens=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Only the first decoded sequence is used.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [11]:
def generate_text_FORCED(tokenizer, model, prompt: str) -> str:
    """Generate a completion for ``prompt`` using constrained beam search.

    Uses the same constraints as ``generate_text`` (notebook-level
    ``bad_words_ids`` and ``forced_words_ids``, 5 beams, at most 512 new
    tokens) and additionally passes ``early_stopping=False`` explicitly.
    The decoded text is post-processed by ``improve_result``.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Plain instruction text, without the ``[INST]`` wrapper.

    Returns:
        The value returned by ``improve_result`` for the wrapped prompt and
        the decoded generation.
    """
    prompt = "<s>[INST] " + prompt + " [/INST]"
    # Move the inputs to the model's device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # `do_sample` is not passed, so unless the model's generation config
    # enables it this is plain (constrained) beam search, not sampling.
    # The tensors are passed by keyword: handing the whole tokenizer output to
    # generate() positionally crashed with KeyError: 'shape'.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        # Explicit mask: pad and EOS share the same id here.
        attention_mask=encoded["attention_mask"],
        bad_words_ids=bad_words_ids,
        force_words_ids=forced_words_ids,
        max_new_tokens=512,
        num_beams=5,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Only the first decoded sequence is used.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [12]:
# Try the constrained generation on a WordPress prompt.
print(generate_text(tokenizer, model, "Generate a dockerfile of Wordpress 5.7"))

[INST] Generate a dockerfile of Wordpress 5.7 [/INST] ```dockerfile

# Use the official PHP 7.4 image as the base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive

# Set the maintainer information

# Update the package lists for upgrades and new package installations
RUN apt-get update

# Install necessary packages for Wordpress
RUN apt-get install -y --no-install-recommends \
        libzip-dev \
        zip \
        unzip \
        libpng-dev \
        libjpeg-dev \
        libpng-dev \
        libzip-dev \
        zip \
        unzip \
        libpng-dev \
        libjpeg-dev \
        libpng-dev \
        libzip-dev \
        zip \
        unzip \
        libpng-dev \
        libjpeg-dev \
        libpng-dev \
        libzip-dev \
        zip \
        unzip \
        libpng-dev \
        libjpeg-dev \
        libpng-dev \
        libzip-dev \
        zip \
        unzip \
        libpng-dev \
        libjpeg-dev \
        libpng-dev \
        libzip-dev \
        zip \


In [13]:
# Try the constrained generation on a Python prompt.
print(generate_text(tokenizer, model, "Generate a dockerfile of Python 3.7"))

# Start from the official Python 3.7 image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive

# Set the working directory to /usr/src/app
WORKDIR /usr/src/app

# Copy the current directory contents into the container at /usr/src/app
# COPY . /usr/src/app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Make port 80 available to the world outside this container
EXPOSE 80

# Define environment variable
ENV NAME World

# Run app.py when the container launches
CMD ["python", "app.py"]


In [14]:
# Try the constrained generation on a Ruby prompt.
print(generate_text(tokenizer, model, "Generate a dockerfile of Ruby 3.2.1"))

# Start from the official Ruby 3.2.1 image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive

# Set the working directory to /app
WORKDIR /app

# Copy the Gemfile and Gemfile.lock to the working directory
# COPY Gemfile Gemfile.lock ./

# Install the dependencies
RUN bundle install

# Copy the rest of the application code to the working directory
# COPY . .

# Expose port 3000 for the application
EXPOSE 3000

# Set the default command to run when the container starts
CMD ["rails", "server", "-b", "0.0.0.0"]


In [16]:
from tqdm import tqdm

# Read the dataset; read_jsonl yields two sequences
# (presumably the prompts and their reference outputs -- verify against scripts.jsonl_parser).
input_list, output_list = read_jsonl("../dataset.jsonl")

# Generate one answer per input. An explicit loop (rather than a comprehension)
# keeps the partial results available if this long-running cell is interrupted.
codellama_output_list = []
for instruction in tqdm(input_list):
    generated = generate_text(tokenizer, model, instruction)
    codellama_output_list.append(generated)

# Store inputs, reference outputs and model outputs together.
write_jsonl(input_list, output_list, codellama_output_list, "../dataset_llm.jsonl")

100%|██████████| 80/80 [3:07:39<00:00, 140.75s/it]  
