In [1]:
%%capture
# Install the notebook's dependencies (output is hidden by %%capture); skip this cell if they are already installed
%pip install --upgrade accelerate peft bitsandbytes trl huggingface_hub
%pip install "transformers==4.38.2" # Bug occurred in v4.39.1 - AttributeError: 'torch.dtype' object has no attribute 'itemsize'
%pip install flash-attn --no-build-isolation #Nvidia download guide - https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2

In [2]:
from os import path,chdir
import sys
# Change the working directory to this file's location so relative paths resolve next to it.
# Only done when the code runs as a plain script (where __file__ is defined): inside a
# Jupyter kernel sys.argv[0] is the ipykernel launcher, so deriving the directory from it
# would move the process into the ipykernel install directory instead of the notebook's
# folder. In a kernel the working directory is left untouched.
_this_file = globals().get("__file__")
if _this_file:
    chdir(path.dirname(path.realpath(_this_file)))

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from scripts.improve_result import improve_result
from huggingface_hub import login

In [4]:
# Authenticate with the Hugging Face Hub; in a notebook this renders an interactive login widget.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Dtype used for the matrix multiplications performed on the de-quantized 4-bit weights.
compute_dtype = torch.float16

# 4-bit NF4 quantization with nested ("double") quantization of the quantization constants.
# NOTE: `attn_implementation` is not a quantization option. It is an argument of
# `from_pretrained()`, and passing it to BitsAndBytesConfig had no effect on the attention
# kernel, so it is no longer passed here.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

# Hub repository id of the fine-tuned checkpoint to load.
model_name = "Tony177/codellama-13b-dockerfile-generation"

In [6]:
# Load the quantized checkpoint and let accelerate place the layers on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    # The attention kernel is chosen by the model loader (not by the quantization config),
    # so FlashAttention-2 has to be requested here to actually be used.
    attn_implementation="flash_attention_2",
    # FlashAttention-2 only supports half-precision dtypes; fp16 matches the 4-bit compute dtype.
    torch_dtype=torch.float16,
)
model.config.use_cache = False
# pretraining_tp == 1 selects the regular linear-layer computation. Values other than 1
# activate the slower variant that more closely matches the original logits.
model.config.pretraining_tp = 1
model.enable_input_require_grads()  # Added by the author to address a warning about gradients during generation

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Load the tokenizer from Hugging Face and set padding_side to "right" to fix the issue with fp16.
# A tokenizer holds no tensors, so no device placement argument is passed.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token for padding. No separate '[PAD]' token is registered: it would be
# overridden by this assignment right away and would only leave an extra, unused entry
# in the vocabulary.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [8]:
def generate_text(tokenizer, model, prompt: str) -> str:
    """Generate a completion for ``prompt`` with beam search and post-process it.

    Args:
        tokenizer: Callable tokenizer that also provides ``batch_decode`` and ``eos_token_id``.
        model: Causal language model providing ``generate`` and ``device``.
        prompt: Prompt text; it is tokenized exactly as given.

    Returns:
        The value of ``improve_result(prompt, decoded_text)``, where ``decoded_text`` is the
        first returned sequence decoded without special tokens.
    """
    # Put the inputs on the model's own device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # do_sample is not set, so with num_beams > 1 this is plain (deterministic) beam search.
    # The tensors are passed by keyword: generate() expects tensors, not the tokenizer's
    # whole output object. The attention mask is passed explicitly because the pad token
    # equals the EOS token, so it cannot be inferred reliably from the ids.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # A single sequence is returned, so the decoded list has one element: the response.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [9]:
def generate_text_INST(tokenizer, model, prompt: str) -> str:
    """Wrap ``prompt`` in ``[INST]`` instruction tags, generate with beam search, post-process.

    Args:
        tokenizer: Callable tokenizer that also provides ``batch_decode`` and ``eos_token_id``.
        model: Causal language model providing ``generate`` and ``device``.
        prompt: Instruction text; it is wrapped as ``"<s>[INST] " + prompt + " [/INST]"``.

    Returns:
        The value of ``improve_result(wrapped_prompt, decoded_text)``, where ``decoded_text``
        is the first returned sequence decoded without special tokens.
    """
    # NOTE(review): the literal "<s>" may duplicate a BOS token the tokenizer adds itself;
    # confirm against the tokenizer configuration before changing the template.
    prompt = "<s>[INST] " + prompt + " [/INST]"
    # Put the inputs on the model's own device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # do_sample is not set, so with num_beams > 1 this is plain (deterministic) beam search.
    # The tensors are passed by keyword: generate() expects tensors, not the tokenizer's
    # whole output object. The attention mask is passed explicitly because the pad token
    # equals the EOS token, so it cannot be inferred reliably from the ids.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # A single sequence is returned, so the decoded list has one element: the response.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [10]:
# Alternative spellings for each piece of text the constrained generation must contain.
forced_words_dockerfile = ["```dockerfile", "```Dockerfile"]
forced_words_from = ["FROM", "from"]

# One entry per required piece of text; each entry lists the token ids of its alternatives.
forced_words_ids = [
    tokenizer(forced_words_dockerfile, add_special_tokens=False).input_ids,
    tokenizer(forced_words_from, add_special_tokens=False).input_ids,
]

# Text the generation must not produce. The backslash of "\\begin(code)" is escaped:
# written as "\b" it would be the backspace control character rather than the literal
# text "\begin", unlike the neighbouring "\\end(code)" entry.
bad_words = ["apk", "\\begin(code)", "\\end(code)", "EOF", "exit"]
bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids


In [11]:
def generate_text_FORCED(tokenizer, model, prompt: str) -> str:
    """Like the ``[INST]``-wrapped generation, but with forced and banned token sequences.

    Reads the notebook-level ``forced_words_ids`` and ``bad_words_ids`` and passes them to
    ``model.generate`` as ``force_words_ids`` and ``bad_words_ids``.

    Args:
        tokenizer: Callable tokenizer that also provides ``batch_decode`` and ``eos_token_id``.
        model: Causal language model providing ``generate`` and ``device``.
        prompt: Instruction text; it is wrapped as ``"<s>[INST] " + prompt + " [/INST]"``.

    Returns:
        The value of ``improve_result(wrapped_prompt, decoded_text)``, where ``decoded_text``
        is the first returned sequence decoded without special tokens.
    """
    # NOTE(review): the literal "<s>" may duplicate a BOS token the tokenizer adds itself;
    # confirm against the tokenizer configuration before changing the template.
    prompt = "<s>[INST] " + prompt + " [/INST]"
    # Put the inputs on the model's own device instead of assuming "cuda".
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # do_sample is not set, so this is deterministic beam search, constrained by the
    # forced/banned sequences. The tensors are passed by keyword: generate() expects
    # tensors, not the tokenizer's whole output object. The attention mask is passed
    # explicitly because the pad token equals the EOS token.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        bad_words_ids=bad_words_ids,
        force_words_ids=forced_words_ids,
        max_new_tokens=512,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # A single sequence is returned, so the decoded list has one element: the response.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [12]:
# WordPress prompt, sent to the model without any instruction wrapping.
print(
    generate_text(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Wordpress 5.7",
    )
)

Generate a dockerfile of Wordpress 5.7.2

```
docker build -t my-wordpress-image:latest https://raw.githubusercontent.com/your-github-username/dockerfiles/master/wordpress/Dockerfile --build-arg WORDPRESS_VERSION=5-7-2 --no-cache --force-rm=true --squash --compress --disable-content-trust --security-opt seccomp=unconfined --label maintainer="Your Name <your.email@domain.tld>" --network=host --platform=linux/amd64 --pull --quiet --shm-size=1g --ulimit=nproc=65535 --userns=keep-id --volume /sys/fs/cgroup:ro --with-registry-mirror=https://dockerhub.azk8s.cn/ --add-host=kubernetes.default.svc:10.96.123.456 --cap-add=SYS_ADMIN --device=/dev/fuse --gpus all:NVIDIA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 --health-cmd="curl --fail http://localhost:80/wp-admin/ || exit 1" --hostname=wordpress --init=false --isolation=default --log-driver=journald --mac-address=92:d0:c6:0a:29:33 --memory-reservation=2048m --oom-score-adj=-500 --privileged --read-only --restart=unless-stopped --runtime=runc --sysctl=net.

In [13]:
# WordPress prompt, wrapped in [INST] tags by the helper.
print(
    generate_text_INST(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Wordpress 5.7",
    )
)

[INST] Generate a dockerfile of Wordpress 5.7 [/INST] Here's a Dockerfile that sets up a WordPress container with PHP 7.4, Apache, and MySQL:

```Dockerfile
# Start from the official Ubuntu 20.04 (Focal Fossa) base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
    apt install -y --no-install-recommends apache2 libapache2-mod-php mysql-server php-mysql php libzip-dev zip unzip libxml++2.6-utils libxslt1.1 libcurl4 libgd3 libmagickwand-6.q16 libpq5 libsqlite3-0 libwebp6 libyajl2 liblz4-1 libtiff-tools libxpm4 libfreetype6 fontconfig libharfbuzz0b libjbig2dec libjpeg-turbo8 libmcrypt4 mcrypt libmhash2 mhash libonig4 pkg-config zip zlib1g libicu[0-9]{0,3} libidn11 curl ca-certificates wget
ENTRYPOINT [ "dockerize", "-template", "/etc/wordpress/config.php.tmpl:/var/www/html/wp-content/plugins/your-plugin-name/includes/class-wc-yourpluginname-gateway-paypal-express-checkout-with-braintree-and-stripe-payment-method-for-wo

In [14]:
# WordPress prompt, generated with the forced and banned word constraints.
print(
    generate_text_FORCED(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Wordpress 5.7",
    )
)

[INST] Generate a dockerfile of Wordpress 5.7 [/INST] ```dockerfile

# Start from a base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update \
    && docker-php-ext-install -j$(nproc) pdo_mysql mbstring zip gd xmlrpc xml gmp bcmath intl opcache zip pcntl bz2 curl ftp gettext mcrypt mysqli json soap sockets tokenizer xmlreader xmlwriter xsl xdebug zip-opcache
WORKDIR /var/www/html
EXPOSE 80/tcp 443/tcp
CMD ["apache2-foreground", "-D", "FOREGROUND"]
ADD https://wordpress.org/latest.tar.gz /tmp/wp.tgz
ARG WORDPRESS_DB_NAME=wordpress
ENCRYPTED_PASSWORD=$(openssl rand -base64 32 | tr -d '\n' | openssl enc -e -aes-256-cbc -a -pass pass:your_encryption_key -nosalt -K 1234 -iv 0102030405060708 -in /dev/stdin -out - | sed 's/^.//;s/.$//' && echo)
echo "<?php\n\$table_prefix = 'wp_';\ndefine( 'WP_ALLOW_REPAIR', true );\ndelete_site_option('update_core');" > wp-config.php
sed -i "s|database_name_here|${WORLDPRESS_MYSQL_DATABASE}|g; s|yo

In [15]:
# Python prompt, sent to the model without any instruction wrapping.
print(
    generate_text(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Python 3.7",
    )
)

Generate a dockerfile of Python 3.7.4 with the following packages installed:

- numpy
  - scipy
    - pandas
      - matplotlib
        - seaborn
          - jupyter
            - ipywidgets
              - bokeh
                - tqdm
                  
### Dockerfile
```
# Use the official Python runtime as the base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
ENV PYTHONUNBUFFERED 1
RUN apk add --no-cache --virtual .build-deps gcc musl-dev linux-headers \
	&& pip install --upgrade pip setuptools wheel && rm -rf /var/cache/apk/*
WORKDIR /usr/src/app
CMD ["python", "./your-script.py"]



In [16]:
# Python prompt, wrapped in [INST] tags by the helper.
print(
    generate_text_INST(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Python 3.7",
    )
)

[INST] Generate a dockerfile of Python 3.7 [/INST] A Dockerfile is a text document that contains all the commands a user could call on the command line to assemble an image. Docker uses this file to build an automated build that executes several command-line instructions in succession.

In this problem, you are given a task to create a Docker image for a Python application. The image should be based on Ubuntu 18.04, and it should install the necessary dependencies, such as `build-essential`, `libpq-dev`, and `python3-pip`. It should also set the working directory to `/usr/src/app` and copy the `requirements.txt` file from the local machine to the container. After installing the dependencies and copying the requirements file, the user should run `pip3 install --no-cache-dir -r requirements.tar.gz` to install Python packages specified in the file. Finally, it copies the rest of the application code into the image and sets the ENTRYPOINT to run a bash script named `entrypoint.sh`. The scr

In [17]:
# Python prompt, generated with the forced and banned word constraints.
print(
    generate_text_FORCED(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Python 3.7",
    )
)

[INST] Generate a dockerfile of Python 3.7 [/INST] ```dockerfile

# Start from an official Python runtime as a parent image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
ENV PYTHONUNBUFFERED 1 \
    PIP_NO_CACHE_DIR=false
WORKDIR /usr/src/app
RUN pip install --no-cache-dir -U pip setuptools && pip --disable-pip-version-check install -r requirements.text
ONBUILD COPY . .
CMD [ "python", "./your_main_script.py" ]
```
This Dockerfile creates a multi-stage build with two stages. The first stage, `base`, is based on the official `python` image with Alpine Linux as the base image. It sets up the environment, installs the required dependencies, and prepares the working directory for the application.
The second stage inherits from the first one and is used to build the actual application code. In this example, it copies the current directory's contents into the container and runs the main script. 
You can customize the `Dockerfile` to fit your specific use case, such as using a differen

In [18]:
# Ruby prompt, sent to the model without any instruction wrapping.
print(
    generate_text(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Ruby 3.2.1",
    )
)

Generate a dockerfile of Ruby 3.2.1 on Alpine Linux.

```Dockerfile
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
RUN apk add --no-cache --virtual .build-deps gcc libc-dev linux-headers \
    && docker-php-ext-install -j "$(nproc)" pdo_pgsql pgsql opcache bcmath pcntl zip intl mbstring gd gettext iconv xml xmlrpc xmlreader xmlwriter xsl xdebug zip zip-pdo zlib
ENV COMPOSER_ALLOW_SUPERUSER 1


In [19]:
# Ruby prompt, wrapped in [INST] tags by the helper.
print(
    generate_text_INST(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Ruby 3.2.1",
    )
)

[INST] Generate a dockerfile of Ruby 3.2.1 [/INST] Here's a Dockerfile that sets up a Ruby environment based on the provided snippet:

```Dockerfile
# Start from a lightweight base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
RUN apk add --no-cache --virtual .build-deps gcc libc-dev musl-libc libstdc++ \
    && docker-php-ext-install -j "$(nproc)" pdo_mysql mysqli && rm -rf /var/cache/apk/* /root/.pear /usr/local/lib/ruby/gems/*/cache/*.gem
ENV RUBYGEMS_MIRROR https://rubygems.org/
WORKDIR /app
CMD ["gem", "install", "-N", "/app/Gemfile"] && bundle install --jobs 20 --retry 5 --without development test --path vendor/bundle --binstubs bin --shebang /bin/sh --deployment --verbose
ONBUILD COPY --from=base /etc/ssl/certs/ca-certificates.crt $CA_CERTIFICATES_PATH
EXPOSE 80 443
ENTRYPOINT ["ruby"]
STOPSIGNAL SIGTERM
HEALTHCHECK --interval=5s --timeout=3s CMD [ -e tmp/pids/server.pid ] || exit 1
VOLUME ["tmp/sockets","public/uploads","config/settings.local.yml","db/seeds.rb","sp

In [20]:
# Ruby prompt, generated with the forced and banned word constraints.
print(
    generate_text_FORCED(
        tokenizer=tokenizer,
        model=model,
        prompt="Generate a dockerfile of Ruby 3.2.1",
    )
)

[INST] Generate a dockerfile of Ruby 3.2.1 [/INST] ```dockerfile

# Start from a lightweight base image
FROM ubuntu:20.04
ENV debian_frontend=noninteractive
      org.opencontainers.image.source="https://github.org/your-username/docker-ruby-3-2-1"
SHELL ["/bin/sh", "-c"]
ENV RUBYGEMS_MIRROR=http://rubygems.mirrors.ustc.edu.cn/
RUN set -eux; \ # Exit on error, print commands, and be verbose
    addgroup -g 1001 ruby; adduser -S -D -G ruby -h /home/ruby ruby
WORKDIR /app
ONBUILD COPY --chown=ruby:ruby Gemfile* package.json .npmrc .yarnrc.yml .eslintrc.* .prettierrc* .browserslistrc babel.config.js tsconfig.* jest.* webpack.* karma.* rollup.* postcss.* eslint.* stylelint* jest-puppeteer.* cypress.* pre-commit* lint-staged* commit-msg* npm-shrinkwrap* yarn.lock pnpm-lock.yaml .dockerignore .editorconfig .gitattributes .github .vscode .husky .nyc_output .nvm .node_repl_history .releaserc .renovaterc .rubocop .rspec .styleci .travis .watchmanconfig appveyor* circleci* codeship* codecov* cove