In [1]:
%%capture
# Uncomment if you haven't these packages
%pip install --upgrade accelerate peft bitsandbytes trl huggingface_hub
%pip install "transformers==4.38.2" # Bug occured in v4.39.1 - AttributeError: 'torch.dtype' object has no attribute 'itemsize'
%pip install flash-attn --no-build-isolation #Nvidia download guide - https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2

In [2]:
from os import path,chdir
import sys
# Change the working directory to the location of the running script.
# Under a Jupyter/IPython kernel sys.argv[0] points at the kernel launcher, not
# at this notebook, so changing into its directory would leave the notebook's
# folder. In that case the kernel's own working directory is kept.
if "ipykernel" not in sys.modules:
    chdir(path.dirname(path.realpath(sys.argv[0])))

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from scripts.improve_result import improve_result
from huggingface_hub import login

In [4]:
# Authenticate against the Hugging Face Hub (login comes from huggingface_hub);
# in a notebook this call renders an interactive login widget.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# dtype used by bitsandbytes for computation on the 4-bit weights.
compute_dtype = torch.float16

# 4-bit NF4 quantization with double quantization enabled.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
    # attn_implementation is an argument of from_pretrained(), not a
    # quantization option, so it is deliberately not passed here: given to
    # BitsAndBytesConfig it has no effect on the attention backend.
)
# Hugging Face Hub repository id of the fine-tuned model.
model_name = "Tony177/codellama-13b-dockerfile-generation"

In [6]:
# Load the quantized model and let accelerate place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    # FlashAttention-2 needs half-precision weights, so the dtype is stated explicitly.
    torch_dtype=compute_dtype,
    # The attention backend is selected here, at load time.
    # NOTE(review): requires the flash-attn package and a GPU it supports — confirm on the target machine.
    attn_implementation="flash_attention_2",
)
model.config.use_cache = False
# pretraining_tp = 1 keeps the standard linear-layer computation; per the
# Transformers docs, values other than 1 activate a more accurate but slower
# computation that better matches the original logits.
model.config.pretraining_tp = 1
# Kept from the original notebook: silences a warning about gradients during generation.
model.enable_input_require_grads()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# Fetch the tokenizer from the Hugging Face Hub under the same repository id as the model.
# NOTE(review): device_map is normally a model-loading option — confirm the tokenizer needs it.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    device_map="auto",
)
# Pad on the right (the original author's workaround for an fp16 issue)
# and reuse the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [8]:
def generate_text(tokenizer, model, prompt: str, max_new_tokens: int = 512) -> str:
    """Generate a Dockerfile for a plain (non-[INST]) prompt using beam search.

    A fixed instruction suffix asking for Dockerfile-only output is appended to
    ``prompt`` before it is tokenized.

    Args:
        tokenizer: Tokenizer matching ``model``; it is called with
            ``return_tensors="pt"`` and must provide ``eos_token_id`` and
            ``batch_decode``.
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Natural-language request, e.g. "Generate a dockerfile of Python 3.7".
        max_new_tokens: Upper bound on the number of generated tokens. Defaults
            to 512, the value that used to be hard-coded.

    Returns:
        The value returned by ``improve_result`` for the full prompt and the
        decoded generation.
    """
    prompt += ". Output must be in valid Dockerfile format. Output must include only dockerfile commands."
    encoded = tokenizer(prompt, return_tensors="pt")
    # Follow the model's own device instead of assuming "cuda": the model is
    # placed by device_map="auto", so its inputs must go wherever it ended up.
    device = model.device
    # Tensors are passed individually rather than handing over the tokenizer
    # output as a whole (the original author hit KeyError: 'shape' that way).
    # NOTE(review): no_repeat_ngram_size=2 forbids every repeated bigram, which
    # valid Dockerfiles often contain — confirm this setting is intended.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"].to(device),
        # pad_token_id equals eos_token_id below, so the mask cannot be inferred
        # from the ids and is supplied explicitly.
        attention_mask=encoded["attention_mask"].to(device),
        max_new_tokens=max_new_tokens,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # batch_decode returns a list; the first element is the decoded text that is used.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [9]:
def generate_text_INST(tokenizer, model, prompt: str, max_new_tokens: int = 512) -> str:
    """Generate a Dockerfile for an instruction-format ([INST] ... [/INST]) prompt.

    The caller supplies the opening part of the instruction (the demo calls
    start it with "<s>[INST] "); this function appends the fixed Dockerfile-only
    instruction suffix and the closing " [/INST]" tag before tokenizing.

    Args:
        tokenizer: Tokenizer matching ``model``; it is called with
            ``return_tensors="pt"`` and must provide ``eos_token_id`` and
            ``batch_decode``.
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Request text that already contains the opening [INST] tag.
        max_new_tokens: Upper bound on the number of generated tokens. Defaults
            to 512, the value that used to be hard-coded.

    Returns:
        The value returned by ``improve_result`` for the full prompt and the
        decoded generation.
    """
    prompt += ". Output must be in valid Dockerfile format. Output must include only dockerfile commands. [/INST]"
    encoded = tokenizer(prompt, return_tensors="pt")
    # Follow the model's own device instead of assuming "cuda": the model is
    # placed by device_map="auto", so its inputs must go wherever it ended up.
    device = model.device
    # Tensors are passed individually rather than handing over the tokenizer
    # output as a whole (the original author hit KeyError: 'shape' that way).
    # NOTE(review): no_repeat_ngram_size=2 forbids every repeated bigram, which
    # valid Dockerfiles often contain — confirm this setting is intended.
    gen_tokens = model.generate(
        input_ids=encoded["input_ids"].to(device),
        # pad_token_id equals eos_token_id below, so the mask cannot be inferred
        # from the ids and is supplied explicitly.
        attention_mask=encoded["attention_mask"].to(device),
        max_new_tokens=max_new_tokens,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # batch_decode returns a list; the first element is the decoded text that is used.
    result = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return improve_result(prompt, result)

In [10]:
# Plain-prompt demo: the Wordpress request is sent without [INST] tags.
print(generate_text(tokenizer, model, "Generate a dockerfile of Wordpress 5.7"))

Generate a dockerfile of Wordpress 5.7. Output must be in valid Dockerfile format. Output must include only dockerfile commands.

```Dockerfile
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
RUN apk add --no-cache --virtual .build-deps $PHPIZE_DEPS \
    && docker-php-ext-install -j "$(nproc)" pdo_mysql mysqli opcache mbstring zip gd intl pcntl xmlrpc xml xmlreader xmlwriter xsl curl ftp gettext gmp iconv json ldap mcrypt mysqli mysql odbc pgsql posix pspell readline shmop simplexml snmp soap sockets sodium sysvshm sysvsem tidy tokenizer wddx xml_parser_create zip_pdo zipintl zipopcache zipstream zlib
ENV COMPOSER_ALLOW_SUPERUSER 1
WORKDIR /var/www/html
ENTRYPOINT [ "docker-entrypoint.sh" ]
CMD ["apache2-foreground", "-D", "FOREGROUND"]
EXPOSE 1 9 4 2 / t c p   1 9 4 2 / u d p   3 5 8 2 / t c p   3 5 8 2 / u d p   443
VOLUME ["/tmp", "/run/locks/apache.lock", "${APACHE_DOCUMENT_ROOT}", "${PHP_INI_DIR}/conf.d/custom.ini", ".docker/config/php/docker.php.dist", "$WORDPRESS_CONFIG_PA

In [11]:
# Instruction-format demo: the prompt opens with "<s>[INST]" and generate_text_INST appends the closing "[/INST]".
# NOTE(review): "<s>" is written literally; if the tokenizer also prepends its own BOS token the input starts with two — confirm.
print(generate_text_INST(tokenizer, model, "<s>[INST] Generate a dockerfile of Wordpress 5.7"))

[INST] Generate a dockerfile of Wordpress 5.7. Output must be in valid Dockerfile format. Output must include only dockerfile commands. [/INST] ```Dockerfile

# Start from a base image
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
RUN apk add --no-cache --virtual .build-deps $PHPIZE_DEPS \
    && docker-php-ext-install -j "$(nproc)" pdo_mysql mysqli opcache zip gd mbstring exif pcntl bcmath xmlrpc xml soap intl iconv gettext gmp bz2 sockets curl json zip-utils xmlreader xmlwriter xsl curl-dev libxslt freetype libjpeg-turbo libpng libzip libmcrypt libmhash libldap libmemcached libsasl libssh2 libpspell libstemmer libxml++-2.6.29 libicu-charset-converter libpq openssl-libcrypto libc-client libcurl-openssl libedit libtidy-0.99 libfreetype6 liblzma5 libmagickwand6 libmysqlclient21 libncurses10 libodbc1 libressl28 libsqlite3 libstdc++ libwebp libxpm libxxf86vm libzmq libzvbi libharfbuzz libhunspell libbsd libevent libffi libgmp libgnutls libidn libkadm5srv libkerberos libmetis libnet

In [12]:
# Plain-prompt demo: the Python 3.7 request is sent without [INST] tags.
print(generate_text(tokenizer, model, "Generate a dockerfile of Python 3.7"))

Generate a dockerfile of Python 3.7. Output must be in valid Dockerfile format. Output must include only dockerfile commands.

```bash
$ python3 generate_dockerfile.py
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
ENV PYTHONUNBUFFERED 1
RUN mkdir -p /usr/src/app
WORKDIR /
CMD ["pip3", "install", "-r", "/tmp/" + os.path.basename(sys.argv[1]) + " && rm -rf /var/cache/apk/* && apk del --no-cache --purge gcc musl-dev linux-headers"]
ENTRYPOINT [ "python3" ]
EXPOSE 80
STOPSIGNAL SIGTERM
HEALTHCHECK --interval=5s --timeout=3s CMD wget -q -O - http://localhost/ || exit 2
ONBUILD COPY --chown=1001:0 . /app && chmod -R 755 .
VOLUME [ "/app/node_modules" , "/root/.node-gyp/12.16.3/x64/lib/python/config-37-darwin/build/Release/obj.target/deps/v8/third_party/icu/source/i18n/unicode/timezone.o"  ] 
ARG VARIABLE=default_value
USER user:group
SHELL ["/bin/sh","-c","exec","gosu","user","'command'","&&","exit","$?"]


In [13]:
# Instruction-format demo: the prompt opens with "<s>[INST]" and generate_text_INST appends the closing "[/INST]".
# NOTE(review): "<s>" is written literally; if the tokenizer also prepends its own BOS token the input starts with two — confirm.
print(generate_text_INST(tokenizer, model, "<s>[INST] Generate a dockerfile of Python 3.7"))

[INST] Generate a dockerfile of Python 3.7. Output must be in valid Dockerfile format. Output must include only dockerfile commands. [/INST] ```Dockerfile

# Start from an official Python runtime as a parent image
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
      org.opencontainers.image.source="https://your-github-repo.git"
RUN apt-get update --fix-missing --no-install-recommends -y && DEBIAN_FRONTEND=noninteractive pip install --upgrade pip setuptools wheel && rm -rf /var/lib/apt/lists/*
ENV PIP_NO_CACHE_DIR=false
WORKDIR /usr/src/app
CMD ["python", "your_script.py"]
EXPOSE 80 443
```
In this solution, we create a multi-stage build with two stages. The first stage is based on the official `python` image and installs the required dependencies, while the second stage copies the application code and runs it. This ensures that the final image only contains the necessary files to run your application, making it smaller and more secure. 
Replace `YOUR_GITHUB_REPO_URL` with your ac

In [14]:
# Plain-prompt demo: the Ruby 3.2.1 request is sent without [INST] tags.
print(generate_text(tokenizer, model, "Generate a Dockerfile of Ruby 3.2.1"))

Generate a Dockerfile of Ruby 3.2.1. Output must be in valid Dockerfile format. Output must include only dockerfile commands.

```Dockerfile
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
RUN apk add --no-cache --virtual .build-deps \
    gcc libc-dev linux-headers build-base libstdc++ libffi libxml2 libxslt libyaml openssl readline zlib \ 
  && gem install bundler -v '~> 2' --install-dir /usr/local/bundle --user-install --force --conservative
ENV BUNDLER_HOME /root/.bundle
USER root
WORKDIR /app
CMD ["bundle", "install"]
ONBUILD COPY --from=0 . $BUNDLE_PATH/bundler/gems/
VOLUME [ "/app" ]
EXPOSE 80
ENTRYPOINT [ "ruby", "-I", "$GEM_ROOT/lib", "/bin/sh",  "-c" , "exec bundle exec $0 \"$@\"" ,"$@"  ]  



In [15]:
# Instruction-format demo: the prompt opens with "<s>[INST]" and generate_text_INST appends the closing "[/INST]".
# NOTE(review): "<s>" is written literally; if the tokenizer also prepends its own BOS token the input starts with two — confirm.
print(generate_text_INST(tokenizer, model, "<s>[INST] Generate a Dockerfile of Ruby 3.2.1"))

[INST] Generate a Dockerfile of Ruby 3.2.1. Output must be in valid Dockerfile format. Output must include only dockerfile commands. [/INST] ```Dockerfile

# Start from the official Ruby base image
FROM ubuntu:22.04
ENV debian_frontend=noninteractive
ENV LANG=C.UTF-8 \
    LC_ALL=${LANG}
RUN apt-get update -qq -y && DEBIAN_FRONTEND=noninteractive gem install --no-document bundler
WORKDIR /usr/src/app
ENTRYPOINT ["bundle", "exec"]
CMD ["ruby", "-e", "$stderr.reopen('/dev/stdout');$stdout.sync=true;load($0=ARGV.shift || 'irb_console.rb');IRB.conf[:SAVE_HISTORY] = 100" ]
EXPOSE 22
``` 
This solution meets the requirements of the problem description. It starts with a base Ruby image, sets environment variables, updates the package list, installs the `bundler` gem, and configures the working directory. The `Gemfile*` files are copied into the container and the entrypoint is set to run `bundle exec`. The default command is to start an IRB console. Finally, the solution exposes the default SS