In [None]:
# Download git-lfs for pytorch_model.bin, model.safetensors and training_args.bin
!sudo yum update -y
!sudo yum install amazon-linux-extras
!sudo amazon-linux-extras install epel -y
!sudo yum update -y
!sudo yum install git-lfs -y

In [None]:
# DOWNLOAD MODEL

import os
import shutil
import tarfile

import boto3
import torch
from sagemaker.s3 import S3Uploader
from transformers import AutoModelForSeq2SeqLM, BertTokenizerFast

# Define model to download
model_name = "mrm8488/bert2bert_shared-german-finetuned-summarization"

# Define the S3 location of the packaged model.
model_id = model_name.split("/")[-1]
s3_bucket = sess.default_bucket()
s3_path = f"s3://{s3_bucket}/origin"
# S3 object keys carry no leading slash. The same key is used for the existence
# check and for the upload, so a second run finds the archive of the first one.
s3_model_key = f"origin/{model_id}.tar.gz"

s3_client = boto3.client('s3')

# Check whether the model archive already exists in the S3 bucket.
# head_object reports a missing object as a generic ClientError (code "404"),
# so the error code is inspected; any other failure (e.g. access denied) is re-raised.
try:
    s3_client.head_object(Bucket=s3_bucket, Key=s3_model_key)
    model_exists = True
    print("Model already exists in S3.")
except s3_client.exceptions.ClientError as e:
    if e.response["Error"]["Code"] not in ("404", "NoSuchKey", "NotFound"):
        raise
    model_exists = False
    print(f"Model does not exist in S3 at {s3_model_key}. It will be uploaded.")

if not model_exists:
    # Load model and tokenizer from the Hugging Face Hub.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = BertTokenizerFast.from_pretrained(model_name)

    local_model_dir = f"./{model_id}"
    tar_path = f"{model_id}.tar.gz"
    s3_tar_path = f"{s3_path}/{tar_path}"

    try:
        # Save the artefacts temporarily on local disk.
        os.makedirs(local_model_dir, exist_ok=True)
        model.save_pretrained(local_model_dir)
        tokenizer.save_pretrained(local_model_dir)

        # Pack the directory into a tar.gz; the files sit below a top-level
        # folder named after the model.
        with tarfile.open(tar_path, "w:gz") as tar:
            tar.add(local_model_dir, arcname=os.path.basename(local_model_dir))

        # Upload the archive to the key that the existence check looks at.
        s3_client.upload_file(tar_path, s3_bucket, s3_model_key)
        print(f"Model uploaded to S3: {s3_tar_path}")
    finally:
        # Remove the local copies even when saving or uploading failed.
        if os.path.exists(tar_path):
            os.remove(tar_path)
        shutil.rmtree(local_model_dir, ignore_errors=True)

In [None]:
# these versions support 8-bit and 4-bit
!pip install bitsandbytes>=0.39.0 accelerate>=0.20.0

In [None]:
# MODEL QUANTIZATION

from transformers import AutoModelForSeq2SeqLM, BertTokenizerFast
import bitsandbytes as bnb

# load and quantize model
def quantize_model(model_name, save_directory):
    model_name = 'mrm8488/bert2bert_shared-german-finetuned-summarization'
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = BertTokenizerFast.from_pretrained(model_name)
    # quantizes model via transformers, accelerate and bnb 
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True)
    
    # save model
    model.save_pretrained(save_directory)
    tokenizer.save_pretrained(save_directory)

quant_model_dir = f"8-bit-quant-{model_id}"

# save quantized model version on s3
try:
    s3.head_object(Bucket=sagemaker_session_bucket, Key=f"{quant_model_dir}/model.tar.gz")
    quant_model_exists = True
except:
    quant_model_exists = False

if not quant_model_exists:
    # Quantisiere und speichere das Modell lokal
    quantize_model(model_id, quant_model_dir)

    # Packe das quantisierte Modell als .tar.gz
    !tar -czvf 8-bit-quant-model.tar.gz -C {quant_model_dir} .
    
    # Pfad im S3 für das quantisierte Modell definieren
    s3_quant_model_path = f"s3://{sagemaker_session_bucket}/{quant_model_dir}/model.tar.gz"
    
    # Hochladen des gepackten quantisierten Modells in den S3-Bucket
    S3Uploader.upload('8-bit-quant-model.tar.gz', s3_quant_model_path)
