In [86]:
checkpoint_name = "checkpoint-9"
image_name = 'zindi-image:0.2.5'
model="/home/rana/Projects/zindi/models/marian/marian_output/"+checkpoint_name


In [None]:
output_dir="/home/rana/Projects/zindi/models/marian/ct/"+checkpoint_name
file_path = "/home/rana/Projects/zindi/deployment/docker-compose.yml"

In [87]:
%cd /home/rana/Projects/zindi/deployment

/home/rana/Projects/zindi/deployment


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [95]:
!rm -rf saved_model/
!mkdir saved_model/
# !cp -r {model} saved_model/
!cp -r {output_dir} saved_model/{checkpoint_name}/

In [96]:
import yaml    
with open(file_path, 'r') as file:
    yaml_content = yaml.safe_load(file)
    yaml_content['services']['translation_inference_util']['image'] = image_name

# Save the updated YAML content back to the file
    with open(file_path, 'w') as file:
        yaml.dump(yaml_content, file, default_flow_style=False)


In [97]:
%%writefile requirements.txt
# transformers
sacremoses>=0.1.1
pyyaml>=6.0.2
kserve>=0.13.1
ctranslate2==4.3.1
# sentencepiece==0.1.99
# kserve==0.11.2
# torch>=2.4.0
# accelerate = "^0.33.0"
sentencepiece>=0.2.0

Overwriting requirements.txt


In [141]:
filename = "main.py"
with open(filename, "w") as file:
    file.write(f"""
\"\"\"
KServe inference script for NLLB-200 translation model.
\"\"\"

import argparse
import os
from typing import List
from kserve import (InferOutput, InferRequest, InferResponse, Model, ModelServer, model_server)
from kserve.utils.utils import generate_uuid
#from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import ctranslate2
import sentencepiece as spm

# Constants
MODEL_DIR = "./saved_model/{checkpoint_name}"

class TranslationModel(Model):
    \"\"\"
    KServe inference implementation of NLLB-200 translation model.
    \"\"\"

    def __init__(self, name: str):
        \"\"\"
        Initialize the translation model.
        Args:
            name (str): Name of the model.
        \"\"\"        
        super().__init__(name)
        self.name = name
        self.ready = False
        self.model = None
        #self.tokenizer = None
        self.sp_source_model = None
        self.sp_target_model = None
        self.load()

    def load(self) -> None:
        \"\"\"
        Load model and tokenizer from disk.
        \"\"\"
        try:
            self.sp_source_model = spm.SentencePieceProcessor(model_file=MODEL_DIR+'/source.spm')
            self.sp_target_model = spm.SentencePieceProcessor(model_file=MODEL_DIR+'/target.spm')
            #self.tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
            # self.model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)
            self.model = ctranslate2.Translator(MODEL_DIR)
            print('Model and tokenizer loaded')
            self.ready = True
        except Exception as e:
            print('Error loading model: ', e)
            self.ready = False

    def preprocess(self, payload: InferRequest, *args, **kwargs) -> str:
        \"\"\"
        Preprocess inference request.

        Args:
            payload (InferRequest): The input payload containing the text to translate.

        Returns:
            str: Preprocessed text ready for translation.
        \"\"\"
        return payload.inputs[0].data[0].lower()

    def predict(self, data: str, *args, **kwargs) -> InferResponse:
        \"\"\"
        Make prediction using the model.
        Args:
            data (str): Preprocessed input text.

        Returns:
            InferResponse: KServe inference response containing the translated text.
        \"\"\"
        source_sentences = [data.strip()]
        print(source_sentences)
        translation = self._translate(self.model, source_sentences)[0]

        return self._create_response(translation)
    
    # def _translate(self, model, tokenizer, text):
    #     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=150)
    #     translated = model.generate(**inputs)
    #     return tokenizer.batch_decode(translated, skip_special_tokens=True)

    # Ctranslate2 translation
    def _translate(self, model, text):
        #tokens = tokenizer.tokenize(text)
        tokens = self.sp_source_model.encode(text, out_type=str)
        # print(tokens)
        tokens = ["dyu"] + tokens + ["</s>"] + ["fr"]
        try:
            results = model.translate_batch(tokens)
            # The translated results are token strings, so we need to convert them to IDs before decoding
            translations = []
            for translation in results:
                # Convert token strings to IDs before decoding
                #token_ids = tokenizer.convert_tokens_to_ids(translation.hypotheses[0])
                #decoded_text = tokenizer.decode(token_ids)
                decoded_text = self.sp_target_model.decode(translation.hypotheses[0])
                translations.append(decoded_text)
        except Exception as e:
            print(f"Translation error: ", e)
            translations = [""]  # Return empty string if translation fails
        return translations

    def _create_response(self, translation: str) -> InferResponse:
        \"\"\"
        Create InferResponse object.

        Args:
            translation (str): Translated text.

        Returns:
            InferResponse: KServe inference response object.
        \"\"\"
        return InferResponse(
            model_name=self.name,
            infer_outputs=[InferOutput(name=\"output-0\", shape=[1], datatype=\"STR\", data=[translation])],
            response_id=generate_uuid()
        )

def parse_arguments() -> argparse.Namespace:
    \"\"\"
    Parse command-line arguments.

    Returns:
        argparse.Namespace: Parsed command-line arguments.
    \"\"\"
    parser = argparse.ArgumentParser(parents=[model_server.parser])
    # Check if '--model_name' is already defined
    model_name_defined = any('--model_name' in action.option_strings for action in model_server.parser._actions)

    if not model_name_defined:
        model_server.parser.add_argument(
            '--model_name', 
            default='model', 
            help='The name that the model is served under.'
        )
    return parser.parse_args()

def main():
    \"\"\"
    Main function to start the model server.
    \"\"\"
    args = parse_arguments()
    model = TranslationModel(args.model_name)
    ModelServer().start([model])

if __name__ == "__main__":
    main()

""")

In [142]:
import getpass
import subprocess

def run_command(command_str):
    # Prompt for the sudo password
    sudo_password = getpass.getpass("Enter your sudo password: ")
    # Run the command with sudo, passing the password
    result = subprocess.run(f"echo {sudo_password} | sudo -S {command_str}", shell=True, check=True)
    # Check the result
    print(result)

In [143]:
run_command("docker compose down")

[sudo] password for rana:  Container translation_inference_ctutil  Stopping
 Container translation_inference_ctutil  Stopped
 Container translation_inference_ctutil  Removing
 Container translation_inference_ctutil  Removed
 Network deployment_default  Removing


CompletedProcess(args='echo 1234 | sudo -S docker compose down', returncode=0)


 Network deployment_default  Removed


In [144]:
run_command(f"docker build -t {image_name} .")

[sudo] password for rana: #0 building with "default" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 360B done
#1 DONE 0.2s

#2 [internal] load metadata for docker.io/library/python:3.10.14-slim
#2 DONE 1.4s

#3 [internal] load .dockerignore
#3 transferring context: 2B done
#3 DONE 0.2s

#4 [1/6] FROM docker.io/library/python:3.10.14-slim@sha256:8b3815a0a8f9a554c0f8c40af7dae424c0fd962819c787188ebc2574d909d2df
#4 DONE 0.0s

#5 [internal] load build context
#5 transferring context: 5.72kB done
#5 DONE 0.2s

#6 [2/6] WORKDIR /app
#6 CACHED

#7 [3/6] COPY ./requirements.txt .
#7 CACHED

#8 [4/6] RUN pip install --no-cache-dir -r requirements.txt
#8 CACHED

#9 [5/6] COPY ./saved_model /app/saved_model
#9 CACHED

#10 [6/6] COPY ./main.py /app/main.py
#10 DONE 1.3s

#11 exporting to image
#11 exporting layers
#11 exporting layers 0.6s done
#11 writing image sha256:6443a53733cb759b046327a50b9c25af452374e410cd93931008bc5da134c429 0.0

CompletedProcess(args='echo 1234 | sudo -S docker build -t zindi-image:0.2.5 .', returncode=0)


In [145]:
run_command(f"docker compose up -d")

[sudo] password for rana:  Network deployment_default  Creating
 Network deployment_default  Created
 Container translation_inference_ctutil  Creating
 Container translation_inference_ctutil  Created
 Container translation_inference_ctutil  Starting


CompletedProcess(args='echo 1234 | sudo -S docker compose up -d', returncode=0)


 Container translation_inference_ctutil  Started


In [148]:
!curl -X POST http://localhost:8080/v2/models/model/infer -H 'Content-Type: application/json' -d @./input.json

{"model_name":"model","model_version":null,"id":"27f4a593-3a91-4cd8-b969-4ae03605c55b","parameters":null,"outputs":[{"name":"output-0","shape":[1],"datatype":"STR","parameters":null,"data":["Une vallée------------- jusqu'à un yeeeee do au cou au cou tun bi à un yeye de yeeee yeye ye ye ye ye yeye yeye yeye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye ye y y y y ye y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y y"]}]}

In [147]:
run_command(f"sudo docker compose logs")

translation_inference_ctutil  | 2024-08-11 16:47:12.427 1 kserve INFO [model_server.py:register_model():384] Registering model: model
translation_inference_ctutil  | 2024-08-11 16:47:12.427 1 kserve INFO [model_server.py:start():254] Setting max asyncio worker threads as 5
translation_inference_ctutil  | 2024-08-11 16:47:12.428 1 kserve INFO [model_server.py:serve():260] Starting uvicorn with 1 workers
translation_inference_ctutil  | 2024-08-11 16:47:12.508 uvicorn.error INFO:     Started server process [1]
translation_inference_ctutil  | 2024-08-11 16:47:12.508 uvicorn.error INFO:     Waiting for application startup.
translation_inference_ctutil  | 2024-08-11 16:47:12.511 1 kserve INFO [server.py:start():63] Starting gRPC server on [::]:8081
translation_inference_ctutil  | 2024-08-11 16:47:12.512 uvicorn.error INFO:     Application startup complete.
translation_inference_ctutil  | 2024-08-11 16:47:12.512 uvicorn.error INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to q

[sudo] password for rana: 