In [16]:
%cd /home/rana/Projects/zindi
import yaml
with open('common/config.yaml', 'r') as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)
checkpoint = config.get('checkpoint')
image_name = config.get('image_name')
file_path = config.get('file_path')
ct_model_path=config.get('ct_model_path')+checkpoint

with open(file_path, 'r') as file:
    yaml_content = yaml.safe_load(file)
    yaml_content['services']['translation_inference_util']['image'] = image_name

# Save the updated YAML content back to the file
    with open(file_path, 'w') as file:
        yaml.dump(yaml_content, file, default_flow_style=False)

/home/rana/Projects/zindi


In [17]:
# Start from an empty deployment/saved_model/ directory, then copy the model
# directory (ct_model_path from the config cell) into it under the checkpoint name.
# The generated main.py loads the model from ./saved_model/<checkpoint>.
!rm -rf deployment/saved_model/
!mkdir deployment/saved_model/
!cp -r {ct_model_path} deployment/saved_model/{checkpoint}/

In [18]:
# The following cells write requirements.txt / main.py and run docker with
# paths relative to this directory.
%cd deployment

/home/rana/Projects/zindi/deployment


In [19]:
%%writefile requirements.txt
# transformers
# sacremoses>=0.1.1
pyyaml>=6.0.2
kserve>=0.13.1
ctranslate2==4.3.1
# sentencepiece==0.1.99
# kserve==0.11.2
# torch>=2.4.0
# accelerate = "^0.33.0"
sentencepiece>=0.2.0

Overwriting requirements.txt


In [54]:
# Generate main.py (the KServe entry point) in the current working directory.
# The template is an f-string so that the checkpoint name from the config cell is
# substituted into MODEL_DIR; any literal curly brace added to the generated code
# must therefore be doubled, and triple quotes must stay escaped.
filename = "main.py"
with open(filename, "w") as file:
    file.write(f"""
\"\"\"
KServe inference script for NLLB-200 translation model.
\"\"\"

import argparse
import os
from typing import List
from kserve import (InferOutput, InferRequest, InferResponse, Model, ModelServer, model_server)
from kserve.utils.utils import generate_uuid
import ctranslate2
import sentencepiece as spm

# Constants
MODEL_DIR = "./saved_model/{checkpoint}"

class TranslationModel(Model):
    \"\"\"
    KServe inference implementation of NLLB-200 translation model.
    \"\"\"

    def __init__(self, name: str):
        \"\"\"
        Initialize the translation model.
        Args:
            name (str): Name of the model.
        \"\"\"
        super().__init__(name)
        self.name = name
        self.ready = False
        self.model = None
        self.sp_source_model = None
        self.sp_target_model = None
        self.load()

    def load(self) -> None:
        \"\"\"
        Load model and tokenizer from disk.

        On any failure the error is printed and the model is left not ready.
        \"\"\"
        try:
            self.sp_source_model = spm.SentencePieceProcessor(model_file=MODEL_DIR+'/sentencepiece.bpe.model')
            self.sp_target_model = spm.SentencePieceProcessor(model_file=MODEL_DIR+'/sentencepiece.bpe.model')
            self.model = ctranslate2.Translator(MODEL_DIR)
            print('Model and tokenizer loaded')
            self.ready = True
        except Exception as e:
            print('Error loading model: ', e)
            self.ready = False

    def preprocess(self, payload: InferRequest, *args, **kwargs) -> str:
        \"\"\"
        Preprocess inference request.

        Args:
            payload (InferRequest): The input payload containing the text to translate.

        Returns:
            str: Preprocessed text ready for translation.
        \"\"\"
        # Only the first data item of the first input is used, lower-cased.
        return payload.inputs[0].data[0].lower()

    def predict(self, data: str, *args, **kwargs) -> InferResponse:
        \"\"\"
        Make prediction using the model.
        Args:
            data (str): Preprocessed input text.

        Returns:
            InferResponse: KServe inference response containing the translated text.
        \"\"\"
        source_sentences = [data.strip()]
        print(source_sentences)
        translation = self._translate(self.model, source_sentences)[0]

        return self._create_response(translation)


    # Ctranslate2 translation
    def _translate(self, model, text: List[str]) -> List[str]:
        \"\"\"
        Translate a batch of sentences with CTranslate2.

        Args:
            model: Translator exposing translate_batch (built in load()).
            text (List[str]): Source sentences.

        Returns:
            List[str]: One decoded translation per sentence, or a single
            'Error: ...' entry if translation fails.
        \"\"\"
        tokens = self.sp_source_model.encode(text, out_type=str)
        # Wrap every sentence of the batch (not only the first one) as:
        # source language tag, pieces, end-of-sentence marker, target language tag.
        tokens = [["dyu_Latn"] + pieces + ["</s>", "fra_Latn"] for pieces in tokens]
        try:
            results = model.translate_batch(tokens)
            translations = []
            for translation in results:
                # The best hypothesis is a list of piece strings; decode it and
                # strip the target language tag from the decoded text.
                decoded_text = self.sp_target_model.decode(translation.hypotheses[0]).replace("fra_Latn ","")
                translations.append(decoded_text)
        except Exception as e:
            print("Translation error: ", e)
            # Report the failure text to the caller instead of raising.
            translations = ["Error: "+str(e)]
        return translations

    def _create_response(self, translation: str) -> InferResponse:
        \"\"\"
        Create InferResponse object.

        Args:
            translation (str): Translated text.

        Returns:
            InferResponse: KServe inference response object.
        \"\"\"
        return InferResponse(
            model_name=self.name,
            infer_outputs=[InferOutput(name=\"output-0\", shape=[1], datatype=\"STR\", data=[translation])],
            response_id=generate_uuid()
        )

def parse_arguments() -> argparse.Namespace:
    \"\"\"
    Parse command-line arguments.

    Returns:
        argparse.Namespace: Parsed command-line arguments.
    \"\"\"
    parser = argparse.ArgumentParser(parents=[model_server.parser])
    # parents= copies the parent's arguments when the parser is constructed, so a
    # missing option has to be added to this parser; adding it to
    # model_server.parser afterwards would never reach parse_args().
    model_name_defined = any('--model_name' in action.option_strings for action in parser._actions)

    if not model_name_defined:
        parser.add_argument(
            '--model_name',
            default='model',
            help='The name that the model is served under.'
        )
    return parser.parse_args()

def main():
    \"\"\"
    Main function to start the model server.
    \"\"\"
    args = parse_arguments()
    model = TranslationModel(args.model_name)
    ModelServer().start([model])

if __name__ == "__main__":
    main()

""")

In [55]:
import getpass
import shlex
import subprocess

def run_command(command_str):
    """
    Run a command under sudo after prompting for the sudo password.

    The password is handed to ``sudo -S`` on standard input instead of being
    spliced into a shell command line, so it does not show up in the process
    arguments or in the printed CompletedProcess.

    Args:
        command_str (str): Command line to run as root, e.g. "docker compose down".
            It is split with shlex.split and executed without a shell, so shell
            features such as pipes or globs are not interpreted.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    # Prompt for the sudo password
    sudo_password = getpass.getpass("Enter your sudo password: ")
    # sudo -S reads the password (terminated by a newline) from stdin
    result = subprocess.run(
        ["sudo", "-S", *shlex.split(command_str)],
        input=sudo_password + "\n",
        text=True,
        check=True,
    )
    # Show the completed command and its return code (contains no password)
    print(result)

In [56]:
# Stop and remove the currently running compose stack before rebuilding the image.
run_command("docker compose down")

[sudo] password for rana:  Container translation_inference_ctutil  Stopping
 Container translation_inference_ctutil  Stopped
 Container translation_inference_ctutil  Removing
 Container translation_inference_ctutil  Removed
 Network deployment_default  Removing


CompletedProcess(args='echo [REDACTED] | sudo -S docker compose down', returncode=0)


 Network deployment_default  Removed


In [57]:
# Build the image from the Dockerfile in the current directory, tagged with
# image_name from the config cell.
run_command(f"docker build -t {image_name} .")

[sudo] password for rana: #0 building with "default" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 360B done
#1 DONE 0.2s

#2 [internal] load metadata for docker.io/library/python:3.10.14-slim
#2 DONE 1.6s

#3 [internal] load .dockerignore
#3 transferring context: 2B done
#3 DONE 0.2s

#4 [1/6] FROM docker.io/library/python:3.10.14-slim@sha256:8666a639a54acc810408e505e2c6b46b50834385701675ee177f578b3d2fdef9
#4 DONE 0.0s

#5 [internal] load build context
#5 transferring context: 5.28kB done
#5 DONE 0.2s

#6 [2/6] WORKDIR /app
#6 CACHED

#7 [3/6] COPY ./requirements.txt .
#7 CACHED

#8 [4/6] RUN pip install --no-cache-dir -r requirements.txt
#8 CACHED

#9 [5/6] COPY ./saved_model /app/saved_model
#9 CACHED

#10 [6/6] COPY ./main.py /app/main.py
#10 DONE 1.6s

#11 exporting to image
#11 exporting layers
#11 exporting layers 0.7s done
#11 writing image sha256:d6043c2aa839c86c22c82bd6d5207435dc4b17985afe181816a5f5c3b6fa6ca1 0.1

CompletedProcess(args='echo [REDACTED] | sudo -S docker build -t zindi-image:0.3.0 .', returncode=0)


In [58]:
# Start the compose stack in detached mode.
run_command("docker compose up -d")

[sudo] password for rana:  Network deployment_default  Creating
 Network deployment_default  Created
 Container translation_inference_ctutil  Creating
 Container translation_inference_ctutil  Created
 Container translation_inference_ctutil  Starting


CompletedProcess(args='echo [REDACTED] | sudo -S docker compose up -d', returncode=0)


 Container translation_inference_ctutil  Started


In [61]:
# Smoke test: POST the sample request in common/input.json to the infer endpoint
# of the model named "model" on local port 8080.
!curl -X POST http://localhost:8080/v2/models/model/infer -H 'Content-Type: application/json' -d @/home/rana/Projects/zindi/common/input.json

{"model_name":"model","model_version":null,"id":"b08d321d-34f1-461d-8ec0-d5360152d24d","parameters":null,"outputs":[{"name":"output-0","shape":[1],"datatype":"STR","parameters":null,"data":["Je suis sûr que j'aurais aimé."]}]}

In [62]:
# Show the container logs; run_command already prefixes the command with sudo.
run_command("docker compose logs")

translation_inference_ctutil  | 2024-08-19 13:57:20.190 1 kserve INFO [model_server.py:register_model():384] Registering model: model
translation_inference_ctutil  | 2024-08-19 13:57:20.190 1 kserve INFO [model_server.py:start():254] Setting max asyncio worker threads as 5
translation_inference_ctutil  | 2024-08-19 13:57:20.191 1 kserve INFO [model_server.py:serve():260] Starting uvicorn with 1 workers
translation_inference_ctutil  | 2024-08-19 13:57:20.249 uvicorn.error INFO:     Started server process [1]
translation_inference_ctutil  | 2024-08-19 13:57:20.250 uvicorn.error INFO:     Waiting for application startup.
translation_inference_ctutil  | 2024-08-19 13:57:20.254 1 kserve INFO [server.py:start():63] Starting gRPC server on [::]:8081
translation_inference_ctutil  | 2024-08-19 13:57:20.255 uvicorn.error INFO:     Application startup complete.
translation_inference_ctutil  | 2024-08-19 13:57:20.255 uvicorn.error INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to q

[sudo] password for rana: 

In [63]:
# Recreate ../submission from scratch and copy in the deployment artefacts:
# Dockerfile, main.py and requirements.txt go under deployment/, README.md at the top level.
!rm -rf ../submission
!mkdir ../submission
!mkdir ../submission/deployment
!cp Dockerfile ../submission/deployment/
!cp main.py ../submission/deployment/main.py
!cp requirements.txt ../submission/deployment/requirements.txt
# !cp image_name.txt ../submission/image_name.txt
!cp README.md ../submission/README.md

In [64]:
# Image URI that the next cell writes into image_name.txt inside the submission.
image_uri = "058264459447.dkr.ecr.eu-west-1.amazonaws.com/highwind/f463c7a9-f305-472f-8e49-5f602a7d7882/46c4085f-0ca6-4200-b853-3bf994d3b2d2:latest"

In [65]:
# Write image_name.txt (containing image_uri) into the submission folder and
# zip the folder's contents into submission.zip.
%cd ../submission/
!rm -rf image_name.txt
with open('image_name.txt', 'w') as f:
    f.write(image_uri)
!zip -r submission.zip .

/home/rana/Projects/zindi/submission
  adding: image_name.txt (deflated 18%)
  adding: deployment/ (stored 0%)
  adding: deployment/requirements.txt (deflated 29%)
  adding: deployment/Dockerfile (deflated 29%)
  adding: deployment/main.py (deflated 68%)
  adding: README.md (stored 0%)
