# Step 3. Domain-Adapted Retrieval Model

In [None]:
!pip install llama-index==0.10
!pip install lightning

## (1) Convert HF models to .nemo

In [None]:
# Root directories for input data and for model artifacts.
DATA_ROOT_DIR = "/work/Data"
MODEL_ROOT_DIR = "/work/Models"

# Hugging Face model identifiers: the embedding encoder and the LLM that are
# converted to .nemo checkpoints in the cells below.
HF_ENCODER_MODEL = "intfloat/e5-small-unsupervised"
HF_LLM_MODEL = "meta-llama/Llama-3.1-8B"

In [None]:
embed_nemo_path = "/work/Models/e5-small.nemo"

!python /opt/NeMo/scripts/checkpoint_converters/convert_bert_hf_to_nemo.py \
       --input_name_or_path $HF_ENCODER_MODEL \
       --output_path $embed_nemo_path \
       --mcore True \
       --precision bf16

In [None]:
llm_nemo_path = "/work/Models/llama3-tiny.nemo"
precision = "bf16"

# Convert HF Model to NeMo
!python /opt/NeMo/scripts/checkpoint_converters/convert_llama_hf_to_nemo.py --input_name_or_path $HF_LLM_MODEL --output_path $llm_nemo_path --precision $precision --llama31 True 

## (2) Auto-Generate Domain-Specific Retrieval Samples

In [None]:
vector_dir = f"{DATA_ROOT_DIR}/index"
data_dir = f"{DATA_ROOT_DIR}/docs"

!python /opt/NeMo/examples/nlp/rag/rag_indexing.py \
        trainer.devices=1 \
        trainer.precision='bf16-mixed' \
        indexing.embedder.model_path=$embed_nemo_path \
        indexing.embedder.embed_batch_size=128 \
        indexing.data.data_path=$data_dir \
        indexing.data.chunk_size=256 \
        indexing.data.chunk_overlap=10 \
        indexing.index_path=$vector_dir

In [None]:
data_dir = f"{DATA_ROOT_DIR}/retrieval"

!python /opt/NeMo/tutorials/llm/llama-3/dapt/code/rag_auto_generate_sample.py \
    --config-path=/opt/NeMo/examples/nlp/rag/conf \
    --config-name=rag_generating \
    indexing.index_path=$vector_dir \
    indexing.embedder.model_path=$embed_nemo_path \
    generating.llm.model_path=$llm_nemo_path \
    ++generating.top_k=4 \
    ++generating.num_random=4 \
    ++generating.output_dir=$data_dir \
    ++generating.num_sample=3000 \
    ++generating.prefix="train" 

In [None]:
!ls $data_dir

## (3) Fine-tuning the Retrieval Model

In [None]:
data_file = data_dir + "/train_data.json"

!python /opt/NeMo/examples/nlp/information_retrieval/megatron_bert_embedding_finetuning.py \
    exp_manager.exp_dir=/work/log/retrieval \
    restore_from_path=$embed_nemo_path \
    trainer.devices=8 \
    trainer.precision=bf16 \
    trainer.max_epochs=1 \
    trainer.max_steps=-1 \
    trainer.val_check_interval=2 \
    trainer.limit_val_batches=8 \
    trainer.limit_test_batches=8 \
    model.micro_batch_size=1 \
    model.global_batch_size=64 \
    model.data.data_impl=jsonl \
    model.hidden_size=384 \
    model.num_layers=12 \
    model.ffn_hidden_size=1536 \
    model.data.data_train=$data_file