In [None]:
import os
import subprocess

import s3fs
from langchain_core.prompts import PromptTemplate
from transformers import AutoModelForCausalLM


def _mc_copy_recursive(source: str, destination: str) -> None:
    """Recursively copy ``source`` to ``destination`` with the MinIO ``mc`` client.

    The output of ``mc`` is discarded. A non-zero exit status raises
    ``subprocess.CalledProcessError`` because the command runs with ``check=True``.
    """
    subprocess.run(
        ["mc", "cp", "-r", source, destination],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )


def cache_model_from_hf_hub(
    model_name: str,
    s3_bucket: str = "models-hf",
    s3_cache_dir: str = "hf_hub/diffusion",
    s3_token: str = None,
    hf_token: str = None,
):
    """Use S3 as proxy cache from HF hub if a model is not already cached locally.

    Resolution order:
      1. The model is in the local HF cache: nothing is downloaded; it is pushed
         to S3 if S3 does not have it yet.
      2. The model is on S3 only: it is copied into the local HF cache with ``mc``.
      3. The model is in neither place: it is downloaded from the HF hub through
         ``AutoModelForCausalLM.from_pretrained`` and then pushed to S3.

    Args:
        model_name (str): Name of the model on the HF hub.
        s3_bucket (str): Name of the S3 bucket to use.
        s3_cache_dir (str): Path of the cache directory on S3.
        s3_token (str): Accepted for backward compatibility; not used by this
            function (``mc`` reads its credentials from ``MC_HOST_s3``).
        hf_token (str): Token forwarded to ``from_pretrained`` when the model
            has to be downloaded from the HF hub.

    Raises:
        AssertionError: If the ``MC_HOST_s3`` environment variable is not set.
        subprocess.CalledProcessError: If an ``mc cp`` command fails.
    """
    assert "MC_HOST_s3" in os.environ, "Please set the MC_HOST_s3 environment variable."

    # Local cache config (fixed to the ~/.cache/huggingface/hub location).
    LOCAL_HF_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "hub")
    model_name_hf_cache = "models--" + model_name.replace("/", "--")
    dir_model_local = os.path.join(LOCAL_HF_CACHE_DIR, model_name_hf_cache)

    # Remote cache config
    fs = s3fs.S3FileSystem(endpoint_url="https://minio.lab.sspcloud.fr")
    try:
        remote_paths = fs.ls(os.path.join(s3_bucket, s3_cache_dir))
    except FileNotFoundError:
        # The cache prefix does not exist yet (nothing was ever pushed):
        # treat it as an empty remote cache instead of failing.
        remote_paths = []
    available_models_s3 = [os.path.basename(path) for path in remote_paths]
    dir_model_s3 = os.path.join(s3_bucket, s3_cache_dir, model_name_hf_cache)
    print(dir_model_s3)

    model_on_s3 = model_name_hf_cache in available_models_s3

    # Checking the model directory directly (rather than listing its parent)
    # also works on a fresh machine where the HF cache directory is missing.
    if os.path.isdir(dir_model_local):
        print(f"Model {model_name} found in local cache. ")
        if not model_on_s3:
            # Push from local HF cache to S3
            print(f"Putting model {model_name} on S3.")
            _mc_copy_recursive(f"{dir_model_local}/", f"s3/{dir_model_s3}")
        return

    if model_on_s3:
        # Try fetching from S3 if available
        print(f"Fetching model {model_name} from S3.")
        os.makedirs(LOCAL_HF_CACHE_DIR, exist_ok=True)
        _mc_copy_recursive(f"s3/{dir_model_s3}", f"{LOCAL_HF_CACHE_DIR}/")
        return

    # Else, fetch from HF Hub and push to S3
    print(f"Model {model_name} not found on S3, fetching from HF hub.")
    AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", token=hf_token)
    print(f"Putting model {model_name} on S3.")
    _mc_copy_recursive(f"{dir_model_local}/", f"s3/{dir_model_s3}")


def create_prompt_from_instructions(system_instructions: str, question_instructions: str) -> PromptTemplate:
    """Build a ``PromptTemplate`` from system and question instructions.

    The two strings are interpolated into a single template text (system part
    first, then a blank line, then the question part) and that text is passed
    to ``PromptTemplate.from_template``.

    Args:
        system_instructions (str): Text placed at the top of the template.
        question_instructions (str): Text placed after the system part.

    Returns:
        PromptTemplate: The template created from the combined text.
    """
    return PromptTemplate.from_template(
        f"""
    {system_instructions}

    {question_instructions}
    """
    )


def format_docs(docs: list) -> str:
    """Render documents as one numbered text block.

    Each document becomes a section made of a ``Doc <n>:`` header (numbered
    from 1), a ``Content:`` line and the document's ``page_content``. The
    sections carry no source-code indentation and no stray ``:`` line, so the
    rendered text contains only the intended labels and the document content.

    Args:
        docs (list): Objects exposing a ``page_content`` attribute.

    Returns:
        str: The sections separated by one blank line; an empty string when
        ``docs`` is empty.
    """
    return "\n\n".join(
        f"Doc {position}:\nContent:\n{doc.page_content}" for position, doc in enumerate(docs, start=1)
    )

In [None]:
# Make sure the embedding model is present in the local cache and on S3,
# relying on the function's default bucket and cache directory.
cache_model_from_hf_hub(model_name="OrdalieTech/Solon-embeddings-large-0.1")

In [None]:
from openai import OpenAI

# Smoke-test the chat endpoint reachable through the proxy URL below.
# The API key is the placeholder string "EMPTY".
client = OpenAI(
    api_key="EMPTY",
    base_url="https://projet-llm-insee-open-data-227689-0.user.lab.sspcloud.fr/proxy/3000/",
)

completion = client.chat.completions.create(
    messages=[dict(role="user", content="Hello!")],
    model="mistralai/Mistral-Small-24B-Instruct-2501",
)

# Display the whole response object; the reply itself is at
# completion.choices[0].message.
completion

In [None]:
from openai import OpenAI

# Point the OpenAI client at the local OpenAI-compatible server (vLLM, per the
# original note) instead of the OpenAI API; the key is a placeholder.
openai_api_base = "http://localhost:8000/v1"
openai_api_key = "EMPTY"

# Without an explicit api_key the client would fall back to
# os.environ.get("OPENAI_API_KEY").
client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

# Use whichever model the server lists first.
models = client.models.list()
model = models.data[0].id

responses = client.embeddings.create(
    model=model,
    input=["Hello my name is", "The best thing about vLLM is that it supports many different models"],
)

# One list of floats per input sentence. The original note gives a length of
# 4096 -- presumably specific to the served model; confirm before relying on it.
for data in responses.data:
    print(data.embedding)

In [None]:
from litellm import embedding

# Request an embedding through litellm from the server at localhost:8000.
# The `hosted_vllm/` prefix of the model string is the provider prefix litellm
# uses for routing; the rest is the model name sent to the server.
response = embedding(
    input=["good morning from litellm"],
    api_base="http://localhost:8000/v1",
    model="hosted_vllm/intfloat/e5-mistral-7b-instruct",
)

# The vector can be read with response.data[0]["embedding"].

In [None]:
from litellm import completion

# Ask the chat model served at localhost:3000 a question through litellm.
# The `hosted_vllm/` prefix of the model string is the provider prefix litellm
# uses for routing; the rest is the model name sent to the server.
response = completion(
    messages=[{"role": "user", "content": "what llm are you"}],
    api_base="http://localhost:3000/v1",
    model="hosted_vllm/mistralai/Mistral-Small-24B-Instruct-2501",
)

# Show only the text of the first choice.
response.choices[0]["message"]["content"]

In [None]:
import litellm
from litellm import CustomLLM, completion


class MyCustomLLM(CustomLLM):
    """litellm custom handler relaying calls to two locally served models.

    Chat completions are sent to ``COMPLETION_MODEL`` at ``COMPLETION_API_BASE``
    and embeddings to ``EMBEDDING_MODEL`` at ``EMBEDDING_API_BASE``. The
    caller's ``messages`` / ``input`` keyword arguments are forwarded; when
    they are absent or empty the previous hard-coded demo payloads are used,
    so direct calls without arguments keep working.

    NOTE(review): this assumes litellm passes ``messages`` and ``input`` to the
    handler as keyword arguments -- confirm against the installed version.
    """

    COMPLETION_MODEL = "hosted_vllm/mistralai/Mistral-Small-24B-Instruct-2501"
    COMPLETION_API_BASE = "http://localhost:3000/v1"
    EMBEDDING_MODEL = "hosted_vllm/intfloat/e5-mistral-7b-instruct"
    EMBEDDING_API_BASE = "http://localhost:8000/v1"

    def _chat_request(self, kwargs: dict) -> dict:
        """Assemble the litellm chat arguments, preferring the caller's messages."""
        return {
            "model": self.COMPLETION_MODEL,
            "api_base": self.COMPLETION_API_BASE,
            "messages": kwargs.get("messages") or [{"role": "user", "content": "Hello world"}],
        }

    def _embedding_request(self, kwargs: dict) -> dict:
        """Assemble the litellm embedding arguments, preferring the caller's input."""
        return {
            "model": self.EMBEDDING_MODEL,
            "api_base": self.EMBEDDING_API_BASE,
            "input": kwargs.get("input") or ["good morning from litellm"],
        }

    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        """Run a synchronous chat completion against the local chat model."""
        return litellm.completion(**self._chat_request(kwargs))  # type: ignore

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        """Run an asynchronous chat completion against the local chat model."""
        # Use the async API so the event loop is not blocked by a sync call.
        return await litellm.acompletion(**self._chat_request(kwargs))  # type: ignore

    def embedding(self, *args, **kwargs) -> "litellm.EmbeddingResponse":
        """Compute embeddings synchronously with the local embedding model."""
        return litellm.embedding(**self._embedding_request(kwargs))  # type: ignore

    async def aembedding(self, *args, **kwargs) -> "litellm.EmbeddingResponse":
        """Compute embeddings asynchronously with the local embedding model."""
        # Await the call: returning it un-awaited would hand back a coroutine
        # object instead of the embedding response.
        return await litellm.aembedding(**self._embedding_request(kwargs))  # type: ignore


my_custom_llm = MyCustomLLM()

# KEY STEP: register the handler. Model strings starting with
# "my-custom-llm/" are then dispatched to `my_custom_llm` by litellm.
litellm.custom_provider_map = [dict(provider="my-custom-llm", custom_handler=my_custom_llm)]

resp = completion(
    messages=[{"role": "user", "content": "fais le chien"}],
    model="my-custom-llm/my-fake-model",
)

# `embedding` is the litellm function imported in an earlier cell.
emb = embedding(
    input=["good morning from litellm"],
    model="my-custom-llm/my-fake-model",
)

# The generated text is available as resp.choices[0].message.content.

In [None]:
import pandas as pd

# Load the joined raw dataset (parquet) from S3 through the MinIO endpoint.
filesystem = s3fs.S3FileSystem(endpoint_url="https://minio.lab.sspcloud.fr")
s3_path = "s3://projet-llm-insee-open-data/data/raw_data/applishare_solr_joined.parquet"

df = pd.read_parquet(s3_path, filesystem=filesystem, engine="pyarrow")

In [None]:
from langchain_community.llms import VLLMOpenAI
from langchain_openai import OpenAIEmbeddings

# The local servers are called with the placeholder key "EMPTY"; it is also
# exported so that anything reading OPENAI_API_KEY finds a value.
os.environ["OPENAI_API_KEY"] = "EMPTY"

# Completion model served at localhost:3000; generation stops at the first ".".
llm_completion = VLLMOpenAI(
    model_name="mistralai/Mistral-Small-24B-Instruct-2501",
    openai_api_base="http://localhost:3000/v1",
    openai_api_key="EMPTY",
    model_kwargs=dict(stop=["."]),
)

# Embedding model served at localhost:8000.
emb_model = OpenAIEmbeddings(
    model="intfloat/e5-mistral-7b-instruct",
    openai_api_key="EMPTY",
    openai_api_base="http://localhost:8000/v1",
)

# Quick sanity checks: first five embedding components, then a short completion.
print(emb_model.embed_query("A sentence to encode.")[:5])
print(llm_completion.invoke("Rome is"))

In [None]:
from langchain_openai import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper

# Wrap the langchain embedding model and LLM in the ragas adapter classes.
generator_embeddings = LangchainEmbeddingsWrapper(emb_model)
generator_llm = LangchainLLMWrapper(llm_completion)

In [None]:
from langchain_community.document_loaders import DataFrameLoader

# Turn the first two rows of the dataframe into langchain documents, using the
# "xml_content" column as the page content.
loader = DataFrameLoader(df.iloc[:2], page_content_column="xml_content")
docs = loader.load()

In [None]:
# Display the loaded documents (the whole list is rendered as the cell output).
docs

In [None]:
from ragas.testset import TestsetGenerator

# Build the ragas test-set generator from the wrapped LLM and embedding model.
generator = TestsetGenerator(
    embedding_model=generator_embeddings,
    llm=generator_llm,
)

# Generate a test set of size 1 from at most the first three documents.
dataset = generator.generate_with_langchain_docs(docs[:3], testset_size=1)