Notebook for setting up LLMs and embedding models.

In [None]:
from langchain.llms import Replicate
from langchain.chat_models import ChatOpenAI
from transformers import pipeline
from mistralai.client import MistralClient
import os


def setup_llm(model_provider, model_name, temperature):
    """
    Return the specified LLM together with its maximum context size.

    Possible combinations are (modelProvider: modelName):
    - Replicate: lLama2-7b-chat | lLama2-13b-chat | lLama2-70b-chat [https://replicate.com/blog/run-llama-2-with-an-api]
    - OpenAI: gpt-3.5-turbo | gpt-3.5-turbo-16k | gpt-4 [https://platform.openai.com/docs/models]
    - HuggingFace: flan-t5-large [https://huggingface.co/docs/transformers/model_doc/flan-t5]
    - Mistral: mixtral-8x7B-v0.1 (reads the API key from the MISTRAL_API_KEY environment variable)

    Args:
        model_provider: Provider name, one of "Replicate", "OpenAI", "HuggingFace", "Mistral".
        model_name: Model name; must be one of the names listed for the provider.
        temperature: Sampling temperature. Only forwarded to the Replicate and
            OpenAI models; the HuggingFace pipeline and the Mistral client are
            created without it.

    Returns:
        Tuple ``(llm, max_context_size)``.

    Raises:
        ValueError: If the provider or the provider/model combination is unknown.
        KeyError: If the Mistral model is requested and MISTRAL_API_KEY is not set.
    """
    if model_provider == "Replicate":
        # Pinned model versions on Replicate; all of them share the same settings.
        replicate_models = {
            "lLama2-7b-chat": "meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0",
            "lLama2-13b-chat": "meta/llama-2-13b-chat:f4e2de70d66816a838a89eeeb621910adffb0dd0baba3976c96980970978018d",
            "lLama2-70b-chat": "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
        }
        if model_name in replicate_models:
            # A temperature of exactly 0 is nudged to 0.01.
            # NOTE(review): presumably the hosted Llama 2 models reject 0 - confirm.
            if temperature == 0:
                temperature += 0.01
            llm = Replicate(
                model=replicate_models[model_name],
                model_kwargs={"temperature": temperature, "max_new_tokens": 300, "max_length": 300},
            )
            return llm, 4096

    elif model_provider == "OpenAI":
        # Public model name -> (model name sent to the API, max context size).
        openai_models = {
            "gpt-3.5-turbo": ("gpt-3.5-turbo-0613", 4096),
            "gpt-3.5-turbo-16k": ("gpt-3.5-turbo-0125", 16385),
            "gpt-4": ("gpt-4-0613", 8192),
        }
        if model_name in openai_models:
            api_model_name, max_context_size = openai_models[model_name]
            llm = ChatOpenAI(model_name=api_model_name, temperature=temperature, max_tokens=300)
            return llm, max_context_size

    elif model_provider == "HuggingFace":
        if model_name == "flan-t5-large":
            llm = pipeline("text2text-generation", model="google/flan-t5-large")
            return llm, 2048

    elif model_provider == "Mistral":
        if model_name == "mixtral-8x7B-v0.1":
            api_key = os.environ["MISTRAL_API_KEY"]
            llm = MistralClient(api_key=api_key)
            return llm, 16000

    # Reached for an unknown provider AND for a known provider with an unknown
    # model name; previously the latter silently returned None.
    raise ValueError(
        "Error, raised exception: Wrong modelProvider or modelName "
        f"(modelProvider={model_provider!r}, modelName={model_name!r})"
    )

In [None]:
def initialize_LLMs():
    """
    Initializes all available LLMs. Can be used before evaluating to reduce errors.

    Every configured provider/model combination is created once through
    setup_llm. The model name is printed before each creation, and the created
    LLM objects and their context sizes are printed at the end. Nothing is
    returned.
    """
    # "max_context_size" is informational only (it is not read below); the
    # values mirror what setup_llm returns for the respective model.
    llm_configs = [
        {"model_provider": "Replicate", "model_name": "lLama2-7b-chat", "temperature": 0, "max_context_size": 4096},
        {"model_provider": "Replicate", "model_name": "lLama2-13b-chat", "temperature": 0, "max_context_size": 4096},
        {"model_provider": "Replicate", "model_name": "lLama2-70b-chat", "temperature": 0, "max_context_size": 4096},
        {"model_provider": "OpenAI", "model_name": "gpt-3.5-turbo", "temperature": 0, "max_context_size": 4096},
        {"model_provider": "OpenAI", "model_name": "gpt-3.5-turbo-16k", "temperature": 0, "max_context_size": 16385},
        {"model_provider": "OpenAI", "model_name": "gpt-4", "temperature": 0, "max_context_size": 8192},
        {"model_provider": "HuggingFace", "model_name": "flan-t5-large", "temperature": 0, "max_context_size": 2048},
        {"model_provider": "Mistral", "model_name": "mixtral-8x7B-v0.1", "temperature": 0, "max_context_size": 16000},
    ]

    llms = []
    sizes = []
    for config in llm_configs:
        print(config["model_name"])
        # Use the temperature from the table instead of a hard-coded 0 so the
        # configuration above is the single source of truth.
        llm, size = setup_llm(config["model_provider"], config["model_name"], config["temperature"])
        llms.append(llm)
        sizes.append(size)

    print(llms)
    print("\n")
    print(sizes)

In [None]:
from langchain.embeddings import CohereEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import VoyageEmbeddings

def create_embedding_model(model_provider: str, model_name: str):
    """
    Creates the embedding model and returns it.

    Possible combinations are (modelProvider: modelName (embedding size, max input length)):
    - Cohere: v2 (4096, 512) | v3 (1024, 512) [https://docs.cohere.com/reference/embed]
    - OpenAI: text-embedding-ada (1536, 8191), created as text-embedding-ada-002 [https://platform.openai.com/docs/guides/embeddings]
    - Voyage: voyage-2 (1024, 4096) [https://docs.voyageai.com/embeddings/]
    - HuggingFace:
        - all-mpnet-base-v2 (768, 384) [https://huggingface.co/sentence-transformers/all-mpnet-base-v2]
        - all-MiniLM-L6-v2 (384, 256) [https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2]
        - bge-large-en-v1.5 (1024, 512) [https://huggingface.co/BAAI/bge-large-en-v1.5]
        - Contriever (768, 512) [https://huggingface.co/facebook/contriever-msmarco]
        - SecRoBERTa (768, 512) [https://huggingface.co/jackaduma/SecRoBERTa]
    - Fine-tuned:
        - finetuned-ISO-27001_1024 (1024, 512) [https://huggingface.co/Basti8499/bge-large-en-v1.5-ISO-27001]

    Args:
        model_provider: Provider name, one of the top-level entries listed above.
        model_name: Model name; must be one of the names listed for the provider.

    Returns:
        The created embedding model object. A short description of the model
        (embedding size, input length) is printed as a side effect.

    Raises:
        ValueError: If the provider or the provider/model combination is unknown.
    """
    if model_provider == "Cohere":
        if model_name == "v2":
            embeddings = CohereEmbeddings(model="embed-english-v2.0")
            print("Cohere v2 embedding: Vector embedding size - 4096, input length: 512")
            return embeddings
        if model_name == "v3":
            embeddings = CohereEmbeddings(model="embed-english-v3.0")
            print("Cohere v3 embedding: Vector embedding size - 1024, input length: 512")
            return embeddings

    elif model_provider == "HuggingFace":
        if model_name == "all-mpnet-base-v2":
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
            print("HuggingFace all-mpnet-base-v2 embedding - Vector embedding size: 768, input length: 384")
            return embeddings
        if model_name == "all-MiniLM-L6-v2":
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            print("HuggingFace all-MiniLM-L6-v2 embedding - Vector embedding size: 384, input length: 256")
            return embeddings
        if model_name == "bge-large-en-v1.5":
            embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
            print("HuggingFace BAAI/bge-large-en-v1.5 embedding - Vector embedding size: 1024, input length: 512")
            return embeddings
        if model_name == "Contriever":
            embeddings = HuggingFaceEmbeddings(model_name="facebook/contriever-msmarco")
            print("HuggingFace facebook/contriever-msmarco embedding - Vector embedding size: 768, input length: 512")
            return embeddings
        if model_name == "SecRoBERTa":
            embeddings = HuggingFaceEmbeddings(model_name="jackaduma/SecRoBERTa")
            print("HuggingFace jackaduma/SecRoBERTa embedding - Vector embedding size: 768, input length: 512")
            return embeddings

    elif model_provider == "Voyage":
        if model_name == "voyage-2":
            embeddings = VoyageEmbeddings(model="voyage-2", show_progress_bar=True, batch_size=200)
            print("Voyage embedding - Vector embedding size: 1024, input length: 4096")
            return embeddings

    elif model_provider == "OpenAI":
        if model_name == "text-embedding-ada":
            embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
            print("OpenAI embedding - Vector embedding size: 1536, input length: 8191")
            print("Tokenizer used: cl100k_base")
            return embeddings

    elif model_provider == "Fine-tuned":
        if model_name == "finetuned-ISO-27001_1024":
            embeddings = HuggingFaceEmbeddings(model_name="Basti8499/bge-large-en-v1.5-ISO-27001")
            print("Fine-tuned bge-large-en-v1.5 with ISO 27001 - Vector embedding size: 1024, input length: 512")
            return embeddings

    # Reached for an unknown provider AND for a known provider with an unknown
    # model name; previously the latter silently returned None.
    raise ValueError(
        "Error, raised exception: Wrong modelProvider or modelName provided. "
        f"(modelProvider={model_provider!r}, modelName={model_name!r})"
    )