# Environment setup

In [1]:
!pip install -q -U datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.6/803.6 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.4/37.4 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.9 

## Import neccessary libraries

In [2]:
import os
import torch
from transformers import (
  AutoConfig,
  AutoTokenizer,
  AutoModelForCausalLM,
  pipeline
)

from transformers import BitsAndBytesConfig

In [3]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

import nest_asyncio

## Configuration

In [4]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = AutoConfig.from_pretrained(
    model_name,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

In [5]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [6]:
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

## Utils

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

# Experiment setup

## Model settings

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [9]:
#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [10]:
print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


In [11]:
inputs_not_chat = tokenizer.encode_plus("[INST] Tell me about fantasy football? [/INST]", return_tensors="pt")['input_ids'].to('cuda')

generated_ids = model.generate(inputs_not_chat,
                               max_new_tokens=1000,
                               do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [12]:
print(decoded)

['<s> [INST] Tell me about fantasy football? [/INST] Fantasy football is a popular game in which players create and manage virtual football teams, competing against each other in a fictional league. The game is based on real-life professional football, with players taking on the roles of team owners and managers.\n\nIn fantasy football, players can select their team members from a pool of real-life football players, and they earn points each week based on the performance of their players in real-life games. There are two main types of fantasy football leagues: PPR (points per reception) and PPRF (points per passing attempt/reception, field goal, etc.)\n\nIn PPR leagues, players receive points for each reception they make, in addition to points for each touchdown and field goal they score. In PPRF leagues, players receive points for each passing attempt they make, in addition to points for each reception, touchdown, and field goal they score.\n\nFantasy football leagues can be played wi

## Create vector database

In [11]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [12]:
!playwright install
!playwright install-deps

Downloading Chromium 121.0.6167.57 (playwright build v1097)[2m from https://playwright.azureedge.net/builds/chromium/1097/chromium-linux.zip[22m
[1G152.8 MiB [] 0% 10.2s[0K[1G152.8 MiB [] 0% 39.4s[0K[1G152.8 MiB [] 0% 34.4s[0K[1G152.8 MiB [] 0% 23.5s[0K[1G152.8 MiB [] 0% 14.9s[0K[1G152.8 MiB [] 0% 12.9s[0K[1G152.8 MiB [] 0% 12.1s[0K[1G152.8 MiB [] 1% 9.6s[0K[1G152.8 MiB [] 1% 7.9s[0K[1G152.8 MiB [] 2% 6.8s[0K[1G152.8 MiB [] 2% 6.9s[0K[1G152.8 MiB [] 2% 6.8s[0K[1G152.8 MiB [] 3% 6.5s[0K[1G152.8 MiB [] 3% 6.0s[0K[1G152.8 MiB [] 4% 5.7s[0K[1G152.8 MiB [] 4% 5.9s[0K[1G152.8 MiB [] 4% 5.6s[0K[1G152.8 MiB [] 5% 5.3s[0K[1G152.8 MiB [] 5% 5.0s[0K[1G152.8 MiB [] 6% 4.8s[0K[1G152.8 MiB [] 7% 4.5s[0K[1G152.8 MiB [] 7% 4.3s[0K[1G152.8 MiB [] 8% 4.3s[0K[1G152.8 MiB [] 8% 4.1s[0K[1G152.8 MiB [] 9% 4.0s[0K[1G152.8 MiB [] 10% 4.1s[0K[1G152.8 MiB [] 10% 3.9s[0K[1G152.8 MiB [] 11% 3.7s[0K[1G152.8 MiB [] 12% 3.6s[0K[1G152.8 MiB [] 13% 3.5s[0K

In [13]:
nest_asyncio.apply()

articles = ["https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
            "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
            "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
            "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
            "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/"]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [14]:

# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=800,
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever(k=3)



.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


In [15]:
query = "What did Laporta say?"
docs = db.similarity_search(query)
print(docs[0].page_content)

, CeeDee Lamb

, and Amon-Ra St. Brown

. Finally, at tight end, I have to decide between Mark Andrews

and Sam LaPorta

. I tapped LaPorta due to a more favorable matchup.

**A Fantasy Sports Platform for Fantasy Players: Sign Up for RivalFantasy
Today!**

**Subscribe** : **Apple Podcasts** | **Spotify** | **Google Podcasts** |
**SoundCloud** | **iHeartRadio**

_Raju Byfield is a featured writer for FantasyPros. For more from Raju, check
out his **profile **and follow him **@FantasyContext**._

DFS NFL Josh Downs Keaton Mitchell Devon Achane Alexander Mattison Dalvin Cook
Christian McCaffrey Tyreek Hill A.J. Brown Josh Allen Tyler Bass Alvin Kamara
Stefon Diggs CeeDee Lamb Amon-Ra St. Brown Mark Andrews Sam LaPorta

Rate this article

 __Boom __Bust

__ YouTube

231 K

__ Follow


### Version 1

In [None]:
# text_generation_pipeline = pipeline(
#     model=model,
#     tokenizer=tokenizer,
#     task="text-generation",
#     temperature=0.2,
#     repetition_penalty=1.1,
#     return_full_text=True,
#     max_new_tokens=300,
# )

# prompt_template = """
# ### [INST]
# Instruction: Answer the question based on your
# fantasy football knowledge. Here is context to help:

# {context}

# ### QUESTION:
# {question}

# [/INST]
#  """

# mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# # Create prompt from prompt template
# prompt = PromptTemplate(
#     input_variables=["context", "question"],
#     template=prompt_template,
# )

# # Create llm chain
# llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

### Version 2
`standalone_query_generation_pipeline` uses a temperature of 0.0 instead of 0.2 for our `response_generation_pipeline`. I do this because I want to make sure there is as little chance for hallucinations when generating the standalone query since that impacts the application’s ability to retrieve relevant context.

In [16]:
from langchain.schema import format_document
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.memory import ConversationBufferMemory
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate

from operator import itemgetter

In [None]:
standalone_query_generation_pipeline = pipeline(
 model=model_name,
 tokenizer=tokenizer,
 task="text-generation",
 temperature=0.0,
 repetition_penalty=1.1,
 return_full_text=True,
 max_new_tokens=1000,
)
standalone_query_generation_llm = HuggingFacePipeline(pipeline=standalone_query_generation_pipeline)

response_generation_pipeline = pipeline(
 model=model_name,
 tokenizer=tokenizer,
 task="text-generation",
 temperature=0.2,
 repetition_penalty=1.1,
 return_full_text=True,
 max_new_tokens=1000,
)
response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
_template = """
[INST]
Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in its original language,
that can be used to query a FAISS index. This query will be used to retrieve documents with additional context.

Let me share a couple examples.

If you do not see any chat history, you MUST return the "Follow Up Input" as is:
```
Chat History:
Follow Up Input: How is Lawrence doing?
Standalone Question:
How is Lawrence doing?
```

If this is the second question onwards, you should properly rephrase the question like this:
```
Chat History:
Human: How is Lawrence doing?
AI:
Lawrence is injured and out for the season.
Follow Up Input: What was his injury?
Standalone Question:
What was Lawrence's injury?
```

Now, with those examples, here is the actual chat history and input question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
[your response here]
[/INST]
"""

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [None]:
template = """
[INST]
Answer the question based only on the following context:
{context}

Question: {question}
[/INST]
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [None]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [None]:
# Instantiate ConversationBufferMemory
memory = ConversationBufferMemory(
 return_messages=True, output_key="answer", input_key="question"
)

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | standalone_query_generation_llm,
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}
# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | response_generation_llm,
    "question": itemgetter("question"),
    "context": final_inputs["context"]
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [None]:
def call_conversational_rag(question, chain, memory):
    """
    Calls a conversational RAG (Retrieval-Augmented Generation) model to generate an answer to a given question.

    This function sends a question to the RAG model, retrieves the answer, and stores the question-answer pair in memory
    for context in future interactions.

    Parameters:
    question (str): The question to be answered by the RAG model.
    chain (LangChain object): An instance of LangChain which encapsulates the RAG model and its functionality.
    memory (Memory object): An object used for storing the context of the conversation.

    Returns:
    dict: A dictionary containing the generated answer from the RAG model.
    """

    # Prepare the input for the RAG model
    inputs = {"question": question}

    # Invoke the RAG model to get an answer
    result = chain.invoke(inputs)

    # Save the current question and its answer to memory for future context
    memory.save_context(inputs, {"answer": result["answer"]})

    # Return the result
    return result

In [None]:
question = "how is maholmes doing?"
call_conversational_rag(question, final_chain, memory)

## Inference

In [None]:
llm_chain.invoke({"context":"",
                  "question": "Should I pick up Alvin Kamara for my fantasy team?"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': '',
 'question': 'Should I pick up Alvin Kamara for my fantasy team?',
 'text': "\nBased on your fantasy football knowledge, it depends on what specific league and position you are playing in, as well as the current roster of your team. If you have a need at the running back position and Alvin Kamara is available, he could be a valuable addition to your team due to his strong performance and potential for continued success. However, if you already have a solid running back lineup or if Kamara is not available in your league, it may not be worth picking him up. It's always important to consider the overall balance and strategy of your team when making decisions about adding new players."}

In [None]:
query = "Should I pick up Alvin Kamara for my fantasy team?"

retriever = db.as_retriever()

rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='at quarterback. Now armed with an upgrade under Center and multiple weapons at\nwideout to keep the defensive attention off him, Kamara is back to elite\nreceiving numbers for a running back. Kamara has recorded 36 or more receiving\nyards in four of his last five games and has topped 33 or more in five of six\ngames this season. He is being peppered with targets regardless of opponent\nand should be leaned on again in Week 10. Tap the More on Kamara for Week 10.', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/'}),
  Document(page_content='## **Sleeper Picks NFL Week 10**\n\n_Season record: 16-20_\n\n### **Alvin Kamara (RB – NO)', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/'}),
  Document(page_content=', as he should have plenty of field goal opportunities to both miss and make.\nAt running back, I will be going with CMC and Al

In [None]:
query = "I have Josh Jacobs, should I trade him for Kareem Hunt?"

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='“ **Josh Jacobs** is coming off his second-best game of the season in week\nnine. While Jacobs has had more success, he still runs at 3.2 YPC. I can’t\nimagine the team competing with a rookie quarterback. Jocabs has the sixth-\nhardest remaining schedule of the season for fantasy running backs. Let’s not\nforget he still has a bye week to deal with. I’d be looking to sell high on\nJacobs to acquire like De’Von Achane or Tony Pollard', metadata={'source': 'https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/'}),
  Document(page_content='Kareem Hunt is the fourth highest when it relates to inside the 5 rushing\nattempt percentage in the ENTIRE NFL. He has played six games and didn’t even\nstart on a roster to begin the 2023 season. With the Cleveland Browns, Jerome\nFord has been pushed into the “Between the 20s” running back who needs a\nhomerun shot to bring your fantasy team a ceiling play. Kareem Hun

In [None]:
query = "What are your thoughts on Ja'Marr Chase going into this week?"

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='### **Ja’Marr Chase (WR – CIN)\n\n**', metadata={'source': 'https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/'}),
  Document(page_content='. Chase has game-breaking size and speed with the talent to dominate a slate\nin one of the highest implied totals on the slate.', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/'}),
  Document(page_content='### **Cincinnati Bengals**\n\n  * **Ja’Marr Chase (WR – CIN)\n\n**\n\n  * **Bengals D/ST**', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/'}),
  Document(page_content='This section doesn’t need a plethora of stats. If Ja’Marr Chase plays, he is\nalways an option in your player pool. If Chase plays and Tee Higgins', metadata={'source': 'https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy

In [None]:
query = "Should I trade Saquon Barkley? What are some alternatives."

rag_chain.invoke(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='out for the year, and yet, somehow, Saquon is still relatively high on several\ntrade value charts I’ve looked at. Yes, I know about Saquon’s volume, but what\nseparates a championship fantasy lineup from the runner-up are TDs, and the\nGiants will struggle to score. As for Chase, he was in severe pain during that\nBills’ game, and back injuries usually feel worse afterward. The idea of my\nstar WR, with a shaky back, getting blasted by defenders every time he gets\nthrown a pass doesn’t inspire much confidence. Who would I try to get in\nreturn for Saquon Barkley? RB Derrick Henry', metadata={'source': 'https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/'}),
  Document(page_content='“One player I recommend selling high right now is **Saquon Barkley**. He keeps\ngetting hurt, even if he finds a way back onto the field. He also plays in an\nawful offense, and he’s asked to do too much with very little t

# Reference
1. https://medium.com/@thakermadhav/build-your-own-rag-with-mistral-7b-and-langchain-97d0c92fa146
2. https://medium.com/@thakermadhav/part-2-build-a-conversational-rag-with-langchain-and-mistral-7b-6a4ebe497185