In [14]:
%%capture
!pip install langchain huggingface_hub transformers accelerate langchain-groq langchain_community
!pip install langchain-huggingface langchain_openai
!pip install python-dotenv

!pip install ipywidgets widgetsnbextension pandas-profiling

Installation took around 5min

Load configuration from the local `.env` file and export the API keys as environment variables

In [17]:
import os
from dotenv import dotenv_values
# Read key/value pairs from the local .env file into a mapping.
config = dotenv_values(".env")

# Export only the credentials this notebook needs, and fail early with a
# readable message instead of a bare KeyError/TypeError on the assignment.
required_keys = ("GROQ_API_KEY", "HUGGINGFACEHUB_API_TOKEN")
missing_keys = [key for key in required_keys if not config.get(key)]
if missing_keys:
    raise KeyError(f"Missing or empty entries in .env: {', '.join(missing_keys)}")

for key in required_keys:
    os.environ[key] = config[key]

# Do not print `config` or `os.environ` here: the saved notebook output would
# then contain the API keys.

 **A few words on LLM models**:

- Mistral-7B-v0.1: A base (pretrained-only) language model from Mistral AI with 7 billion parameters, designed for general language understanding and text completion without any instruction fine-tuning.

- Mistral-7B-Instruct-v0.2: An instruction-tuned variant of Mistral-7B, fine-tuned on a range of instruction-following datasets so that it performs better on tasks where explicit instructions must be followed.

- Mixtral-8x7b-32768: A sparse mixture-of-experts model from Mistral AI. Each layer contains eight expert blocks, of which two are active for any given token. It is served here through Groq's inference hardware (GroqChip), which focuses on fast, efficient large-scale inference, with a context window of 32,768 tokens.

- LLaMA3-8B-8192: A third-generation LLaMA model from Meta with 8 billion parameters, served here through Groq and capable of handling sequences of up to 8,192 tokens.

In [4]:
# Hugging Face Hub repository IDs; later cells pass them as `repo_id` to run_hg_llm_chain.
hg_base_model_id     = "mistralai/Mistral-7B-v0.1"
hg_instruct_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Groq model names. groq_model_llama3 is passed to ChatGroq(model=...) in later
# cells; groq_model_mixtral is not referenced by the cells visible here.
groq_model_mixtral   = "mixtral-8x7b-32768"
groq_model_llama3    = "llama3-8b-8192"

- The do_sample parameter determines whether the model should use sampling for text generation.
  - When do_sample is set to True, the model generates text using sampling methods like temperature sampling, top-k sampling, or top-p (nucleus) sampling. This introduces randomness into the output, making it more diverse and creative. This is often used in creative writing tasks.
  - When do_sample is set to False, the model generates text deterministically by always choosing the token with the highest probability (i.e., greedy decoding). This results in more predictable and consistent outputs but can be less creative.

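- The early_stopping parameter controls the stopping condition for beam-based generation (such as beam search): when it is True, generation stops as soon as enough complete candidate sequences (ones that have produced the end-of-sequence token, EOS) have been found, instead of continuing to search for better ones.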

- The temperature parameter controls the randomness of the sampled output.
  - Lower values (e.g., temperature=0.1): The model becomes more conservative, meaning it is more likely to select the highest-probability token.
  - Higher values (e.g., temperature=1.0 or temperature=1.5): The model becomes more random and exploratory in its outputs, making the text more diverse and creative. However, very high temperatures can lead to outputs that are less coherent and more erratic.

-The max_length parameter defines the maximum number of tokens the model is allowed to generate in a single output sequence.

In [33]:
def make_model_params(temperature, max_length, do_sample, early_stopping) -> dict:
    """Bundle four generation settings into a dictionary keyed by setting name.

    Args:
        temperature: Value stored under "temperature".
        max_length: Value stored under "max_length".
        do_sample: Value stored under "do_sample".
        early_stopping: Value stored under "early_stopping".

    Returns:
        dict: The four values, in the order listed above.
    """
    return dict(
        temperature=temperature,
        max_length=max_length,
        do_sample=do_sample,
        early_stopping=early_stopping,
    )
    
# Preset for math / logic questions.
math_params_llm = make_model_params(
    temperature=0.1, max_length=125, do_sample=False, early_stopping=True
)

# Preset for general questions.
general_params_llm = make_model_params(
    temperature=0.7, max_length=125, do_sample=False, early_stopping=True
)

# Preset for story writing.
story_params_llm = make_model_params(
    temperature=1.0, max_length=400, do_sample=True, early_stopping=False
)


**Math LLM Params**

 - By setting do_sample=False, temperature to a low value, early_stopping=True, and choosing an appropriate max_length, you optimize 
 the model for generating precise and accurate answers to math or logic questions.
 
**General LLM Params**
 - do_sample=False ensures more deterministic outputs that stick to the most likely responses.
 - A moderate temperature (around 0.7) balances creativity with coherence, making the output informative but not too random or repetitive.
 - early_stopping: True
  Why: Ensures that the response ends at a logical conclusion rather than generating excessive text.

**StoryLine LLM params**
 - do_sample: True. Enables the model to use sampling techniques to create diverse and creative outputs, which is essential for storytelling.
 - temperature: 1.0. A higher temperature encourages more creativity and variability in the text, making stories more engaging and less predictable.
 - early_stopping: False. Allows the story to develop more fully without generation being cut short.






🔁 **Workflow Overview**:
   - Select model type (LLM or Chat) for your task.
   - Design a prompt to guide the model.
   - Run input through your chosen model.
   - Use Output Parser for neat results.

In [None]:
from langchain_community.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEndpoint

def run_hg_llm_chain(repo_id: str,
                  question: str,
                  model_prams: dict) -> str:
    """
    Send one prompt to a Hugging Face hosted model and return its reply.

    A HuggingFaceEndpoint is created for `repo_id` on every call, configured
    from `model_prams`, and invoked once with `question`.

    Args:
    repo_id (str): The repository ID for the HuggingFace model.
    question (str): The prompt text passed to the model.
    model_prams (dict): Generation settings with the keys "temperature",
        "max_length", "do_sample" and "early_stopping" (the shape returned
        by make_model_params).

    Returns:
    str: The result of `llm.invoke(question)`.

    Raises:
    KeyError: If `model_prams` lacks one of the four keys listed above.
    """
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        # The "max_length" setting is forwarded as the new-token budget.
        max_new_tokens=model_prams["max_length"],
        temperature=model_prams["temperature"],
        do_sample=model_prams["do_sample"],
        # early_stopping is passed through model_kwargs rather than as a named argument.
        # NOTE(review): confirm the installed langchain-huggingface version
        # accepts this extra generation parameter.
        model_kwargs={"early_stopping": model_prams["early_stopping"]},
    )
    return llm.invoke(question)

# Logic question used to compare models and parameter presets.
_question = (
    "Question:Bob is in the living room. "
    "He walks to the kitchen, carrying a cup. "
    "He puts a ball in the cup and carries the cup to the bedroom. "
    "He turns the cup upside down, then walks to the garden. "
    "He puts the cup down in the garden, then walks to the garage. "
    "Where is the ball?"
)

print("mistral intruct model", "-------------------", sep="\n")
# Ask the instruct model the same question under two presets.
print(run_hg_llm_chain(hg_instruct_model_id, _question, math_params_llm))
print("-- change llm params")
print(run_hg_llm_chain(hg_instruct_model_id, _question, story_params_llm))
print("-------------------")





In [None]:
# Same question and preset as the first instruct-model run, now sent to the base model.
print("mistral based model", "-------------------", sep="\n")
print(run_hg_llm_chain(hg_base_model_id, _question, math_params_llm))

Let's look at Groq responses

In [None]:
from langchain_groq import ChatGroq
from langchain_core.messages import AIMessage, HumanMessage,SystemMessage

# Chat model client reused by the following cells.
llm_groq = ChatGroq(model=groq_model_llama3)

# A system message sets the behaviour; the human message carries the request.
system_prompt = SystemMessage(
    content="When I ask for help to write something, you will reply with a document that contains at least one joke"
)
user_prompt = HumanMessage(content="write a cpp hello world.")
messages = [system_prompt, user_prompt]

response = llm_groq.invoke(messages)
# Show the type of the returned object, then its text.
print(type(response), response.content, sep="\n")


# Output parsers

- Output parsers shape the AI's text output into a more usable form, like a database entry or a JSON object.

**Main Uses:**

1. They turn a block of text into organized data.
2. They can guide the AI on how to format its responses for consistency and ease of use.

This stripped-down explanation keeps the focus on the purpose and function of output parsers, suitable for a quick overview during a presentation.

In [8]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from langchain.output_parsers.list import ListOutputParser

Without parsing output

In [None]:

# Template with a single placeholder and no formatting guidance for the model.
prompt = PromptTemplate(template="List 3 {things}", input_variables=["things"])

# Fill in the placeholder, send the text to the chat model, and print the raw reply.
unparsed_query = prompt.format(things="sports that don't use balls")
response = llm_groq.invoke(input=unparsed_query)
print(response.content)

Instantiate output parser

In [None]:
# Parser used later to turn the model's text reply into a Python list.
output_parser = CommaSeparatedListOutputParser()

# Formatting instructions supplied by the parser; they are printed here and
# injected into the prompt template in a later cell.
format_instructions = output_parser.get_format_instructions()

print(format_instructions)

Now let's see how to use the parser's format instructions in the prompt.

In [None]:
# The parser's format instructions are pre-filled as a partial variable, so
# only {things} has to be supplied when the prompt is formatted.
prompt = PromptTemplate(
    template="List 3 {things}.\n {format_instructions} reply only results",
    input_variables=["things"],
    partial_variables={"format_instructions": format_instructions})

# `predict` is a deprecated chat-model method; `invoke` is its replacement.
# `invoke` returns a message object, so take `.content` to keep `output` a
# plain string for the parser in the next cell.
output = llm_groq.invoke(prompt.format(things="sports that don't use balls")).content
print(type(output))
print(output)


Finally, we can parse the output to a list (Python object)

In [None]:
# Convert the raw text reply stored in `output` into a Python list.
output_parser.parse(output)

**LangChain Expression Language (LCEL) Overview**:
   - **Purpose**: Simplify building complex chains from basic components.
   - **Features**: Supports streaming, parallelism, and logging.

### 🛠️ Basic Use Case: Prompt + Model + Output Parser
   - **Common Approach**: Link a prompt template with a model.
   - **Chain Mechanism**: Using the `|` symbol, like a Unix pipe, to connect components.
   - **Process Flow**: User input → Prompt Template → Model → Output Parser.

In [15]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Parser and the formatting instructions it wants the model to follow.
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()

# Chat model for the chain.
model = ChatGroq(model=groq_model_llama3, temperature=0)

# Only {subject} is supplied at invoke time; the format instructions are pre-filled.
prompt = PromptTemplate(
    input_variables=["subject"],
    partial_variables={"format_instructions": format_instructions},
    template="List five {subject}.\n{format_instructions}",
)

# LCEL pipeline: prompt -> model -> output parser.
chain = prompt | model | output_parser

In [None]:
# Run the prompt -> model -> parser chain for one subject; the result is shown as the cell output.
chain.invoke({"subject": "ice cream flavors"})