In [1]:
import os
from dotenv import load_dotenv
# Pull key/value pairs from a local .env file into the process environment
# so the credentials do not have to be hardcoded in this notebook.
load_dotenv()

# Check that each credential is available before it is used.
# os.getenv() returns None for an unset variable, and assigning None into
# os.environ raises "TypeError: str expected, not NoneType", which hides the
# real problem (a missing entry in .env). Report the missing name instead.
for env_name in ('HF_TOKEN', 'LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT'):
    env_value = os.getenv(env_name)
    if env_value is None:
        print(f"Warning: environment variable {env_name} is not set; add it to your .env file.")
    else:
        os.environ[env_name] = env_value

# Tracing is switched on unconditionally, exactly as before.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Text-generation endpoint for the Zephyr-7B model: sampling is disabled,
# output is capped at 512 new tokens, and a mild repetition penalty is applied.
base_llm = HuggingFaceEndpoint(
    repo_id='HuggingFaceH4/zephyr-7b-beta',
    task='text-generation',
    max_new_tokens=512,
    repetition_penalty=1.03,
    do_sample=False,
)

# Wrap the raw endpoint in the chat-model interface used by the cells below.
llm = ChatHuggingFace(llm=base_llm)
print(llm)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


llm=HuggingFaceEndpoint(repo_id='HuggingFaceH4/zephyr-7b-beta', repetition_penalty=1.03, stop_sequences=[], server_kwargs={}, model_kwargs={}, model='HuggingFaceH4/zephyr-7b-beta', client=<InferenceClient(model='HuggingFaceH4/zephyr-7b-beta', timeout=120)>, async_client=<InferenceClient(model='HuggingFaceH4/zephyr-7b-beta', timeout=120)>, task='text-generation') tokenizer=LlamaTokenizerFast(name_or_path='HuggingFaceH4/zephyr-7b-beta', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='left', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>', 'additional_special_tokens': ['<unk>', '<s>', '</s>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToke

In [3]:
# Plain-string prompt: no template or roles, just one question for the chat model.
gradient_descent_question = "define gradient descent"
result = llm.invoke(gradient_descent_question)

In [4]:
# Print the whole returned message object, not only its text.
print(result)

content='Gradient descent is an iterative algorithm used in optimization problems to find the local or global minimum of a function by iteratively moving in the direction of steepest descent. In simpler terms, it is a iterative process of making small adjustments to the values of the parameters of a function in order to minimize a cost function or objective function. The algorithm follows this basic process:\n\n1. Start with an initial guess or starting point for the parameters of the function.\n2. Calculate the gradient, which is the slope or steepness of the function at the current point.\n3. Take a small step in the direction opposite to the gradient, which is also known as the "descent direction."\n4. Update the current point with the new values found in step 3.\n5. Repeat steps 2-4 until a predefined stopping criterion is met, such as achieving a minimum error or reaching a maximum number of iterations.\n\nGradient descent is often used in machine learning and deep learning algori

# Chat Prompt Template
A chat prompt template is a structured way to define prompts for chat models. Instead of writing one big prompt string, you build a message-based prompt in which each message has a role such as "system", "user", or "assistant".

In [6]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message template: a fixed system instruction followed by the user's
# question, which is filled in through the {input} placeholder at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert AI Engineer. Provide me answers based on the questions"),
    ("user", "{input}"),
])

In [7]:
# Pipe the formatted prompt into the chat model, then run the chain on one question.
chain = prompt | llm
response = chain.invoke({"input": "Can you tell me about Langsmith?"})
print(response)

content="I do not have personal beliefs, but I can provide you with information about langsmith, a uk-based software company that specializes in developing ai technologies focused on natural language processing (nlp). Langsmith's solutions enable intelligent analysis and processing of unstructured text data, such as social media feeds, customer support transcripts, and news articles. Their aim is to help businesses gain insights, automate tasks, and deliver better customer experiences through ai-powered text understanding and generation." additional_kwargs={} response_metadata={'token_usage': ChatCompletionOutputUsage(completion_tokens=102, prompt_tokens=59, total_tokens=161), 'model': '', 'finish_reason': 'stop'} id='run-76e8a19c-0a22-4121-baad-dca3db0614a1-0'


## Output Parsers
Output parsers are essential components of the LangChain ecosystem:
they take the raw output generated by an AI model and parse it into
a structured, usable format. They bridge the gap between the AI
models and the rest of the application, so that users can input,
process, and receive information and data in a structured form.
For example, `StrOutputParser` reduces a chat message to just its text as a plain string.

In [8]:
# StrOutputParser
from langchain_core.output_parsers import StrOutputParser

# With the parser as the final pipeline stage, the chain yields plain text
# rather than a message object carrying metadata.
output_parser = StrOutputParser()
chain = prompt | llm | output_parser
response = chain.invoke({"input": "Can you tell me something about langsmith?"})
print(response)

Sure! Langsmith is an AI language model developed by the Allen Institute for Artificial Intelligence (AI2). It is trained on a large dataset of text, which enables it to generate human-like responses to various text prompts or questions. Langsmith uses a transformer-based architecture, which is a type of neural network commonly used in natural language processing tasks. Langsmith's training has focused on achieving high accuracy in a variety of NLU (Natural Language Understanding) tasks, such as question answering, sentiment analysis, and text classification. Langsmith's performance is constantly being improved and expanded through ongoing research and development at AI2.
