In [1]:
import os

import torch
import transformers
from dotenv import load_dotenv
from huggingface_hub import login
from langchain import LLMChain, PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Optional Hugging Face Hub authentication, currently disabled.
# Uncommenting these lines loads environment variables with load_dotenv() and
# passes the HUGGINGFACE_TOKEN value to login().
# NOTE(review): presumably only needed when no Hub credentials are already
# available on this machine — confirm before sharing the notebook.
# load_dotenv()
# login(token=os.getenv("HUGGINGFACE_TOKEN"))

In [None]:
model_id = "meta-llama/Meta-Llama-3-8B"

# Build the text-generation pipeline through the module-qualified factory.
# The result is stored under the name `pipeline` because later cells refer to
# it by that name. That assignment hides the `pipeline` factory imported in the
# first cell, so calling the bare name here would fail the second time this
# cell is run; `transformers.pipeline` always resolves to the factory.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    model_kwargs={
        # Request bfloat16 weights when the model is loaded.
        "torch_dtype": torch.bfloat16,
    },
    device_map="auto",  # let the library choose device placement
    max_new_tokens=150,  # upper bound on tokens generated per call
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.13s/it]


In [4]:
# Smoke test: call the raw pipeline directly (no LangChain wrapper involved)
# and display whatever it returns for a single prompt.
pipeline("Hey how are you doing today?")

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[{'generated_text': 'Hey how are you doing today? My name is Steve and I’m the owner of this website. I’m a 23 year old guy from the Netherlands. I love writing about all kinds of things. I hope you enjoy my website and please feel free to contact me with any questions'}]

# LangChain Components

In [39]:
# Set up the Hugging Face pipeline in LangChain
# Wraps the already-built transformers pipeline object so it can be passed to
# LangChain components as an LLM.
# NOTE(review): `model_kwargs` is presumably only consumed when the wrapper
# loads the model itself; with a pre-built pipeline this temperature setting
# may have no effect on generation — confirm against the langchain_huggingface
# documentation.
llm = HuggingFacePipeline(
    pipeline=pipeline,
    model_kwargs={"temperature": 0},
)

In [40]:
# Conversation memory for the chain: the buffered exchange is exposed under
# the "chat_history" key, with return_messages=True.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [None]:
# Prompt template for the chain.
# Alternative chat-style template, kept for reference but not active:
# prompt_template = (
#     "The following is a conversation between a helpful AI assistant and a user.\n"
#     "User: {input}\n"
#     "AI:"
# )

# Active template: forwards the user's text unchanged, followed by a newline.
prompt_template = "{input}\n"

# The template's only placeholder is {input}, and the chain is called with an
# "input" key, so that is the variable declared here.
# NOTE(review): the template has no {chat_history} placeholder, so the
# conversation memory (memory_key="chat_history") is presumably never rendered
# into the prompt — confirm whether that is intended.
prompt = PromptTemplate(
    input_variables=["input"],
    template=prompt_template,
)

In [42]:
# Assemble the chain from the LLM wrapper, the prompt and the conversation
# memory; verbose=True prints the formatted prompt on every run.
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=True)

In [43]:
# Run the chain on a single user message. `invoke` returns the chain's full
# output mapping; the generated string is stored under the "text" key, which
# is the value the deprecated `run` helper returned.
llm_chain.invoke({"input": "What is the weather like today?"})["text"]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat is the weather like today?
[0m

[1m> Finished chain.[0m


'What is the weather like today?\nThe weather today in the UK is mostly sunny, with a few clouds. The temperature is around 15 degrees Celsius, and there is a light breeze. It should be a pleasant day for most people, although those with allergies may want to stay indoors'