In [None]:
#Install LangChain and openai libraries.
!pip install -q langchain==0.1.4
!pip install -q transformers==4.37.1
!pip install -q accelerate==0.26.1

In [None]:
!pip install huggingface_hub==0.20.2



In [None]:
from getpass import getpass
# Ask for the Hugging Face access token interactively so it is never written into the notebook source.
hf_key = getpass("Hugging Face Key: ")

Hugging Face Key: ··········


In [None]:
# Log the Hugging Face CLI in with the token stored in the Python variable hf_key
# (IPython substitutes $hf_key before running the shell command).
# NOTE(review): the token is passed as a command-line argument, so it may be visible
# to other processes on the machine while the command runs - confirm this is acceptable.
!huggingface-cli login --token $hf_key

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
from langchain import PromptTemplate
#from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser

from langchain.llms import HuggingFacePipeline
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

In [None]:
import torch
import os
import numpy as np
from torch import cuda, bfloat16

In [None]:

# Use the currently selected CUDA device when one is available; otherwise fall back to the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

In [None]:
# Show which device string was selected.
print(device)

cuda:0


In [None]:
# Hugging Face Hub identifier of the checkpoint used for the config, model and tokenizer.
model_id = "meta-llama/Llama-2-7b-chat-hf"


In [None]:
# Download the model configuration. Authentication uses the token collected with
# getpass (hf_key) so that no credential is stored in the notebook itself.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_key,
)

# Load the causal-LM weights; device_map='auto' lets Accelerate decide where each
# part of the model is placed.
# NOTE(review): trust_remote_code=True allows code shipped in the model repository to
# run locally - confirm it is actually needed for this checkpoint.
# NOTE(review): no torch_dtype is given although bfloat16 is imported earlier in the
# notebook - confirm whether reduced precision was intended.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    device_map='auto',
    token=hf_key,
)
# Switch to inference mode (disables training-only behaviour such as dropout).
model.eval()
print(f"Model loaded on {device}")


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



Model loaded on cuda:0


In [None]:
# Load the tokenizer that belongs to model_id, authenticating with the getpass token.
# `token` is the current name of the deprecated `use_auth_token` argument.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_key)



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Build a text-generation pipeline around the model and tokenizer loaded above.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map='auto',
    return_full_text=True,
    # Generation-related settings.
    max_new_tokens=100,
    repetition_penalty=1.1,
    temperature=0.1,
    top_p=0,
)

# Wrap the pipeline so it can be used as an LLM inside LangChain chains.
assistant_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Instructions telling the LLM how it must respond to the comments.
# Placeholders: {sentiment} describes the assistant's tone, {customer_request} is the user comment.
assistant_template = """
[INST]<<SYS>>You are {sentiment} assistant that responds to user comments,
using similar vocabulary than the user.
Stop answering text after answer the first user.<</SYS>>

User comment:{customer_request}[/INST]
assistant_response:
"""

In [None]:
# Prompt template for the assistant chain: fills `sentiment` and
# `customer_request` into assistant_template.
assistant_prompt_template = PromptTemplate(
    template=assistant_template,
    input_variables=["sentiment", "customer_request"],
)

Now we create the first chain by simply chaining `assistant_prompt_template` and the model. The model receives the prompt generated by the prompt template.

In [None]:
# Legacy equivalent using the LLMChain API, kept for reference:
# assistant_chain = LLMChain(
#     llm=assistant_llm,
#     prompt=assistant_prompt_template,
#     output_key="assistant_response",
#     verbose=False,
# )

# LCEL version: prompt template -> LLM -> string output parser.
output_parser = StrOutputParser()
assistant_chain = (
    assistant_prompt_template
    | assistant_llm
    | output_parser
)


In [None]:
def create_dialog(customer_request, sentiment):
    """Produce the assistant's reply to a user comment.

    Args:
        customer_request: The user's comment; passed to the chain under the
            "customer_request" key.
        sentiment: Description of the assistant's tone; passed to the chain
            under the "sentiment" key.

    Returns:
        The value returned by ``assistant_chain.invoke`` for these inputs.
    """
    chain_inputs = {
        "customer_request": customer_request,
        "sentiment": sentiment,
    }
    return assistant_chain.invoke(chain_inputs)

In [None]:
# The customer request, i.e. the comment posted in the forum moderated by the agent.
# Feel free to change it.
customer_request = "Your product is a piece of shit. I want my money back!"

In [None]:
# Ask the assistant to answer the comment in 'nice' mode and show the reply.
assistant_response = create_dialog(
    customer_request=customer_request,
    sentiment="nice",
)
print(assistant_response)

In [None]:
# Ask the assistant to answer the same comment in rude mode and show the reply.
assistant_response = create_dialog(
    customer_request=customer_request,
    sentiment="most rude possible assistant",
)
print(assistant_response)

In [None]:
# The moderator prompt template.
# {comment_to_moderate} is replaced with the text the moderator must rewrite.
# The instruction block is closed with [/INST], matching the assistant template.
moderator_template = """
[INST]<<SYS>>You are the moderator of an online forum, you are strict and will not tolerate any negative comments.
You will receive an original comment and if it is impolite you must transform into polite.
Try to mantain the meaning when possible.<</SYS>>

Original comment: {comment_to_moderate}[/INST]
"""

# Wrap the moderator prompt in a PromptTemplate and declare the single
# variable that must be supplied when the prompt is rendered.
moderator_prompt_template = PromptTemplate(
    template=moderator_template,
    input_variables=["comment_to_moderate"],
)

In [None]:
# The moderator reuses the assistant's LLM wrapper instead of creating a second one.
moderator_llm = assistant_llm

In [None]:
# Moderator chain: prompt template -> LLM -> string output parser.
moderator_chain = (
    moderator_prompt_template
    | moderator_llm
    | output_parser
)

In [None]:
 # To run our chain we use the .invoke() command
moderator_says = moderator_chain.invoke(
    {"comment_to_moderate": assistant_response}
)
# Show the moderated version of the assistant's last reply.
print(moderator_says)

In [None]:
# Legacy equivalent using SequentialChain, kept for reference:
# from langchain.chains import SequentialChain
#
# assistant_moderated_chain = SequentialChain(
#     chains=[assistant_chain, moderator_chain],
#     input_variables=["sentiment", "customer_request"],
#     verbose=True,
# )

# LCEL version: the assistant chain's output is placed under the
# "comment_to_moderate" key and handed to the moderator chain.
assistant_moderated_chain = (
    {"comment_to_moderate": assistant_chain}
    | moderator_chain
)

In [None]:
# Run the combined chain end to end, with a console callback attached to trace it.
from langchain.callbacks.tracers import ConsoleCallbackHandler

assistant_moderated_chain.invoke(
    {"sentiment": "really rude", "customer_request": customer_request},
    config={'callbacks': [ConsoleCallbackHandler()]},
)