In [1]:
!pip -q install langchain huggingface_hub transformers sentence_transformers

## HuggingFace

There are two Hugging Face LLM wrappers, one for a local pipeline and one for a model hosted on Hugging Face Hub. Note that these wrappers only work for models that support the following tasks: text2text-generation, text-generation


In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN

## Use the HuggingFaceHub

In [6]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])
prompt

PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template="Question: {question}\n\nAnswer: Let's think step by step.", template_format='f-string', validate_template=True)

## Run a query against LLM.

In [7]:
import torch
import time
# import evaluate
import pandas as pd
import numpy as np
# from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    GenerationConfig,
    TrainingArguments,
    Trainer,
)

2023-09-24 15:35:26.182219: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# tokenizer.to(device)

AttributeError: ignored

In [26]:
llm_chain = LLMChain(prompt=prompt,
                     llm=HuggingFaceHub(repo_id="google/flan-t5-xl",
                                        model_kwargs={"temperature":0,
                                                      "max_length":64}))



ValidationError: ignored

In [9]:
question = "What is the capital of France?"
prompt=f"Question: {question}\n\nAnswer: Let's think step by step."
prompt


"Question: What is the capital of France?\n\nAnswer: Let's think step by step."

In [19]:
question = "What is the capital of France?"
prompt=f"Translate to French:\n {question}"
prompt

'Translate to French:\n What is the capital of France?'

In [20]:
input_text = prompt
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

outputs = model.generate(input_ids, max_new_tokens=200,)
# output = tokenizer.decode(
#     original_model.generate(
#         inputs["input_ids"],
#         max_new_tokens=200,
#     )[0],
#     skip_special_tokens=True,
# )

print(tokenizer.decode(outputs[0],skip_special_tokens=True,), ) #skip_special_tokens=True,

Quelle est la capitale de France?


In [24]:
from huggingface_hub import InferenceClient

# client = InferenceClient(model="prompthero/openjourney-v4")
# # client.text_to_image("Orange and grey kittens are riding a bike")
# client = InferenceClient()
# client.text_to_image("Black-white kittens are riding a bike", model="prompthero/openjourney-v4")

client = InferenceClient(model="google/flan-t5-base")
client.text_generation(prompt=prompt)

HfHubHTTPError: ignored

In [25]:
question = "What is the capital of France?"

print(llm_chain.run(question))

NameError: ignored

In [None]:
question = "What area is best for growing wine in France?"

print(llm_chain.run(question))

## BlenderBot

Doesn't work on the Hub

In [27]:
blenderbot_chain = LLMChain(prompt=prompt,
                     llm=HuggingFaceHub(repo_id="facebook/blenderbot-1B-distill",
                                        model_kwargs={"temperature":0,
                                                      "max_length":64}))

ValidationError: ignored

In [None]:
# question = "What is the capital of France?"
# question = "What area is best for growing wine in France?"

# print(blenderbot_chain = LLMChain(prompt=prompt,
# .run(question))

## With Local model from HF

### Why would you want to use local mode?

- fine-tuned models
- GPU hosted etc
- some models only work locally

## T5-Flan - Encoder-Decoder

In [28]:
%pip install -q accelerate

NotImplementedError: ignored

In [36]:
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

model_id = 'google/flan-t5-large'# go for a smaller model if you dont have the VRAM
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16) #, load_in_8bit=True)




In [44]:
pipe = pipeline(
    "text2text-generation",
    temperature = .7,
    model=model,
    tokenizer=tokenizer,
    max_length=300,
    do_sample=True
)

local_llm = HuggingFacePipeline(pipeline=pipe)

In [45]:
print(local_llm('What is the capital of France? '))

paris


In [48]:
# llm_chain = LLMChain(prompt=prompt,
#                      llm=local_llm
#                      )

question = "What is a bacteria? Explaine the concept"

print(local_llm(question).capitalize())

Bacteria are microscopic organisms that are a part of the cell cycle.


## GPT2-medium - Decoder Only Model

microsoft/DialoGPT-large

In [49]:
model_id = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)

local_llm = HuggingFacePipeline(pipeline=pipe)

Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [50]:
llm_chain = LLMChain(prompt=prompt,
                     llm=local_llm
                     )

question = "What is the capital of France?"

print(llm_chain.run(question))

ValidationError: ignored

## BlenderBot - Encoder-Decoder

In [None]:
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

model_id = 'facebook/blenderbot-1B-distill'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)

local_llm = HuggingFacePipeline(pipeline=pipe)

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/127k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/62.9k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

In [None]:
llm_chain = LLMChain(prompt=prompt,
                     llm=local_llm
                     )

question = "What area is best for growing wine in France?"

print(llm_chain.run(question))

## SentenceTransformers

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

model_name = "sentence-transformers/all-mpnet-base-v2"

hf = HuggingFaceEmbeddings(model_name=model_name)

In [None]:
hf.embed_query('this is an embedding')

In [None]:
hf.embed_documents(['this is an embedding','this another embedding'])

In [None]:


hf = HuggingFaceHubEmbeddings(
    repo_id=model_name,
    task="feature-extraction",
    # huggingfacehub_api_token="my-api-key",
)