In [None]:
import os
import yaml
import torch
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

### Two ways to do this

- Use HuggingFaceHub (but it doesn't support all models)
- Run a local pipeline

In [None]:
# Read the Hugging Face Hub API token from a local YAML file and export it as an
# environment variable (presumably where the HuggingFaceHub calls later in this
# notebook pick it up — confirm against the LangChain docs).
# NOTE(review): 'cadentials.yaml' looks like a misspelling of 'credentials.yaml';
# the name is left untouched because it has to match the file on disk — confirm.
with open('cadentials.yaml') as f:
    # safe_load replaces yaml.load(..., Loader=yaml.FullLoader): a credentials
    # file only needs plain mappings/strings, and safe_load refuses to build
    # arbitrary Python objects from YAML tags.
    credentials = yaml.safe_load(f)

os.environ['HUGGINGFACEHUB_API_TOKEN'] = credentials['HUGGINGFACEHUB_API_TOKEN']

### 1. HuggingFaceHub

#### 1.1 Flan-T5

In [None]:
# Prompt layout: the user's question, then an answer stub that asks the model
# to reason step by step.
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(input_variables=["question"], template=template)

# LLM wrapper for the google/flan-t5-xl repo on the Hugging Face Hub; the
# generation settings are passed through unchanged via model_kwargs.
flan_t5_hub = HuggingFaceHub(
    repo_id="google/flan-t5-xl",
    model_kwargs={"temperature": 0, "max_length": 64},
)

# Chain = prompt template + LLM.
llm_chain = LLMChain(llm=flan_t5_hub, prompt=prompt)

In [None]:
# Send one sample question through `llm_chain` and print the text it returns.
question = "What is the capital of France?"

answer = llm_chain.run(question)
print(answer)

#### 1.2 BlenderBot

In [None]:
# Try to build the same question/answer chain against BlenderBot on the
# Hugging Face Hub. Construction is wrapped in try/except so the notebook keeps
# running when the hub wrapper rejects this model.
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

try:
    blenderbot_chain = LLMChain(
        prompt=prompt,
        llm=HuggingFaceHub(
            repo_id="facebook/blenderbot-1B-distill",
            model_kwargs={
                "temperature": 0,
                "max_length": 64,
            },
        ),
    )
except Exception as exc:
    # Catch Exception instead of a bare `except:` so KeyboardInterrupt /
    # SystemExit (e.g. interrupting the kernel) still propagate, and show the
    # underlying error so a bad token or network failure is not mistaken for
    # "model not supported".
    print("Blenderbot is not available on HuggingFaceHub")
    print(f"Reason: {type(exc).__name__}: {exc}")

#### See! Not every model works through HuggingFaceHub.

### 2. Local Pipeline

#### 2.1 Flan-T5 text2text generation (Encoder-Decoder)

In [None]:
# Checkpoint to run locally; go for a smaller model if you don't have the VRAM.
model_id = "google/flan-t5-large"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# NOTE(review): load_in_8bit=True presumably needs extra packages
# (bitsandbytes / accelerate) and a GPU — confirm for your environment.
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, load_in_8bit=True)

# Seq2seq (encoder-decoder) models use the "text2text-generation" task.
pipe = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=100,
)

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline LLM class.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Call whichever pipeline `local_llm` currently wraps (Flan-T5 when the notebook
# is run top to bottom) and print the generated text.
flan_t5_reply = local_llm("What is the capital of France? ")
print(flan_t5_reply)

#### 2.2 GPT2-medium text generation (Decoder Only)

In [None]:
# Decoder-only example: load GPT-2 medium and its tokenizer from the same id.
model_id = "gpt2-medium"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Causal LMs use the "text-generation" task.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=100,
)

# Rebinds `local_llm`, replacing any wrapper assigned to that name earlier.
local_llm = HuggingFacePipeline(pipeline=pipe)

Downloading (…)"pytorch_model.bin";:   1%|          | 10.5M/1.52G [00:14<01:47, 14.0MB/s]

In [None]:
# Call whichever pipeline `local_llm` currently wraps (GPT-2 medium when the
# notebook is run top to bottom) and print the generated text.
gpt2_reply = local_llm("What is the capital of France? ")
print(gpt2_reply)