In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
    """Report UTF-8 unconditionally; ``do_setlocale`` is accepted but ignored."""
    return "UTF-8"

# Swap the stdlib hook so later calls to locale.getpreferredencoding() return UTF-8.
setattr(locale, "getpreferredencoding", getpreferredencoding)

In [None]:
!pip install bitsandbytes
!pip install -q datasets loralib sentencepiece
!pip install -q git+https://github.com/zphang/transformers@c3dc391
!pip install -q git+https://github.com/huggingface/peft.git


In [None]:
!pip -q install huggingface chromadb sentence-transformers

In [None]:
!pip -q install configparser langchain
!pip -q install transformers huggingface_hub 

In [None]:
"""Initiating the Wrapper around HuggingFace Pipeline APIs."""
import importlib.util
from typing import Any, List, Mapping, Optional
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
from pydantic import BaseModel, Extra

from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens


In [None]:
# Model name a HuggingFacePipeline reports when none is given explicitly.
DEFAULT_MODEL_ID = "llama-7b-hf"
# Pipeline task names referenced by the wrapper class below.
DEFAULT_TASK = "text-generation"
VALID_TASKS = ("text2text-generation", DEFAULT_TASK)


In [None]:
class HuggingFacePipeline(LLM, BaseModel):
    """LangChain LLM wrapper around a HuggingFace ``transformers`` pipeline.

    To use, you should have the ``transformers`` python package installed.

    ``_call`` handles pipelines whose task is `text-generation` or
    `text2text-generation`. ``from_model_id`` is narrower: it always loads the
    checkpoint with the LLaMA tokenizer/model classes in 8-bit mode and so only
    accepts `text-generation`.

    Example using from_model_id:
        .. code-block:: python

            hf = HuggingFacePipeline.from_model_id(
                model_id="decapoda-research/llama-7b-hf", task="text-generation"
            )
    Example passing pipeline in directly:
        .. code-block:: python

            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

            model_id = "gpt2"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForCausalLM.from_pretrained(model_id)
            pipe = pipeline(
                "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10
            )
            hf = HuggingFacePipeline(pipeline=pipe)
    """

    pipeline: Any  #: :meta private:
    model_id: str = DEFAULT_MODEL_ID
    """Model name to use."""
    model_kwargs: Optional[dict] = None
    """Key word arguments to pass to the model."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @classmethod
    def from_model_id(
        cls,
        model_id: str,
        task: str,
        device: int = -1,
        model_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ) -> LLM:
        """Construct the pipeline object from model_id and task.

        Args:
            model_id: Checkpoint name or path handed to ``from_pretrained``.
            task: Pipeline task; only ``"text-generation"`` is accepted here.
            device: ``-1`` (default) or a CUDA device index; forwarded to the
                ``transformers`` pipeline factory.
            model_kwargs: Extra keyword arguments forwarded to the tokenizer's
                ``from_pretrained`` and to the pipeline factory.
            **kwargs: Extra fields forwarded to the class constructor.

        Returns:
            A wrapper instance holding the constructed pipeline.

        Raises:
            ValueError: If ``transformers`` cannot be imported, the task is not
                supported, ``device`` is out of range, or loading the model
                fails with an ``ImportError``.
        """
        try:
            from transformers import (
                LLaMAForCausalLM,
                LLaMATokenizer,
            )
            from transformers import pipeline as hf_pipeline

        except ImportError as e:
            # Keep ValueError for existing callers, but preserve the real cause.
            raise ValueError(
                "Could not import transformers python package. "
                "Please install it with `pip install transformers`."
            ) from e

        # Validate the cheap arguments first so a bad call fails before any
        # tokenizer/model download starts.
        if task != "text-generation":
            raise ValueError(
                f"Got invalid task {task}, "
                "`from_model_id` currently only supports 'text-generation'"
            )

        if importlib.util.find_spec("torch") is not None:
            import torch

            cuda_device_count = torch.cuda.device_count()
            if device < -1 or (device >= cuda_device_count):
                raise ValueError(
                    f"Got device=={device}, "
                    f"device is required to be within [-1, {cuda_device_count})"
                )
            if device < 0 and cuda_device_count > 0:
                # Only the first literal is an f-string; `{deviceId}` in the
                # second one is printed verbatim as a placeholder name.
                print(
                    f"Device has {cuda_device_count} GPUs available. "
                    "Provide device={deviceId} to `from_model_id` to use available "
                    "GPUs for execution. deviceId is -1 (default) for CPU and "
                    "can be a positive integer associated with CUDA device id."
                )

        _model_kwargs = model_kwargs or {}
        tokenizer = LLaMATokenizer.from_pretrained(model_id, **_model_kwargs)

        try:
            model = LLaMAForCausalLM.from_pretrained(
                model_id,
                load_in_8bit=True,
                device_map='auto',
            )
        except ImportError as e:
            raise ValueError(
                f"Could not load the {task} model due to missing dependencies."
            ) from e

        # NOTE(review): the model is loaded with device_map='auto' above; confirm
        # that also passing an explicit `device` here behaves as intended with
        # the installed transformers build.
        pipeline = hf_pipeline(
            task=task,
            model=model,
            tokenizer=tokenizer,
            device=device,
            model_kwargs=_model_kwargs,
        )
        if pipeline.task not in VALID_TASKS:
            raise ValueError(
                f"Got invalid task {pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        return cls(
            pipeline=pipeline,
            model_id=model_id,
            model_kwargs=_model_kwargs,
            **kwargs,
        )

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "model_id": self.model_id,
            "model_kwargs": self.model_kwargs,
        }

    @property
    def _llm_type(self) -> str:
        """Return the type tag of this LLM wrapper."""
        return "huggingface_pipeline"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Run the wrapped pipeline on ``prompt`` and return the generated text.

        Args:
            prompt: Text passed to the pipeline.
            stop: Optional stop sequences; when non-empty, the output is cut
                with ``enforce_stop_tokens``.

        Returns:
            The generated text. For `text-generation` the leading ``len(prompt)``
            characters of the pipeline output are sliced off.

        Raises:
            ValueError: If the pipeline's task is neither `text-generation` nor
                `text2text-generation`.
        """
        response = self.pipeline(prompt)
        if self.pipeline.task == "text-generation":
            # Text generation return includes the starter text.
            text = response[0]["generated_text"][len(prompt) :]
        elif self.pipeline.task == "text2text-generation":
            text = response[0]["generated_text"]
        else:
            raise ValueError(
                f"Got invalid task {self.pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        # Truthiness check on purpose: an empty stop list must be a no-op,
        # otherwise the split on an empty pattern would blank the output.
        if stop:
            # This is a bit hacky, but I can't figure out a better way to enforce
            # stop tokens when making calls to huggingface_hub.
            text = enforce_stop_tokens(text, stop)
        return text

In [None]:
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

# Hub repository id used for both the tokenizer here and the base model loaded in the next cell.
model_id = "decapoda-research/llama-7b-hf"
# Load the matching LLaMA tokenizer for that checkpoint.
tokenizer = LLaMATokenizer.from_pretrained(model_id)


Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

In [None]:
# Base LLaMA model, loaded with load_in_8bit=True and device_map='auto'.
llama_model = LLaMAForCausalLM.from_pretrained(
    model_id,
    load_in_8bit=True,
    device_map='auto',
)

In [None]:
# Wrap the base model with the already-published Alpaca LoRA adapter.
# This only loads adapter weights from the Hub; no training is run in this cell.
lora_model = PeftModel.from_pretrained(llama_model, "tloen/alpaca-lora-7b")

Downloading (…)/adapter_config.json:   0%|          | 0.00/399 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/67.2M [00:00<?, ?B/s]

In [None]:
from transformers import pipeline

# Text-generation pipeline around the LoRA-wrapped model, limited to 100 new tokens.
# The cell output shows a "PeftModelForCausalLM is not supported" message; the
# later cells still generate text through this pipeline.
lora_pipe = pipeline(
    "text-generation",
    model=lora_model,
    tokenizer=tokenizer,
    max_new_tokens=100,
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LLaMAForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'Speech2Text2ForCausalLM', 

In [None]:
# Wrap the transformers pipeline in the HuggingFacePipeline class defined earlier
# in this notebook, so it can be used as a LangChain LLM (lora_llm).

lora_llm = HuggingFacePipeline(pipeline=lora_pipe)

In [None]:
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

# Prompt with a single {question} slot, followed by a "think step by step" cue.
template = """Question: {question}

Answer: Let's think step by step."""

prompt_lora = PromptTemplate(
    input_variables=["question"],
    template=template,
)

# Chain that formats the prompt and sends it to the LoRA-backed LLM.
lora_chain = LLMChain(llm=lora_llm, prompt=prompt_lora)

In [None]:
# NOTE(review): "Beiber" is spelled this way in the prompt; the recorded output
# below was produced with this exact text, so it is left unchanged.
question = "What NFL team won the Super Bowl in the year Justin Beiber was born?"

# Run the chain on the question and print the generated answer.
print(lora_chain.run(question))



 Justin Beiber was born in 1994, so the Super Bowl was in 2015. The team that won the Super Bowl in 2015 was the New England Patriots.


In [None]:
# Serialise the chain to lora_chain.json in the current working directory.
lora_chain.save("lora_chain.json")

1) Save the chain above and review the resulting JSON (serialising chains).

2) Review using the LLM with RetrievalQA on a single document.

3) Then combine multiple documents for retrieval QA.

4) Re-write chains with your own LLMs.

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Shared splitter for both documents: chunk_size=100 and chunk_overlap=20,
# with length measured by the built-in len.
recurSplitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)

In [None]:
# Read the playbook text. The encoding is given explicitly so decoding does not
# depend on the platform/locale default.
with open('/content/linux_play.txt', encoding='utf-8') as lin:
  txt_lin = lin.read()

# Split the raw text into Document chunks with the shared splitter.
linux_docs = recurSplitter.create_documents([txt_lin])

In [None]:
# Read the CSV as plain text. The encoding is given explicitly so decoding does
# not depend on the platform/locale default.
with open('/content/space_shortened.csv', encoding='utf-8') as spc:
  txt_spc = spc.read()

# Split the raw text into Document chunks with the shared splitter.
space_docs = recurSplitter.create_documents([txt_spc])

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Embedding model built with the class defaults (no arguments are passed).
hfEmbed = HuggingFaceEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
# Directory name passed as persist_directory for the "space" store.
space_directory = 'space_db'

# `Chroma.from_documents` names its embedding parameter `embedding` (singular);
# the previous `embeddings=` spelling did not bind hfEmbed to that parameter.
space_chroma = Chroma.from_documents(documents=space_docs,
                                     embedding=hfEmbed,
                                     persist_directory=space_directory)

In [None]:
# Directory name passed as persist_directory for the "linux" store.
lin_directory = 'lin_db'

# `Chroma.from_documents` names its embedding parameter `embedding` (singular);
# the previous `embeddings=` spelling did not bind hfEmbed to that parameter.
lin_chroma = Chroma.from_documents(documents=linux_docs,
                                   embedding=hfEmbed,
                                   persist_directory=lin_directory)



In [None]:
from langchain.agents.agent_toolkits import (
    create_vectorstore_router_agent,
    VectorStoreRouterToolkit,
    VectorStoreInfo,
)

In [None]:
# Name and description for the space store, handed to the router toolkit below.
space_vectorstore_info = VectorStoreInfo(
    vectorstore=space_chroma,
    name="space",
    description="Used when need to answer about Space fares",
)

In [None]:
# Name and description for the playbook store, handed to the router toolkit below.
linux_vectorstore_info = VectorStoreInfo(
    vectorstore=lin_chroma,
    name="linux",
    description="Used when need to answer about playbook",
)

In [None]:
# Toolkit that bundles both vector stores and is given the LoRA-backed LLM.
router_toolkit = VectorStoreRouterToolkit(
    llm=lora_llm,
    vectorstores=[space_vectorstore_info, linux_vectorstore_info],
)

# Agent built from the toolkit and the same LLM; verbose=True is passed through.
agent_executor = create_vectorstore_router_agent(
    toolkit=router_toolkit,
    llm=lora_llm,
    verbose=True,
)

In [None]:
# Sample question; its wording matches the description given for the "space" store.
query = """How many Space fares are there?"""

In [None]:
# Run the router agent on the query; the return value is shown as the cell output.
agent_executor.run(query)

In [None]:
# Serialise the agent to multi_doc_agent.json in the current working directory.
agent_executor.save_agent("multi_doc_agent.json")