In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"]='0,1'

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.agents import load_tools, initialize_agent, Tool

# Prefer the GPU whenever CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Need only 1 GPU if loading the 8-bit model
print("Using %d GPUs" % (torch.cuda.device_count(),))

import gradio as gr
import time
# Display name of the assistant. Not referenced in the cells shown here;
# presumably consumed by the Gradio UI further down -- TODO confirm.
name = "APS AI Assistant"



cuda
Using 4 GPUs


### Vicuna tokenizer and model setup

In [2]:
model_name = "eachadea/vicuna-13b-1.1"
tokenizer_path = "../tokenizer/"

# Reuse the local tokenizer copy when it exists; otherwise fetch it once by
# model id and save it to tokenizer_path so later runs load it from disk.
if os.path.isdir(tokenizer_path):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
else:
    # Pass the variable: the quoted literal "model_name" is not a valid
    # model id, so the first-run download could never succeed.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # makedirs also creates missing parents and tolerates an existing dir.
    os.makedirs(tokenizer_path, exist_ok=True)
    tokenizer.save_pretrained(tokenizer_path)

In [3]:
# device_map="auto" lets the loader place the checkpoint shards on the
# available devices. Pass load_in_8bit=True as well to load the 8-bit model.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")#, load_in_8bit=True)

# Text-generation pipeline that LangChain wraps in a later cell.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,
    # temperature / top_p only take effect when sampling is enabled; without
    # do_sample=True generation is greedy and both settings are ignored.
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline; this
# object is what the chains and agents below receive as their `llm`.
local_llm = HuggingFacePipeline(pipeline=pipe)

### Set up LangChain agent, memory and tools

In [5]:
from langchain import Wikipedia
from langchain.agents.react.base import DocstoreExplorer

def docstore_agent():
    """Build a "react-docstore" agent over Wikipedia driven by ``local_llm``.

    The agent is given two tools, "Search" and "Lookup", both bound to a
    single ``DocstoreExplorer`` wrapping ``Wikipedia()``.

    Returns:
        The agent object produced by ``initialize_agent`` (verbose mode on).
    """
    explorer = DocstoreExplorer(Wikipedia())
    search_tool = Tool(
        name="Search",
        func=explorer.search,
        description="Search wikipedia",
    )
    lookup_tool = Tool(
        name="Lookup",
        func=explorer.lookup,
        description="Lookup a wikipedia page",
    )
    return initialize_agent(
        [search_tool, lookup_tool],
        local_llm,
        agent="react-docstore",
        verbose=True,
    )

def llm_agent():
    """Return an ``LLMChain`` that sends the query to ``local_llm`` verbatim.

    The prompt template is just ``"{query}"``, so no system-style preamble
    is added around the user's text.

    Returns:
        An ``LLMChain`` with a single input variable, ``query``.
    """
    # LLM tool for everything else.
    # Open question: why does this not need a detailed prompt like "you are an AI...."?
    passthrough_prompt = PromptTemplate(
        template="{query}",
        input_variables=["query"],
    )
    return LLMChain(prompt=passthrough_prompt, llm=local_llm)


# Instantiate both helpers. Note that each name is rebound from the factory
# function to the object it returns.
docstore_agent = docstore_agent()
llm_agent = llm_agent()

# Tools offered to the top-level conversational agent: a Wikipedia-backed
# agent for factual questions and the bare LLM chain for everything else.
tools = [
    Tool(
        name="WikipediaSearch",
        func=docstore_agent.run,
        description="Useful for answering a wide range of factual, scientific, academic, political and historical questions.",
    ),
    Tool(
        name='Language Model',
        description='Use this tool for general purpose queries and logic',
        func=llm_agent.run,
    ),
]



In [6]:
# Windowed memory: the agent sees only the most recent k turns of the
# chat, exposed to the prompt under the "chat_history" key.
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=12)

# Top-level conversational agent that chooses between the tools above;
# it is capped at 3 iterations per query.
conversation = initialize_agent(
    tools=tools,
    llm=local_llm,
    agent='conversational-react-description',
    memory=memory,
    max_iterations=3,
    verbose=True,
)

# Show the prompt template the agent will actually send to the LLM.
print(conversation.agent.llm_chain.prompt.template)

Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful tool that can help with a wide range of tasks 

In [8]:
# NOTE(review): in the recorded output this call picks the WikipediaSearch
# tool, enters the nested agent chain, and then fails with
# OutputParserException ("Could not parse LLM Output").
conversation("Who is Alejandro Strachan?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: WikipediaSearch
Action Input: "Alejandro Strachan"[0m

[1m> Entering new AgentExecutor chain...[0m


OutputParserException: Could not parse LLM Output: Thought: This seems like a person's name but there isn

In [10]:
# Create a Wikipedia API wrapper so Wikipedia can be queried directly,
# without going through an agent.
from langchain.utilities import WikipediaAPIWrapper
wikipedia = WikipediaAPIWrapper()

In [11]:
# Same subject as the agent query earlier in the notebook; the recorded result
# is a single string of "Page: ... / Summary: ..." entries.
wikipedia.run('Alejandro Strachan')

'Page: Alejandro Strachan\nSummary: Alejandro Strachan is a scientist in the field of computational materials and a professor of materials engineering at Purdue University. Before joining Purdue University, he was a staff member at Los Alamos National Laboratory.\n\nPage: William Andrew Goddard III\nSummary: William Andrew Goddard III (born March 29, 1937) is the Charles and Mary Ferkel Professor of Chemistry and Applied Physics, and Director of the Materials and Process Simulation Center at the California Institute of Technology.\n\nPage: NanoHUB\nSummary: nanoHUB.org is a science and engineering gateway comprising community-contributed resources and geared toward education, professional networking, and interactive simulation tools for nanotechnology.  Funded by the United States National Science Foundation (NSF), it is a product of the Network for Computational Nanotechnology (NCN).\nNCN supports research efforts in nanoelectronics; nanomaterials; nanoelectromechanical systems (NEMS)