In [1]:
import os
#os.environ["CUDA_VISIBLE_DEVICES"]='0,1'

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings 

from pydantic import BaseModel, Field

# Pick the compute device: "cuda" when PyTorch reports an available GPU,
# otherwise "cpu". Only one GPU is needed if the model is loaded in 8-bit.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Report how many CUDA devices PyTorch can see.
print("Using %d GPUs" % torch.cuda.device_count())

import gradio as gr
import time

# Presumably the display name of the assistant -- confirm where it is used.
name = "APS AI Assistant"



cuda
Using 4 GPUs


In [2]:
# Model identifier passed to `from_pretrained`, and the local directory that
# holds a saved copy of its tokenizer.
model_name = "eachadea/vicuna-13b-1.1"
tokenizer_path = "./tokenizer/"

# Reuse the local tokenizer copy when the directory exists; otherwise load the
# tokenizer for `model_name` and save a copy for later runs.
if os.path.isdir(tokenizer_path):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
else:
    # Pass the variable, not the literal string "model_name" (which is not a
    # model identifier and made the first run fail).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # exist_ok=True avoids a crash if the directory appears after the check above.
    os.makedirs(tokenizer_path, exist_ok=True)
    tokenizer.save_pretrained(tokenizer_path)

In [3]:
# Load the causal language model named by `model_name`, letting the library
# choose device placement (device_map="auto").
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")#, load_in_8bit=True)

# Text-generation pipeline wrapping the model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,
    # Sampling must be enabled explicitly; without it generation is greedy and
    # the temperature/top_p settings below have no effect.
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Load the sentence-embedding model used to embed the source text.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Read the whole article into memory. The encoding is given explicitly because
# the text contains non-ASCII characters (e.g. typographic apostrophes), and
# the platform default encoding is not guaranteed to be UTF-8.
with open(
    "APS-Science-Highlight/2021-07-06_better-educated-neural-networks-for-nanoscale-3-d-coherent-x-ray.txt",
    encoding="utf-8",
) as f:
    book = f.read()

print(book)


Better-Educated Neural Networks for Nanoscale 3-D Coherent X-ray Imaging

One of the inescapable realities of various imaging techniques is called the "phase problem," which simply refers to the loss of phase information inherent in the nature of imaging methods such as x-ray diffraction.  Though it might be inconvenient, it can be dealt with by using various mathematical methods to retrieve the phase data from the image with inverse computation.  Such methods, however, are not only time-consuming but require a great deal of computer power as they must run through multiple iterations to converge on a solution, which prevents real-time imaging.  A group of researchers working at the U.S. Department of Energy’s Advanced Photon Source (APS) has demonstrated a new approach to this perennial obstacle by using a deep-learning neural network trained and optimized for enhanced accuracy to perform fast three-dimensional (3-D) nanoscale imaging from coherent x-ray data.  The work was published i

In [5]:
# Split the article with a character splitter configured for chunk_size=1024
# and no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1024)
texts = text_splitter.split_text(book)

# Build the Chroma vector store from the chunks; each chunk's metadata records
# its position in `texts` (as a string) under the "source" key.
docsearch = Chroma.from_texts(
    texts,
    embeddings,
    metadatas=[{"source": str(idx)} for idx, _ in enumerate(texts)],
)

Using embedded DuckDB without persistence: data will be transient


In [11]:
# Example question to look up in the vector store.
query = "What are the researchers from?"

# Fetch the three closest chunks; each result is a (Document, score) tuple.
docs = docsearch.similarity_search_with_score(query=query, k=3)

# Print one result per line.
for doc in docs:
    print(doc)

(Document(page_content="Argonne National Laboratory seeks solutions to pressing national problems in science and technology. The nation's first national laboratory, Argonne conducts leading-edge basic and applied scientific research in virtually every scientific discipline. Argonne researchers work closely with researchers from hundreds of companies, universities, and federal, state and municipal agencies to help them solve their specific problems, advance America's scientific leadership and prepare the nation for a better future. With employees from more than 60 nations, Argonne is managed by UChicago Argonne, LLC, for the U.S. DOE Office of Science.\n\nThe U.S. Department of Energy's Office of Science is the single largest supporter of basic research in the physical sciences in the United States and is working to address some of the most pressing challenges of our time. For more information, visit the Office of Science website.\n\nPublished Date\n\n07.06.2021", metadata={'source': '1

### The code below doesn't work yet
-- The plan is to use the search results above as context in a prompt entry.

In [7]:

# Pydantic model with a single required string field, `query`.
class SearchInEmbeddings(BaseModel):
    # `...` marks the field as required with no default value.
    query: str = Field(...)

def search(search_input):
    """Look up the single closest chunk in `docsearch` for a query.

    Args:
        search_input: The query, either as a plain string (which is what a
            LangChain `Tool` passes to its `func`) or as a
            `SearchInEmbeddings` instance, whose `query` field is used.

    Returns:
        Whatever `docsearch.similarity_search_with_score(..., k=1)` returns.
    """
    # The vector store needs the query text itself, so unwrap the model
    # instance when one is given instead of embedding the object.
    if isinstance(search_input, SearchInEmbeddings):
        query_text = search_input.query
    else:
        query_text = search_input
    docs = docsearch.similarity_search_with_score(query_text, k=1)
    return docs

# Tools offered to the agent: a single "Search" tool backed by `search`.
tools = [
    Tool(
        func=search,
        name="Search",
        description="Useful for when you need to answer questions",
    ),
]

In [8]:
print("Initializing VicunaLLMClient")

# Windowed conversation memory (k=6), exposed to the prompt as "chat_history".
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=6)

# Wrap the Hugging Face pipeline so LangChain can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

# Use the conversational ReAct agent: its prompt includes a `chat_history`
# variable, so the memory above is actually fed to the model. The zero-shot
# ReAct prompt has no such variable, so the memory was silently unused.
agent = initialize_agent(
    tools,
    local_llm,
    agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
)

Initializing VicunaLLMClient


In [9]:
# Run the agent on the `query` string defined in an earlier cell.
agent.run(input=query)



[1m> Entering new AgentExecutor chain...[0m


KeyboardInterrupt: 