In [3]:
import streamlit as st
from langchain.llms import LlamaCpp
from langchain import PromptTemplate
from langchain.embeddings import LlamaCppEmbeddings
from langchain.chains import LLMChain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


In [4]:
st.set_page_config(page_title="LIFE", layout = "wide")
st.markdown(f"""
            <style>
            .stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80"); 
                     background-attachment: fixed;
                     background-size: cover}}
         </style>
         """, unsafe_allow_html=True)

DeltaGenerator()

In [5]:
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}
Question: {question}
Answer:"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])


In [6]:
llm = LlamaCpp(model_path = "./model/ggml-model-q4_0.bin")
embeddings = LlamaCppEmbeddings(model_path = "./model/ggml-model-q4_0.bin")
llm_chain = LLMChain(llm=llm, prompt=prompt)

llama.cpp: loading model from ./model/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 5407.71 MB (+ 1026.00 MB per state)
...................................................................................................
llama_init_from_file: kv self size  =  256.00 MB
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F1

In [5]:
st.title("QnA")

loader = TextLoader("./Csscheat.txt")
docs = loader.load()    
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(docs)
db = Chroma.from_documents(texts, embeddings)    

Created a chunk of size 443, which is longer than the specified 100
Created a chunk of size 120, which is longer than the specified 100
Created a chunk of size 556, which is longer than the specified 100
Created a chunk of size 364, which is longer than the specified 100
Created a chunk of size 506, which is longer than the specified 100
Created a chunk of size 352, which is longer than the specified 100

llama_print_timings:        load time =  2127.46 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  2127.33 ms /     8 tokens (  265.92 ms per token,     3.76 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  2131.05 ms

llama_print_timings:        load time =  2127.46 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 

In [7]:
question = st.text_input("Ask something related", placeholder="Find something similar to: ....this.... in the text?")    

similar_doc = db.similarity_search(question, k=1)
context = similar_doc[0].page_content
query_llm = LLMChain(llm=llm, prompt=prompt)
response = query_llm.run({"context": context, "question": question})        
st.write(response)       


llama_print_timings:        load time =  2127.46 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =     4.13 ms

llama_print_timings:        load time =  1992.67 ms
llama_print_timings:      sample time =     9.38 ms /    14 runs   (    0.67 ms per token,  1491.90 tokens per second)
llama_print_timings: prompt eval time = 46224.17 ms /   166 tokens (  278.46 ms per token,     3.59 tokens per second)
llama_print_timings:        eval time =  4736.19 ms /    13 runs   (  364.32 ms per token,     2.74 tokens per second)
llama_print_timings:       total time = 51089.11 ms


In [None]:
streamlit run

In [7]:
from langchain.schema import HumanMessage, SystemMessage, AIMessage


In [8]:
llm(
    [
        SystemMessage(content="You are a nice AI bot that helps a user figure out what to eat in one short sentence"),
        HumanMessage(content="I like tomatoes, what should I eat?")
    ]
)

ValueError: Argument `prompt` is expected to be a string. Instead found <class 'list'>. If you want to run the LLM on multiple prompts, use `generate` instead.

In [9]:
from langchain.schema import Document

In [10]:
Document(page_content="This is my document. It is full of text that I've gathered from other places",
         metadata={
             'my_document_id' : 234234,
             'my_document_source' : "The LangChain Papers",
             'my_document_create_time' : 1680013019
         })

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_document_source': 'The LangChain Papers', 'my_document_create_time': 1680013019})

In [11]:
llm("WHat comes after friday")


llama_print_timings:        load time =  1610.53 ms
llama_print_timings:      sample time =    24.87 ms /    46 runs   (    0.54 ms per token,  1849.47 tokens per second)
llama_print_timings: prompt eval time =  1610.43 ms /     7 tokens (  230.06 ms per token,     4.35 tokens per second)
llama_print_timings:        eval time = 12588.20 ms /    45 runs   (  279.74 ms per token,     3.57 tokens per second)
llama_print_timings:       total time = 14377.18 ms


'?\nI am a 32 year old woman and I was told i would be getting my period on or around this date, however it is not here yet. What do you think could be the cause of this?'