In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain_community.chat_models.llamacpp import ChatLlamaCpp
from langchain.prompts import PromptTemplate
import warnings

warnings.filterwarnings("ignore")

In [None]:
# Chat model backed by a local TinyLlama GGUF file, run through llama.cpp.
llm = ChatLlamaCpp(
    # Model file and runtime settings.
    model_path="./tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf",
    n_ctx=2048,
    n_batch=32,
    verbose=False,
    # Sampling settings.
    temperature=0.8,
    top_p=0.9,
    max_tokens=256,
    # Output handling.
    streaming=True,
    # Strings that end a generation as soon as the model produces them.
    # NOTE(review): the bare "User" entry presumably also truncates replies that
    # merely contain that word — confirm this is intended.
    stop=["User", "\nUser", "\n\n", "\nHuman", "</s>", " Human:"],
)

In [None]:
# Template used to roll new conversation lines into a running summary.
# Placeholders: {summary} (the previous summary) and {new_lines} (the lines to add).
summary_prompt = PromptTemplate.from_template(
    "Summarize the lines of conversation provided, "
    "adding onto the previous summary returning a new summary.\n"
    "Previous summary:\n{summary}\n\n"
    "New lines of conversation:\n{new_lines}\n\n"
    "New summary:\n"
)

In [None]:
# Conversation memory that summarises with `llm` using `summary_prompt`.
# Presumably turns beyond the 100-token limit are folded into the summary —
# see the LangChain memory docs to confirm.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    prompt=summary_prompt,
    max_token_limit=100,
)

In [None]:
# Chat prompt for each turn. Placeholders: {history} (filled from memory) and
# {input} (the user's message).
prompt = PromptTemplate.from_template(
    "This is a conversation between human and AI. "
    "Reply to the user in NO MORE THAN 30 words.\n"
    "History: {history}\n"
    "Human: {input}\n\n"
)

In [None]:
from langchain.chains import ConversationChain

# Wire the model, the summarising memory and the chat prompt into one chain.
chain = ConversationChain(prompt=prompt, llm=llm, memory=memory, verbose=False)

### Run (No streaming)

In [None]:
# `Chain.run` is deprecated; `invoke` takes the inputs as a dict and returns a
# dict, so select the reply text to keep the cell output a plain string.
chain.invoke({"input": "Hello."})["response"]

In [None]:
# Give the assistant a name so later turns can check that memory retains it.
# Uses `invoke` (the non-deprecated replacement for `Chain.run`) and selects
# the reply text from the returned dict.
chain.invoke({"input": "I will call you Jarvis."})["response"]

In [None]:
# Keep a reference to the memory's current buffer and show it as the cell output.
history = chain.memory.buffer
history

In [None]:
# Ask for the name given earlier in the conversation. Uses `invoke` (the
# non-deprecated replacement for `Chain.run`) and selects the reply text.
chain.invoke({"input": "What is your name?"})["response"]

In [None]:
# Continue the conversation. Uses `invoke` (the non-deprecated replacement for
# `Chain.run`) and selects the reply text from the returned dict.
chain.invoke({"input": "Good. What are you doing today?"})["response"]

In [None]:
# Follow-up question that relies on the previous turn being remembered. Uses
# `invoke` (the non-deprecated replacement for `Chain.run`) and selects the reply text.
chain.invoke({"input": "What is the project?"})["response"]

In [None]:
# Inspect the memory buffer again after the additional turns above.
chain.memory.buffer

### Streaming

In [None]:
# Start the streaming demo from a blank slate: empty the memory, then build a
# fresh chain around it.
memory.clear()
chain = ConversationChain(prompt=prompt, llm=llm, memory=memory, verbose=False)

In [None]:
# NOTE(review): the returned stream object is only displayed here, never
# iterated — presumably nothing is generated until it is consumed (as the loop
# in the next cell does). Confirm, and drop this cell if it is leftover exploration.
chain.stream(input={"input": "Hello."})

In [None]:
# A legacy Chain's `.stream()` yields output dicts (inputs, history and the
# reply) rather than text tokens, so print only the reply text instead of the
# whole dict repr. Non-dict chunks are printed unchanged.
for chunk in chain.stream({"input": "Hello."}):
    text = chunk["response"] if isinstance(chunk, dict) else chunk
    print(text, end="", flush=True)

In [None]:
# Name the assistant in the streaming session. `.stream()` on a legacy Chain
# yields output dicts, so print only the reply text; non-dict chunks are
# printed unchanged.
for chunk in chain.stream({"input": "I will call you Jarvis. Ok?"}):
    text = chunk["response"] if isinstance(chunk, dict) else chunk
    print(text, end="", flush=True)

In [None]:
# Check that the name is remembered. `.stream()` on a legacy Chain yields
# output dicts, so print only the reply text; non-dict chunks are printed
# unchanged.
for chunk in chain.stream({"input": "What is your name?"}):
    text = chunk["response"] if isinstance(chunk, dict) else chunk
    print(text, end="", flush=True)