In [1]:
# !pip install langchain
# !pip install pypdf
# !pip install python-dotenv
# !pip install chromadb
# !pip install panel
# glob is part of the Python standard library -- no pip install needed

In [2]:
import os, openai, glob
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its variables into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Fails with KeyError if OPENAI_API_KEY is not set after loading the .env file.
openai.api_key = os.environ['OPENAI_API_KEY']

# Base folder of the persisted Chroma vector stores.
vsPath = './docs/chroma_db/'

In [3]:
# part 1 - build a vector store of the lecture slides
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

In [4]:
# Locate the lecture-slide PDFs, grouped by course week.
# `files` ends up as a list with one entry per week; each entry is the list of
# PDF paths whose file name starts with "<fileNamingConvention><week>".
folderPath = "LessonMaterial"
fileNamingConvention = "02456_"
files = []
for week in [1, 2, 3, 4, 5, 6]:
    # NOTE: "<prefix><week>*.pdf" would also match two-digit weeks that start
    # with the same digit (week 1 vs. week 10); harmless for weeks 1-6.
    pattern = os.path.join(folderPath, fileNamingConvention + str(week) + "*.pdf")
    # glob returns matches in arbitrary, file-system dependent order; sorting
    # makes the order in which a week's slides are concatenated reproducible.
    # `recursive=True` is not passed because it only affects "**" patterns.
    files.append(sorted(glob.glob(pattern)))
print(files)

[['LessonMaterial\\02456_1Introduction.pdf'], ['LessonMaterial\\02456_2CNN.pdf', 'LessonMaterial\\02456_2CNN_2017news.pdf'], ['LessonMaterial\\02456_3RNN.pdf', 'LessonMaterial\\02456_3RNN_2017news.pdf', 'LessonMaterial\\02456_3RNN_2020news.pdf'], ['LessonMaterial\\02456_4TricksTrade.pdf', 'LessonMaterial\\02456_4TricksTrade_2020news.pdf'], ['LessonMaterial\\02456_5Unsupervised.pdf', 'LessonMaterial\\02456_5Unsupervised_2017News.pdf', 'LessonMaterial\\02456_5Unsupervised_2020News.pdf'], ['LessonMaterial\\02456_6ReinforcementLearning.pdf', 'LessonMaterial\\02456_6ReinforcementLearning_2017news.pdf']]


In [5]:
# Read every slide deck and merge its pages into one text blob per week.
# docs[i] holds the text of the i-th entry of `files`; every page's text is
# preceded by a newline.
docs = []
for weekly_files in files:
    page_texts = []
    for file in weekly_files:
        pages = PyPDFLoader(file).load()
        page_texts.extend('\n' + page.page_content for page in pages)
    docs.append("".join(page_texts))

In [6]:
# split lecture slides into semantically meaningful chunks (done in the next cell, together with the embedding step)

In [7]:
# Embeddings and Vector Store using OpenAI.
# Each week's slide text is split into overlapping chunks, embedded, and
# persisted to its own Chroma store under ./docs/chroma_db/<week number>.
embedding = OpenAIEmbeddings()
persist_root = './docs/chroma_db/'

# The splitter settings are identical for every week, so build it once.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,      # chunk length limit, measured with length_function
    chunk_overlap  = 500,   # length shared between neighbouring chunks
    length_function = len,
    is_separator_regex = False,
)

# NOTE(review): re-running this cell against an already populated directory
# presumably appends the same chunks again -- confirm, and clear the directory
# before a rebuild if so.
# Iterating over `docs` itself (weeks numbered from 1) keeps the loop in step
# with however many weeks were actually loaded.
for week, text in enumerate(docs, start=1):
    print('week', week)

    splits = text_splitter.split_text(text)
    persist_directory = persist_root + str(week)
    print(persist_directory)

    vectordb = Chroma.from_texts(
        texts=splits,
        embedding=embedding,
        persist_directory=persist_directory
    )

    vectordb.persist()
    print(vectordb._collection.count())

week 1
./docs/chroma_db/1
15
week 2
./docs/chroma_db/2
13
week 3
./docs/chroma_db/3
25
week 4
./docs/chroma_db/4
32
week 5
./docs/chroma_db/5
26
week 6
./docs/chroma_db/6
13


In [219]:
# part 2 - Memory

In [373]:
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
from langchain.schema import SystemMessage

In [374]:
# Prompt for the fallback conversation: a fixed system instruction, the chat
# history provided under "chat_history", then the student's latest message.
default_prompt = ChatPromptTemplate.from_messages(
    [
        # Persistent system prompt. Adjacent string literals are used rather
        # than backslash continuations inside a single literal, so the source
        # indentation of the continuation lines does not end up in the prompt.
        SystemMessage(
            content = (
                "If the human_input is an answer to your question about deep learning, "
                "evaluate and provide personal feedback on how the student can improve on their understanding. "
                "Otherwise, just reply to the INPUT using a general approach."
            )
        ),

        # Memory store: filled from the "chat_history" variable.
        MessagesPlaceholder(
            variable_name = "chat_history"
        ),

        # Where the human input will be injected.
        HumanMessagePromptTemplate.from_template(
                "{query}"
        ),

    ]
)

In [375]:
# Conversation buffer handed to every chain below; history is exposed as
# message objects under the "chat_history" key.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

In [376]:
# part 3 - Route template

In [377]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain,RouterOutputParser
from langchain.chains import LLMChain, RetrievalQA, SequentialChain
from langchain.schema import StrOutputParser

In [424]:
# templates for week 1 to 6
def _tutor_template(topic, week, instruction="Ask the student questions as per the lecture."):
    """Build the tutor prompt text for one course week.

    Args:
        topic: Subject named in the opening sentence.
        week: Week number, used in the opening sentence and the sign-off tag.
        instruction: The sentence telling the model what to ask the student.

    Returns:
        The prompt text. The placeholder ``{context}`` is left in place
        literally so it can be filled in later by a prompt template.
    """
    return (
        f"You are a tutor and you will be assessing a student on their understanding on {topic} in week {week}.\n"
        "\n"
        "THIS IS NOT THE STUDENT's ANSWER, THIS IS THE LECTURE MATERIAL: {context}.\n"
        "\n"
        f"{instruction}\n"
        "\n"
        f'Always say "Thank you for using g51-{week}!" at the end of the answer.\n'
    )


one_template = _tutor_template("Feed Forward Neural Network", 1)
two_template = _tutor_template("Convolutional Neural Network", 2)
three_template = _tutor_template("Recurrent Neural Network", 3)
four_template = _tutor_template("tricks and trade for increasing neural net accuracy", 4)
five_template = _tutor_template("Unsupervised learning", 5)
# Week 6 is the only one with a differently worded instruction sentence.
six_template = _tutor_template(
    "Reinforcement learning",
    6,
    instruction="Generate a few questions as per the lecture and ask the student.",
)

In [425]:
# part 4 - Sequential Chain

In [426]:
# chains for different weeks - Chain subclass
embedding = OpenAIEmbeddings()
all_templates = [one_template, two_template, three_template, four_template, five_template, six_template]


def _build_week_chain(week, template):
    """Assemble the two-step chain for one week.

    Step 1 is a RetrievalQA chain over the Chroma store persisted under
    ./docs/chroma_db/<week>; its output is stored under "context".
    Step 2 is an LLMChain that fills `template` and stores its output under
    "text". Both steps are wrapped in a SequentialChain that takes "query",
    returns "text", and uses the notebook-level `memory`.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature=0),
        retriever = Chroma(
            persist_directory = f'./docs/chroma_db/{week}',
            embedding_function = embedding,
        ).as_retriever(),
        chain_type = "stuff",
        output_key = "context",
        verbose = True,
    )

    res_chain = LLMChain(
        llm = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature=0),
        prompt = PromptTemplate.from_template(template=template),
        output_key = "text",
        verbose = True,
    )

    return SequentialChain(
        chains = [qa_chain, res_chain],
        input_variables = ["query"],
        output_variables = ["text"],
        memory = memory,
        verbose = True,
    )


# result_chains[i] serves week i+1, in the same order as all_templates.
result_chains = [
    _build_week_chain(week, template)
    for week, template in enumerate(all_templates, start=1)
]

In [427]:
# Route name and router-facing description for each week, in week order.
_route_specs = [
    (
        "FFNN",
        "When asked to assess student's understanding of the deep learning introduction and Feed Forward Neural Network taught in week one.",
    ),
    (
        "CNN",
        "When asked to assess student's understanding of Convolutional Neural Network taught in week two.",
    ),
    (
        "RNN",
        "When asked to assess student's understanding of Recurrent Neural Network taught in week three.",
    ),
    (
        "TrickTrade",
        "When asked to assess student's understanding of Neural Network tricks and trade to improve performance taught in week four.",
    ),
    (
        "Unsupervised",
        "When asked to assess student's understanding of Unsupervised Learning taught in week five.",
    ),
    (
        "Reinforcement",
        "When asked to assess student's understanding of Reinforcement Learning taught in week six.",
    ),
]

# One routing entry per week; template and chain are matched by position.
prompt_infos = [
    {
        "name": route_name,
        "description": route_description,
        "prompt_template": all_templates[idx],
        "chain": result_chains[idx],
    }
    for idx, (route_name, route_description) in enumerate(_route_specs)
]

In [428]:
# part 5 - Router Chain and Routing

In [429]:
# Route name -> chain that handles it.
destination_chains = {info["name"]: info["chain"] for info in prompt_infos}

# One "name: description" line per route, joined into the text block that is
# substituted into the router prompt.
destinations = []
for info in prompt_infos:
    destinations.append("{}: {}".format(info['name'], info['description']))
destinations_str = "\n".join(destinations)

In [430]:
# Fallback chain: answers with default_prompt and the shared memory.
# NOTE(review): this pins the "gpt-3.5-turbo-0301" snapshot while the weekly
# chains use "gpt-3.5-turbo" -- confirm the snapshot is still served.
default_llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0301")
default_chain = LLMChain(
    llm=default_llm,
    prompt=default_prompt,
    memory=memory,
    verbose=True,
)

In [431]:
# Router prompt text. It goes through two formatting passes: str.format() fills
# {destinations}, then the result is used as a prompt template whose variable
# is {input}. Hence literal JSON braces are quadrupled and {input} is doubled.
# Every backslash-continued line ends with a space before the backslash so the
# joined words stay separated, and the literal backslashes in the JSON sketch
# are escaped ("\\") so they are not read as invalid escape sequences.
MULTI_PROMPT_ROUTER_TEMPLATE = """Given a raw text input to a \
language model select the prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising \
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \\ name of the prompt to use or "DEFAULT"
    "next_inputs": string \\ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not \
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

In [432]:
class RouterOutputParser2(RouterOutputParser):
    # Router output parser whose inner key is "query", the input variable name
    # used by the weekly chains and the default prompt in this notebook.
    # NOTE(review): presumably the parent parser wraps the router's
    # "next_inputs" value under this key -- confirm against RouterOutputParser.
    next_inputs_inner_key: str = "query"

In [433]:
# Substitute the candidate list into the router template; the only remaining
# template variable afterwards is "input".
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)

# The router's reply is parsed by RouterOutputParser2.
router_prompt = PromptTemplate(
    input_variables=["input"],
    output_parser=RouterOutputParser2(),
    template=router_template,
)

router_llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0301")
router_chain = LLMRouterChain.from_llm(llm=router_llm, prompt=router_prompt)

In [434]:
# Top-level chain: combines the router, the per-week destination chains, and
# the default chain.
chain = MultiPromptChain(
    default_chain=default_chain,
    destination_chains=destination_chains,
    router_chain=router_chain,
    verbose=True,
)

In [435]:
# part 6 - User Interface using Panel

In [436]:
import panel as pn
import param

In [437]:
class cbfs(param.Parameterized):
    """Chat front end: sends user input through `chain` and renders the exchange.

    Relies on three notebook-level names: `chain` (captured in ``__init__``),
    `memory` (cleared by ``clr_history``) and the text-input widget `inp`
    (reset by ``convchain``).
    """
    chat_history = param.List([])  # (query, answer) tuples, oldest first
    answer = param.String("")      # most recent answer returned by the chain

    def __init__(self, **params):
        super().__init__(**params)
        self.panels = []  # rendered rows, newest exchange first
        self.qa = chain

    def convchain(self, query2):
        """Run one user message through the chain and return the conversation view.

        Args:
            query2: Text entered by the user. An empty value runs nothing and
                returns a box holding a single empty 'User:' row.

        Returns:
            A scrollable ``pn.WidgetBox`` with all exchanges so far, newest first.
        """
        if not query2:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa.run(query2)
        self.chat_history.extend([(query2, result)])
        self.answer = result
        # `styles` (not `style`) to match the keyword get_chats already uses.
        self.panels = [
            pn.Row('AI:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'})),
            pn.Row('User:', pn.pane.Markdown(query2, width=600))
        ] + self.panels
        inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)

    # NOTE(review): the dependencies named here are methods, and convchain
    # mutates chat_history in place -- confirm this view refreshes as intended.
    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a box listing every stored (query, answer) tuple.

        Shows a "No History Yet" placeholder while `chat_history` is empty.
        """
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self,count=0):
        """Empty `chat_history` and clear the shared `memory`.

        Args:
            count: Unused; absorbs the argument passed when this method is
                used as a button click callback.
        """
        self.chat_history = []
        memory.clear()

In [438]:
# Build the three-tab dashboard around a single cbfs instance.
cb = cbfs()

# Free-text input; its value is what gets bound to cb.convchain below.
inp = pn.widgets.TextInput(placeholder='Enter text here…')

button_clearhistory = pn.widgets.Button(button_type='warning', name="Clear History")
button_clearhistory.on_click(cb.clr_history)

# Bound to the input widget, so the conversation follows its value.
conversation = pn.bind(cb.convchain, inp)

# Tab 1: input box above the running conversation.
input_row = pn.Row(inp)
conversation_pane = pn.panel(conversation, loading_indicator=True, height=300)
tab1 = pn.Column(input_row, pn.layout.Divider(), conversation_pane, pn.layout.Divider())

# Tab 2: view of the stored chat history.
history_pane = pn.panel(cb.get_chats)
tab2 = pn.Column(history_pane, pn.layout.Divider())

# Tab 3: configuration, currently just the clear-history button.
clear_row = pn.Row(button_clearhistory, pn.pane.Markdown("Clears chat history."))
tab3 = pn.Column(clear_row, pn.layout.Divider())

title_row = pn.Row(pn.pane.Markdown('# Group51 - Tutor_Bot'))
tabs = pn.Tabs(('Conversation', tab1), ('Chat History', tab2), ('Configure', tab3))
dashboard = pn.Column(title_row, tabs)

pn.extension()
dashboard