# Import all your libraries

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

In [None]:
import os

# Pick the FAQ CSV to load: prefer "UTDFAQ.csv" when it exists in the current
# working directory, otherwise fall back to "FAQ Dataset.csv".
# A direct existence check is used instead of looping over os.listdir():
# the loop reset `faq` to the fallback (and printed the warning) for every
# other directory entry, so the preferred file only won if it was listed last.
faq = "UTDFAQ.csv"
if not os.path.isfile(faq):
    print("UTDFAQ.csv not found in current directory")
    faq = "FAQ Dataset.csv"
print(faq)

# Load the Dataset

In [None]:
from datasets import load_dataset
from rich import print
from rich import print

# Read the CSV selected in `faq` with datasets.load_dataset and show the result.
faq_dataset = load_dataset("csv", data_files=faq)
print(faq_dataset)

# Keep only the rows where both the question and the answer are present.
faq_dataset = faq_dataset.filter(
    lambda row: not (row['Question'] is None or row['Answering'] is None)
)

# Replace every "ARC" with "AccessAbility Resource Center" in both text columns.
# The bare string below is an unused note holding the center's contact details.
'''
Office location: Administration Building, Room 2.224
Email: studentaccess@utdallas.edu (Do not email attachments, upload documents to utd.link/arcupload only.)
Phone: (972) 883-2098
Fax: Please don’t fax, use utd.link/arcupload
Mail: AD 30, 800 West Campbell Rd., Richardson TX 75080
'''
faq_dataset = faq_dataset.map(
    lambda row: {
        'Question': row['Question'].replace('ARC', 'AccessAbility Resource Center'),
        'Answering': row['Answering'].replace('ARC', 'AccessAbility Resource Center'),
    }
)

# Preprocess the dataset

## remove all the None values

In [None]:
# Drop every row whose question or answer is missing.
faq_dataset = faq_dataset.filter(
    lambda row: not (row['Question'] is None or row['Answering'] is None)
)

## Replace UTD with University of Texas at Dallas and ARC with AccessAbility Resource Center

In [None]:
# Expand abbreviations in both text columns, one pass per abbreviation:
#   "UTD" -> "University of Texas at Dallas"
#   "ARC" -> "AccessAbility Resource Center"
faq_dataset = faq_dataset.map(
    lambda row: {
        'Question': row['Question'].replace('UTD', 'University of Texas at Dallas'),
        'Answering': row['Answering'].replace('UTD', 'University of Texas at Dallas'),
    }
)
faq_dataset = faq_dataset.map(
    lambda row: {
        'Question': row['Question'].replace('ARC', 'AccessAbility Resource Center'),
        'Answering': row['Answering'].replace('ARC', 'AccessAbility Resource Center'),
    }
)

# Load the OpenAI Embeddings

In [None]:
import openai
from dotenv import load_dotenv
import os
load_dotenv()  # take environment variables from .env.

# The OpenAI API key is read from the OPENAI_KEY environment variable.
OPENKEY_API = os.getenv("OPENAI_KEY")
if not OPENKEY_API:
    # Fail with an actionable message; without this check the os.environ
    # assignment below raises an opaque TypeError when the key is missing.
    raise RuntimeError("OPENAI_KEY not found in environment variables add the OpenAI API key to the .env file")

# Optional organization setup, currently disabled:
#ORGANIZATION_ID = os.getenv("ORGANIZATION_ID")
#openai.organization = ORGANIZATION_ID

openai.api_key = OPENKEY_API
# Also export the key as OPENAI_API_KEY; presumably this is what the LangChain
# OpenAI classes used later (constructed without an explicit key) read.
os.environ["OPENAI_API_KEY"] = OPENKEY_API

# A single request both checks that we have authenticated and supplies the
# listing to print (previously the same request was issued twice).
print(openai.Engine.list())

## model of choice
MODEL = "text-embedding-ada-002"

# Initialize the Pinecone Client

In [None]:
import pinecone
# The Pinecone API key is read from the PINECONE_APIKEY environment variable
# (no underscore between "API" and "KEY").
PINECONE_APIKEY = os.getenv("PINECONE_APIKEY")
if not PINECONE_APIKEY:
    # The message names the variable exactly as it is looked up above; it
    # previously said PINECONE_API_KEY, which is not the name this code reads.
    raise Exception("PINECONE_APIKEY not found in environment variables add the Pinecone API key to the .env file")
# initialize connection to pinecone (get API key at app.pinecone.io)
# NOTE(review): set_env() initializes Pinecone with environment "us-east1-gcp",
# while this cell uses "us-east-1-aws" -- confirm which one the project uses.
pinecone.init(
    api_key=PINECONE_APIKEY,
    environment="us-east-1-aws",
)

In [None]:
def set_env():
    """Configure the OpenAI and Pinecone clients and select the Pinecone index.

    Reads ``OPENAI_KEY``, ``ORGANIZATION_ID`` and ``PINECONE_APIKEY`` from the
    environment, sets up both client libraries and selects an index from the
    Pinecone account: ``"utd-chatbot"`` when it exists, otherwise the first
    index listed (a hint to create ``"utd-chatbot"`` is printed in that case).

    Returns:
        A 3-tuple ``(index, index_name, index_description)``: the
        ``pinecone.Index`` handle, the name of the selected index, and the
        result of ``pinecone.describe_index`` for that name.

    Raises:
        RuntimeError: If ``OPENAI_KEY`` or ``PINECONE_APIKEY`` is not set, or
            if the Pinecone account has no indexes.
    """
    # --- OpenAI ---------------------------------------------------------
    openai_key = os.getenv("OPENAI_KEY")
    if not openai_key:
        # Without this check the os.environ assignment below fails with an
        # opaque TypeError.
        raise RuntimeError("OPENAI_KEY not found in environment variables add the OpenAI API key to the .env file")
    os.environ["OPENAI_API_KEY"] = openai_key
    # The organization id is optional; it stays None when the variable is unset.
    openai.organization = os.getenv("ORGANIZATION_ID")
    openai.api_key = openai_key
    openai.Engine.list()  # check we have authenticated

    # --- Pinecone -------------------------------------------------------
    pinecone_key = os.getenv("PINECONE_APIKEY")
    if not pinecone_key:
        raise RuntimeError("PINECONE_APIKEY not found in environment variables add the Pinecone API key to the .env file")
    # initialize connection to pinecone (get API key at app.pinecone.io)
    pinecone.init(
        api_key=pinecone_key,
        environment="us-east1-gcp",
    )

    # Fetch the index list once and reuse it; every list_indexes() call is a
    # separate request to the Pinecone service.
    index_names = pinecone.list_indexes()
    if not index_names:
        # Explicit raise instead of `assert`, which is stripped under `python -O`.
        raise RuntimeError("No indexes found in your Pinecone account. Please create an index and try again.")
    print(f'The list of pinecone index {index_names}')

    # Prefer the expected index wherever it appears in the list; fall back to
    # the first index (with a hint) only when it does not exist.
    if "utd-chatbot" in index_names:
        selected_name = "utd-chatbot"
    else:
        print("Please create an index named 'utd-chatbot")
        selected_name = index_names[0]

    return (
        pinecone.Index(index_name=selected_name),
        selected_name,
        pinecone.describe_index(selected_name),
    )


# Run the setup once and keep the index handle, its name and its description
# as notebook-level variables for the cells below.
pinecone_index , index_name , index_description = set_env()

In [None]:
# Request the statistics of the index selected by set_env(); the result is not
# stored (presumably it is meant to be shown as the notebook cell output).
pinecone_index.describe_index_stats()

# Role Prompt

In [None]:
# Instruction text for the chat model; create_an_standard_qa_prompt wraps it
# in a SystemMessage placed before the few-shot human message.
role_prompt = """
Please act as a University of Texas at Dallas Counselor. I will provide you with an individual 
looking for guidance at the University of Texas at Dallas, and your task is to help them 
solve their problem\n
"""

In [None]:
def post_process(ans):
    """Expand "ARC" in an answer and append the center's contact details.

    Args:
        ans: Answer text to post-process.

    Returns:
        ``ans`` unchanged when it is empty/``None`` or does not contain
        "ARC". Otherwise the text with every "ARC" replaced by
        "AccessAbility Resource Center", followed by a blank line and the
        contact details, one item per line.
    """
    if not ans or "ARC" not in ans:
        return ans

    # Built line by line so that no source-code indentation leaks into the
    # output and each item appears exactly once (the old literal repeated
    # "Office location" at the end of the "Mail" line).
    contact_lines = (
        "Office location: Administration Building, Room 2.224",
        "Email: studentaccess@utdallas.edu (Do not email attachments, upload documents to utd.link/arcupload only.)",
        "Phone: (972) 883-2098",
        "Fax: Please don’t fax, use utd.link/arcupload",
        "Mail: AD 30, 800 West Campbell Rd., Richardson TX 75080",
    )
    expanded = ans.replace("ARC", "AccessAbility Resource Center")
    return expanded + "\n\n" + "\n".join(contact_lines) + "\n"

In [None]:

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pinecone import Pinecone

def query_vector_database(query):
    """Look up the FAQ rows most similar to ``query`` in the Pinecone index.

    Runs a similarity search against ``pinecone_index`` and, for each
    retrieved document, finds the row of ``faq_dataset['train']`` whose
    "Question" equals the document's ``page_content``.

    Args:
        query: Free-text question to search for.

    Returns:
        A list of dicts with the keys "Question", "Answer" and "URL" (all
        values converted to ``str``), one per retrieved document that has a
        matching FAQ row. Documents without a matching row are skipped.
    """
    embeddings = OpenAIEmbeddings()

    # Wrap the already-populated index. Pinecone.from_documents(...) is only
    # needed for the initial index creation, when the documents are upserted.
    vectorstore = Pinecone(pinecone_index, embeddings.embed_query, "text")

    res = []
    # Each search hit is a (document, score) pair; the score is not used.
    for doc, _score in vectorstore.similarity_search_with_score(query):
        # get row with the corresponding question in query
        answer = faq_dataset['train'].filter(lambda x: x['Question'] == doc.page_content)
        print(answer)
        if len(answer) == 0:
            # The stored text has no exact match in the (preprocessed) FAQ
            # dataset; skip it instead of failing with an IndexError on [0].
            continue
        # adds a dictionary of the row to list
        res.append({
            "Question": str(answer['Question'][0]),
            "Answer": str(answer['Answering'][0]),
            "URL": str(answer['URL'][0]),
        })
    return res

In [None]:
# Manual check of the retrieval step: build the same vector store wrapper that
# query_vector_database uses, run one sample query and print the raw result
# of similarity_search_with_score.
embeddings = OpenAIEmbeddings()
vectorstore = Pinecone( pinecone_index, embeddings.embed_query, "text") # use this for subsequent runs
docs = vectorstore.similarity_search_with_score("I have ADHD and I need help with my classes.")
print(docs)

In [None]:
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

def create_an_standard_qa_prompt(res):
    """Build the chat prompt from the role prompt and retrieved FAQ rows.

    The FAQ rows become few-shot examples rendered as
    ``Question / Answer / Source``; the user's question is appended through
    the ``input`` variable.

    Args:
        res: List of dicts with the keys "Question", "Answer" and "URL", as
            returned by ``query_vector_database``.

    Returns:
        A ``ChatPromptTemplate`` made of a system message holding
        ``role_prompt`` and a human message holding the few-shot prompt.
    """
    example_prompt = PromptTemplate(
        input_variables=["Question", "Answer", "URL"],
        template="Question: {Question}\n{Answer}\nSource:{URL}",
    )

    # NOTE(review): FAQ text containing literal "{" or "}" may interfere with
    # template formatting -- confirm against FewShotPromptTemplate's behavior.
    few_shot_prompt = FewShotPromptTemplate(
        examples=res,
        example_prompt=example_prompt,
        suffix="Question: {input}",
        input_variables=["input"],
    )

    return ChatPromptTemplate.from_messages([
        SystemMessage(content=role_prompt),
        HumanMessagePromptTemplate(prompt=few_shot_prompt),
    ])

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model instance shared by called_llm_decoder_model, created with temperature=0.
chat = ChatOpenAI(temperature=0)

In [None]:
from langchain import LLMChain

def called_llm_decoder_model(query, prompt):
    """Run ``query`` through the shared ``chat`` model using ``prompt``.

    Args:
        query: Text passed to the chain as the ``input`` variable.
        prompt: Prompt template the chain is built with.

    Returns:
        Whatever ``LLMChain.run`` returns for this input.
    """
    return LLMChain(llm=chat, prompt=prompt).run(input=query)


# Gradio

In [None]:
import gradio as gr
# NOTE(review): `questions`, `answers` and `random_value` are not referenced
# by any other code visible in this file -- presumably leftovers from a
# planned question/answer log; confirm before removing.
questions = []
answers = []
import random
random_value = random.randint(0, 1000)  # random integer between 0 and 1000, inclusive

def utd_chatbot(question):
    """Answer one question: retrieve FAQ rows, build the prompt, call the model.

    Args:
        question: The user's question text.

    Returns:
        The output of ``called_llm_decoder_model`` for the question.
    """
    retrieved_rows = query_vector_database(question)
    chat_prompt = create_an_standard_qa_prompt(retrieved_rows)
    # post_process() is currently not applied to the model output.
    return called_llm_decoder_model(question, chat_prompt)

In [None]:
## Launch Gradio
# Serve utd_chatbot through a text-in / text-out interface.
# NOTE(review): share=True presumably requests a publicly reachable link --
# confirm that this exposure is intended.
demo = gr.Interface(fn=utd_chatbot, inputs="text", outputs="text")
demo.launch( share = True )

In [39]:
# Close the demo
#demo.close()

Closing server running on port: 7860


# Use LangChain to Generate One Example

In [None]:
from langchain import PromptTemplate


# Example prompt text with a single placeholder, {product}.
template = """
I want you to act as a naming consultant for new companies.

Here are some examples of good company names:

- search engine, Google
- social media, Facebook
- video sharing, YouTube

The name should be short, catchy and easy to remember.

What is a good name for a company that makes {product}?
"""

# Prompt template built from the text above.
# NOTE(review): `prompt` is not used by the call at the end of this cell,
# which sends a fixed string instead -- confirm whether that is intended.
prompt = PromptTemplate(
    input_variables=["product"],
    template=template,
)

from langchain.llms import OpenAI
# Completion model wrapper configured with model_name, n and best_of.
llm = OpenAI(model_name="text-davinci-003", n=2, best_of=2)
from rich import print
# Call the model directly with a fixed question and print what it returns.
print( llm("Tell me about yourself") ) 