In [19]:
import warnings 
warnings.filterwarnings("ignore") 

import numpy as np
import openai
import pandas as pd
import os
import pickle
os.environ['MKL_DEBUG_CPU_TYPE'] = '5'
os.environ['MKL_DISABLE_FAST_MM'] = '1'

openai.api_key = "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" # This key will expire after the workshop

COMPLETIONS_MODEL = "gpt-4" 
EMBEDDING_MODEL = "text-embedding-3-large"


df = pd.read_csv('services_temp.csv').dropna()
df = df.set_index("service")
df.head()

Unnamed: 0_level_0,description,link
service,Unnamed: 1_level_1,Unnamed: 2_level_1
Academic Advisory System,The academic advisory system is used to send e...,https://www.hmc.edu/cis/services/academic-advi...
Academic and Scientific Software Support,CIS maintains campus-wide license agreements f...,https://www.hmc.edu/cis/services/academic-and-...
Adobe Acrobat,Acrobat Pro DC is a software package that incl...,https://www.hmc.edu/cis/services/adobe-acrobat/
Anki Flash Cards,"Anki is a free, open source application that a...",https://www.hmc.edu/cis/services/anki-flash-ca...
Audiovisual Event Support,CIS (Computing and Information Services) provi...,https://www.hmc.edu/cis/services/audiovisual-e...


In [20]:
df.shape

(71, 2)

In [21]:
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    result = openai.Embedding.create(
      model=model,
      input=text
    )
    return result["data"][0]["embedding"]

def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Returns the similarity between two vectors.
    
    Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.
    """
    return np.dot(np.array(x), np.array(y))

def select_document_section_by_query_similarity(query: str, contexts: dict[(str, str), np.array]) -> list[(float, (str, str))]:
    """
    Find the query embedding for the supplied query, and compare it against all of the pre-calculated document embeddings
    to find the most relevant sections. 
    
    Return the list of document sections, sorted by relevance in descending order.
    """
    query_embedding = get_embedding(query)
    
    document_similarities = sorted([
        (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()
    ], reverse=True)
    
    return document_similarities[0]

In [22]:
def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """
    Create an embedding for each row in the dataframe using the OpenAI Embeddings API.
    
    Return a dictionary that maps between each embedding vector and the index of the row that it corresponds to.
    """
    return {
        idx: get_embedding(r.description) for idx, r in df.iterrows()
    }

document_embeddings = compute_doc_embeddings(df)



In [23]:
def construct_prompt(query: str, context_embeddings: dict, df: pd.DataFrame) -> str:
    """
    Fetch relevant 
    """
    _ , chosen_service = select_document_section_by_query_similarity(query, context_embeddings)
        
    service_description = df.loc[chosen_service].description.replace("\n", " ")
    introduction = "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
    introduction += "I could not find an answer to your question, please reach out to Helpdesk."
    question = f"\n\nQ: {query}"
    message = introduction + "\n* " + "\n\nContext:\n" + service_description + question
    link = df.loc[chosen_service].link
    return message, link

prompt = construct_prompt(
    "How many iClickers are there?",
    document_embeddings,
    df
)

print(prompt)

("Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say I could not find an answer to your question, please reach out to Helpdesk.\n* \n\nContext:\niClickers are a personal response system that can be used for a variety of purposes in the classroom. They use a wireless receiver to record student votes, and software on the instructor's laptop records and displays the votes. Students can receive points for participation and correctness of their responses. Grading information can be exported and uploaded into Sakai. Faculty can check out iClickers from CIS for the semester or a single class. CIS also provides training and software installation and configuration. ARCS can provide consultations on incorporating iClickers into syllabi. For more information, visit the iClickers website. Faculty, students and staff at HMC can check out sets of 50 iClickers from the CIS Help Desk, as well as the wireless base stat

In [29]:
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    prompt, link = construct_prompt(
        query,
        document_embeddings,
        df
    )
    
    if show_prompt:
        print(prompt)

    response = openai.ChatCompletion.create(
        model = COMPLETIONS_MODEL, # "gpt-3.5-turbo",
        messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
        ]
    )
    end_message = "\n\nPlease check out the relevant HMC service catalogue for more details: "+ link 
    end_message += """\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. """
    end_message += """Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"""
    reply = response["choices"][0]["message"]["content"] + end_message
    return reply

In [30]:
answer_query_with_context("How to connect to WiFi?", df, document_embeddings)

"To connect to WiFi, you need to access the appropriate wireless settings pane on your device and select the network that's suitable for your use case. If you choose Claremont-WPA or Eduroam, you will need to authenticate with your HMC credentials. For Claremont-ETC, a shared password is required, which HMC users can request and non-HMC users must contact the CIS Help Desk to obtain. When prompted, you will enter the appropriate username and password. If you need assistance with this process, contact the Help Desk at helpdesk@hmc.edu or 909.607.7777; a CIS staff member will respond within one business day. You also have the option of using their form to submit a request for wireless help.\n\n\nPlease check out the relevant HMC service catalogue for more details: https://www.hmc.edu/cis/services/wireless-networks/\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. Helpdesk representatives are al

In [31]:
answer_query_with_context("How do I use iClickers?", df, document_embeddings)

"The text does not provide a specific step-by-step guide on how to use iClickers. However, it does mention that iClickers are a personal response system used in the classroom, which uses a wireless receiver to record student votes, with software that shows these votes on the instructor's laptop. To get more detailed instructions on how to use iClickers, you may want to reach out to the CIS Help Desk or visit the iClickers website as suggested in the context.\n\nPlease check out the relevant HMC service catalogue for more details: https://www.hmc.edu/cis/services/iclickers/\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"

In [None]:
# Hosting chatbot on Gradio

import gradio as gr

def chatbot(input):
    if input:
        reply = answer_query_with_context(input, df, document_embeddings)
        return reply

inputs = gr.inputs.Textbox(lines=7, label="Chat with AI")
outputs = gr.outputs.Textbox(label="Reply")

header_message = "Ask the chatbot about the following services provided by CIS at HMC."
gr.Interface(fn=chatbot, inputs=inputs, outputs=outputs, title="AI Chatbot",
             description=header_message,
             theme="compact").launch(share=True)

Running on local URL:  http://127.0.0.1:7861
