In [1]:
pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.4
    Uninstalling openai-1.54.4:
      Successfully uninstalled openai-1.54.4
Successfully installed openai-0.28.0


In [4]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import openai
import pandas as pd
import os
import pickle
os.environ['MKL_DEBUG_CPU_TYPE'] = '5'
os.environ['MKL_DISABLE_FAST_MM'] = '1'

# Read the key from the environment instead of embedding it in the notebook,
# so the secret is never saved or shared together with the file.
# Set OPENAI_API_KEY before running this cell; a missing variable raises KeyError here.
openai.api_key = os.environ["OPENAI_API_KEY"]
COMPLETIONS_MODEL = "gpt-4"
EMBEDDING_MODEL = "text-embedding-3-small"


# Load the service catalogue, drop rows with missing values, and key the table
# by service name so a row can be fetched with df.loc[<service>].
df = (
    pd.read_csv('https://raw.githubusercontent.com/AashitaK/datasets/main/services_temp.csv')
    .dropna()
    .set_index("service")
)
df.head()

Unnamed: 0_level_0,description,link
service,Unnamed: 1_level_1,Unnamed: 2_level_1
Academic Advisory System,The academic advisory system is used to send e...,https://www.hmc.edu/cis/services/academic-advi...
Academic and Scientific Software Support,CIS maintains campus-wide license agreements f...,https://www.hmc.edu/cis/services/academic-and-...
Adobe Acrobat,Acrobat Pro DC is a software package that incl...,https://www.hmc.edu/cis/services/adobe-acrobat/
Anki Flash Cards,"Anki is a free, open source application that a...",https://www.hmc.edu/cis/services/anki-flash-ca...
Audiovisual Event Support,CIS (Computing and Information Services) provi...,https://www.hmc.edu/cis/services/audiovisual-e...


In [5]:
# Smoke-test the embeddings endpoint with a throwaway sentence.
result = openai.Embedding.create(input="How are you?", model=EMBEDDING_MODEL)

In [6]:
# Length of the embedding vector returned for the single input above.
len(result["data"][0]["embedding"])

1536

In [8]:
# (rows, columns) left after dropna() and after "service" became the index.
df.shape

(71, 2)

In [9]:
# Inspect the full description text stored for one service.
df.loc["iClickers", "description"]

"iClickers are a personal response system that can be used for a variety of purposes in the classroom. They use a wireless receiver to record student votes, and software on the instructor's laptop records and displays the votes. Students can receive points for participation and correctness of their responses. Grading information can be exported and uploaded into Sakai. Faculty can check out iClickers from CIS for the semester or a single class. CIS also provides training and software installation and configuration. ARCS can provide consultations on incorporating iClickers into syllabi. For more information, visit the iClickers website. Faculty, students and staff at HMC can check out sets of 50 iClickers from the CIS Help Desk, as well as the wireless base stations. For large lecture classes, students may be asked to rent or purchase their own iClickers from Huntley Bookstore, in which case faculty can just check out the wireless base stations they need. CIS will usually send an email 

In [10]:
def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """
    Request an embedding for `text` from the OpenAI Embeddings API.

    Args:
        text: The string to embed.
        model: Name of the embedding model; defaults to EMBEDDING_MODEL.

    Returns:
        The "embedding" field of the first item in the response's "data" list.
    """
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]

def vector_similarity(x: list[float], y: list[float]) -> float:
    """
    Compute the similarity of two vectors as their dot product.

    OpenAI embeddings are normalized to length 1, so for them the dot product
    equals the cosine similarity.
    """
    left = np.array(x)
    right = np.array(y)
    return np.dot(left, right)

def select_document_section_by_query_similarity(query: str, contexts: dict[str, list[float]]) -> tuple[float, str]:
    """
    Embed the supplied query and return the single best-matching document.

    Args:
        query: Free-text question to match against the documents.
        contexts: Pre-calculated document embeddings, keyed by document index.

    Returns:
        A (similarity, document index) tuple for the highest-scoring entry.
        Candidates are compared as tuples, so equal similarities are resolved
        in favour of the larger document index.

    Raises:
        ValueError: If `contexts` is empty (checked before any API call is made).
    """
    if not contexts:
        raise ValueError("contexts must contain at least one document embedding")

    query_embedding = get_embedding(query)

    # Only the top hit is needed, so take the maximum instead of sorting every score.
    return max(
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    )

In [11]:
def compute_doc_embeddings(df: pd.DataFrame) -> dict[str, list[float]]:
    """
    Create an embedding for each row's description using the OpenAI Embeddings API.

    Args:
        df: Table with a "description" column. Each row's index label becomes
            a key of the result.

    Returns:
        A dictionary mapping each row's index label to the embedding of that
        row's description.

    Note:
        get_embedding is called once per row, so this issues one API request
        per row of `df`.
    """
    # Iterate the column directly; iterrows() would build a full Series per row
    # just to read a single field.
    return {
        idx: get_embedding(description) for idx, description in df["description"].items()
    }

# Embed every service description up front; compute_doc_embeddings calls
# get_embedding (one API request) for each row of df.
document_embeddings = compute_doc_embeddings(df)



In [12]:
def construct_prompt(query: str, context_embeddings: dict, df: pd.DataFrame) -> tuple[str, str]:
    """
    Build the chat prompt for a query from the most similar service description.

    Args:
        query: The user's question.
        context_embeddings: Pre-calculated embeddings keyed by service (the index of `df`).
        df: Service table indexed by service, with "description" and "link" columns.

    Returns:
        A (message, link) pair: the full prompt text (instructions, the chosen
        service's description with newlines replaced by spaces, then the
        question) and the link stored for the chosen service.
    """
    _, chosen_service = select_document_section_by_query_similarity(query, context_embeddings)

    # Look the row up once; both the description and the link come from it.
    service_row = df.loc[chosen_service]
    service_description = service_row["description"].replace("\n", " ")

    introduction = (
        "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "
        "I could not find an answer to your question, please reach out to Helpdesk."
    )
    question = f"\n\nQ: {query}"
    # NOTE(review): the "\n* " fragment leaves a dangling bullet line ahead of the
    # context section; it is kept as-is so the prompt sent to the model is unchanged.
    message = introduction + "\n* " + "\n\nContext:\n" + service_description + question
    return message, service_row["link"]

# Preview the prompt text (element 0 of the returned pair) for a sample question.
prompt = construct_prompt("How many iClickers are there?", document_embeddings, df)

print(prompt[0])

Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say I could not find an answer to your question, please reach out to Helpdesk.
* 

Context:
iClickers are a personal response system that can be used for a variety of purposes in the classroom. They use a wireless receiver to record student votes, and software on the instructor's laptop records and displays the votes. Students can receive points for participation and correctness of their responses. Grading information can be exported and uploaded into Sakai. Faculty can check out iClickers from CIS for the semester or a single class. CIS also provides training and software installation and configuration. ARCS can provide consultations on incorporating iClickers into syllabi. For more information, visit the iClickers website. Faculty, students and staff at HMC can check out sets of 50 iClickers from the CIS Help Desk, as well as the wireless base stations. 

In [13]:
def answer_query_with_context(
    query: str,
    df: pd.DataFrame,
    document_embeddings: dict[(str, str), np.array],
    show_prompt: bool = False
) -> str:
    """
    Answer a question with the chat model, using the best-matching service as context.

    Builds the prompt with construct_prompt, optionally prints it, sends it to
    the chat completion endpoint as the user message, and returns the model's
    reply followed by a fixed footer containing the service link and the
    Helpdesk contact details.
    """
    prompt, link = construct_prompt(query, document_embeddings, df)
    if show_prompt:
        print(prompt)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(model=COMPLETIONS_MODEL, messages=chat_messages)

    # Footer appended verbatim to every reply, regardless of what the model said.
    footer = "\n\nPlease check out the relevant HMC service catalogue for more details: " + link
    footer = footer + (
        """\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. """
        """Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu"""
    )
    return response["choices"][0]["message"]["content"] + footer

In [14]:
# End-to-end check: retrieve the closest service and have the chat model answer from it.
answer_query_with_context("How to connect to WiFi?", df, document_embeddings)

'To connect to WiFi, you will have access to the same wireless networks as you do already around campus. Please use Claremont-WPA for all your devices. For devices that do not support WPA2-Enterprise please use Claremont-ETC (reduced access but ideal for older devices and game consoles). If you have any issues or questions please contact the Help Desk at helpdesk@hmc.edu or 909.607.7777.\n\nPlease check out the relevant HMC service catalogue for more details: https://www.hmc.edu/cis/services/dorm-wifi-and-ethernet-ports/\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu'

In [15]:
# NOTE(review): in the recorded run below this returned the fallback
# "I could not find an answer" reply even though the iClickers page was the
# service that got retrieved (see the link in the output).
answer_query_with_context("How do I use iClickers?", df, document_embeddings)

'I could not find an answer to your question, please reach out to Helpdesk.\n\nPlease check out the relevant HMC service catalogue for more details: https://www.hmc.edu/cis/services/iclickers/\n\nIf not satisfied with the answer, please email helpdesk@hmc.edu, call 909.607.7777 or visit the Helpdesk located on the Sprague first floor. Helpdesk representatives are also available for a remote chat session during normal hours on Monday - Friday, 8:00 AM - 5:00 PM PST via https://helpdesk.hmc.edu'

In [17]:
pip install gradio

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [18]:
# Hosting chatbot on Gradio

import gradio as gr

def chatbot(input):
    """
    Gradio callback: answer a non-empty message from the service catalogue.

    Returns None when `input` is empty or otherwise falsy; in that case no
    query is sent.
    """
    if not input:
        return None
    return answer_query_with_context(input, df, document_embeddings)

# Single-turn UI: one multi-line question box in, one reply box out.
inputs = gr.Textbox(label="Chat with AI", lines=7)
outputs = gr.Textbox(label="Reply")

header_message = "Ask the chatbot about the following services provided by CIS at HMC."
demo = gr.Interface(
    fn=chatbot,
    inputs=inputs,
    outputs=outputs,
    title="AI Chatbot",
    description=header_message,
    theme="compact",
)
# share=True additionally serves the app through a temporary public URL.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://239d5d8bc024b8dc55.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


