## Imports

In [1]:
import openai
from openai import OpenAI
from config import OPENAI_API_KEY
from heapq import nlargest
import numpy as np

# Shared OpenAI client used by the chat-completion requests below; the key is
# read from the local `config` module instead of being written in the notebook.
client = OpenAI(api_key=OPENAI_API_KEY)
import rag_utils
import pandas as pd
#import requests # only for ipynb
import json

# Name of the OpenAI embedding model (not referenced again in the visible cells).
embedding_model_name = 'text-embedding-3-small'
# CSV with the course catalogue; read with pd.read_csv when the data is loaded.
file_name = "all_courses_with_embeddings.csv"

## Reading in the Data

### (and generating the embeddings first, if they are missing)

In [2]:
# Load the course catalogue. The retrieval code reads both the 'title_and_desc'
# and the 'embedding' column of this frame, so both have to be present.
df = pd.read_csv(file_name)

required_columns = ('title_and_desc', 'embedding')
missing_columns = [col for col in required_columns if col not in df.columns]

if missing_columns:
    # Offer to (re)generate as soon as EITHER column is absent: a frame that
    # has only one of them cannot be parsed/used by the branch below.
    choice = input("Do you want to run the script to generate embeddings? Caution: this will take a while and burn your OpenAI credits. (yes/no)")
    if choice.strip().lower() == 'yes':
        # NOTE(review): assumes embed_all_courses returns a frame containing
        # both required columns with array-like embeddings — confirm in rag_utils.
        df = rag_utils.embed_all_courses(df)
    else:
        # Make the consequence visible instead of failing later with a KeyError.
        print(f"Embeddings were not generated; missing columns: {missing_columns}. The retrieval cells will not work.")
else:
    print("Embeddings already exist in the dataframe")
    print("But we need to convert them into NP arrays")
    # Each stored embedding is parsed with json.loads and wrapped in an ndarray
    # so it can be used in dot products / norms.
    df['embedding'] = df['embedding'].apply(lambda x: np.array(json.loads(x)))
    print("Done")

Embeddings already exist in the dataframe
But we need to convert them into NP arrays
Done


## User Prompt and Input

In [3]:
# Question text for the learner (not referenced again in the visible cells).
user_prompt = (
    "What are your learning goals for an online Computer Science course? "
    "(For example: I want to learn the basics of programming and how to code. "
    "I have an undegraduate degree in Psychology)"
)
# Hard-coded learner request; used as the retrieval query and quoted in the
# recommendation prompt.
user_input = (
    "I want to learn the basics of natural language processing, "
    "what course should I take? "
    "I have an undegraduate degree in linguistics"
)

In [4]:
## Step 2: Write a similarity function that computes the similarity between query (user input) and document (course description)
def embedding_cosine_similarity(query, document, query_embedding=None):
    """Cosine similarity between a query embedding and a document's stored embedding.

    Args:
        query: Query text. Only used (passed to ``rag_utils.get_embedding``)
            when ``query_embedding`` is not supplied.
        document: Mapping-like row exposing an ``'embedding'`` entry.
        query_embedding: Optional precomputed embedding of ``query``; supplying
            it avoids embedding the same query once per document.

    Returns:
        The cosine of the angle between the two vectors, or ``0.0`` when either
        vector has zero norm (the ratio is undefined there and would otherwise
        come out as NaN/inf, which cannot be ranked reliably).
    """
    # Embed the query only if the caller did not already do so.
    if query_embedding is None:
        query_embedding = rag_utils.get_embedding(query)
    query_vec = np.asarray(query_embedding, dtype=float)
    document_vec = np.asarray(document['embedding'], dtype=float)
    norm_product = np.linalg.norm(query_vec) * np.linalg.norm(document_vec)
    if norm_product == 0:
        # Guard the division: a zero vector has no direction to compare.
        return 0.0
    return np.dot(query_vec, document_vec) / norm_product

In [5]:
def jaccard_similarity(query, document, query_embedding=None):
    """Jaccard overlap between the tokens of the query and of the document.

    Both texts are lower-cased and split on single spaces; the score is the
    number of shared distinct tokens divided by the number of distinct tokens
    appearing in either text.

    Args:
        query: Query text.
        document: Mapping-like row exposing a ``'title_and_desc'`` string.
        query_embedding: Unused; accepted so the function can be called with
            the same three arguments as the embedding-based similarity.

    Returns:
        A float in ``[0, 1]``.
    """
    query_tokens = set(query.lower().split(" "))
    document_tokens = set(document['title_and_desc'].lower().split(" "))
    shared = query_tokens & document_tokens
    combined = query_tokens | document_tokens
    return len(shared) / len(combined)

def cosine_similarity(query, document, query_embedding=None):
    """Cosine similarity between the binary bag-of-words vectors of two texts.

    Both texts are lower-cased and split on single spaces. Treating each text
    as a 0/1 vector over its distinct tokens, the dot product is the number of
    shared tokens and each norm is the square root of that text's distinct
    token count, so the score is ``|A & B| / sqrt(|A| * |B|)``.

    Args:
        query: Query text.
        document: Mapping-like row exposing a ``'title_and_desc'`` string.
        query_embedding: Unused; accepted so the function can be called with
            the same three arguments as the embedding-based similarity.

    Returns:
        A float in ``[0, 1]``; ``1.0`` when both texts have the same token set.
    """
    query_tokens = set(query.lower().split(" "))
    document_tokens = set(document['title_and_desc'].lower().split(" "))
    shared = query_tokens & document_tokens
    # str.split(" ") always yields at least one element, so neither set is
    # empty and the denominator cannot be zero.
    return len(shared) / (len(query_tokens) * len(document_tokens)) ** 0.5

def return_options(query, corpus, similarity_function=cosine_similarity, top_k=5):
    """Return the documents in ``corpus`` most similar to ``query``.

    Args:
        query: Query text to match against every row.
        corpus: DataFrame with ``'title_and_desc'`` and ``'Link'`` columns
            (plus whatever columns ``similarity_function`` reads from a row).
        similarity_function: Callable ``(query, row, query_embedding) -> score``;
            higher scores rank first.
        top_k: Maximum number of results to return (default 5).

    Returns:
        A pair ``(options_returned, relevant_doc_links)``: the
        ``'title_and_desc'`` values and the ``'Link'`` values of the best
        matches, both ordered from most to least similar.
    """
    # The query is embedded once up front and handed to every similarity call,
    # so embedding-based functions do not re-embed it per row. This call is
    # made regardless of which similarity function is used.
    query_embedding = rag_utils.get_embedding(query)
    # Score against `query` itself (not a notebook-level variable) so the
    # function honours the argument it was given.
    similarities = [
        similarity_function(query, doc, query_embedding)
        for _, doc in corpus.iterrows()
    ]
    top_positions = nlargest(top_k, range(len(similarities)), key=similarities.__getitem__)
    # `top_positions` are row positions in iteration order, so look rows up
    # positionally; label lookup would break on a non-default index.
    options_returned = [corpus['title_and_desc'].iloc[pos] for pos in top_positions]
    relevant_doc_links = [corpus['Link'].iloc[pos] for pos in top_positions]
    return options_returned, relevant_doc_links

## One-Shot Recommendation

In [7]:
# Retrieve the candidate courses for the learner's request, ranked by
# embedding cosine similarity.
relevant_documents, relevant_doc_links = return_options(
    user_input, df, similarity_function=embedding_cosine_similarity
)

# Number both lists from 1; descriptions and links share the same numbering.
list_of_relevant_documents = [f"{rank}. {doc}" for rank, doc in enumerate(relevant_documents, start=1)]
list_of_relevant_documents_links = [f"{rank}. {link}" for rank, link in enumerate(relevant_doc_links, start=1)]
relevant_documents_text = "\n".join(list_of_relevant_documents)
relevant_documents_links_text = "\n".join(list_of_relevant_documents_links)

# Retrieval-augmented prompt: retrieved courses + links + the learner's request.
prompt = f"""
You are trying to help this user find an online Computer Science course
From my database of CS courses, here were some recommendations based on the user input: {relevant_documents_text}
The user input is: '{user_input}'
Compile a recommendation to the user based on the recommended Computer Science courses and the user input, 
returning the top 3 courses with their links embedded in the title: {relevant_documents_links_text} from the database, ranked in order of best fit for the user, and providing a brief explanation for why each course is a fit.
Additionally, ask the user a relevant question to gather more infomation about whether they possess the prerequisite knowledge to take the courses.
"""

try:
    # Single-turn request: system role plus the prompt built above.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        max_tokens=1000,
        messages=[
            {"role": "system", "content": "You are a bot that makes recommendations for Computer Science courses."},
            {"role": "user", "content": prompt},
        ],
    )

    # The reply is kept in `chatgpt_response` because the follow-up cell
    # replays it as the assistant turn.
    chatgpt_response = response.choices[0].message.content
    print(chatgpt_response)

# On failure only the error is printed; `chatgpt_response` is then not assigned
# by this cell.
except openai.OpenAIError as err:
    print(f"OpenAI API error occurred: {err}")
except Exception as err:
    print(f"An error occurred: {err}")


OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

## Iterative Prompting based on user response

In [7]:
# Hard-coded learner reply describing their background.
user_response = "I took an intro programming class taught in Python in college, but do not have much experience with machine learning."

# Ask the model to turn that reply into a short search query for the course
# embeddings (to find prerequisite courses).
user_response_prompt = f"""
The user responded: '{user_response}'
Based on the user response, could you come up with a short (under 15 words) plain text query for me to run in my vector database so that we can recommend courses to the user to satisfy any missing prerequisites?
Return just the query.
"""

# Conversation so far: system role, the first recommendation prompt, and the
# model's reply to it.
chat_so_far = [
    {"role": "system", "content": "You are a bot that makes recommendations for Computer Science courses."},
    {"role": "user", "content": prompt},
    {"role": "assistant", "content": chatgpt_response},
]

# Separate list for the query-generation turn, so `chat_so_far` itself is not
# modified by this request.
chat_for_query = [*chat_so_far, {"role": "user", "content": user_response_prompt}]

# This request is not wrapped in try/except: an API error raises here.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    max_tokens=1000,
    messages=chat_for_query,
)

# The generated query text is used for the next retrieval.
chatgpt_query = response.choices[0].message.content
print(chatgpt_query)


"Python programming courses for beginners with no machine learning experience."


In [8]:
# Second retrieval pass, this time with the model-generated prerequisite query.
relevant_documents, relevant_doc_links = return_options(
    chatgpt_query, df, similarity_function=embedding_cosine_similarity
)

# Number both lists from 1; descriptions and links share the same numbering.
list_of_relevant_documents = [f"{rank}. {doc}" for rank, doc in enumerate(relevant_documents, start=1)]
list_of_relevant_documents_links = [f"{rank}. {link}" for rank, link in enumerate(relevant_doc_links, start=1)]
relevant_documents_text = "\n".join(list_of_relevant_documents)
relevant_documents_links_text = "\n".join(list_of_relevant_documents_links)

# Note: this rebinds `prompt`, which previously held the first recommendation prompt.
prompt = f"""
You are trying to help this user find online Computer Science courses to satisfy any missing prerequisites
From my database of CS courses, here were some recommendations based on the user input: {relevant_documents_text}
The user input is: '{user_response}'
Compile a recommendation to the user based on the recommended Computer Science courses and the user input,
returning the top 2 courses with thier links embedded in the title: {relevant_documents_links_text} from the database, ranked in order of best fit for the user, and providing a brief explanation for why each course is a fit.
"""

# Gotcha: this mutates `chat_so_far` in place, so running the cell again
# appends another copy of the prompt to the conversation.
chat_so_far.append({"role": "user", "content": prompt})

try:
    # Send the whole conversation, ending with the prerequisite prompt.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        max_tokens=1000,
        messages=chat_so_far,
    )

    # Overwrites the earlier `chatgpt_response` with the new recommendation.
    chatgpt_response = response.choices[0].message.content
    print(chatgpt_response)

except openai.OpenAIError as err:
    print(f"OpenAI API error occurred: {err}")
except Exception as err:
    print(f"An error occurred: {err}")




Based on your previous experience with an introductory programming class in Python and your expressed interest in machine learning, here are the top two course recommendations that would help you build a solid foundation in the required prerequisites for exploring NLP and machine learning further:

1. [Introduction to Machine Learning with Python](https://www.coursera.org/learn/introduction-to-machine-learning-with-python)  
   This course is an excellent fit for you as it provides a comprehensive introduction to machine learning concepts while using Python. It covers various topics including supervised and unsupervised learning, deep learning, and model implementation using Python. Since you're looking to bridge the gap in your machine learning knowledge, this course will give you the foundational understanding needed to move forward in your studies.

2. [Practical Python for AI Coding 1: Introduction](https://www.coursera.org/learn/practical-python-for-ai-coding--preparation-for-codi

## EVALUATION comparing Gaita's first response to Gemini, Vanilla ChatGPT, and Claude?

In [12]:
# Add text to evaluate
# The three answers below are fixed strings pasted into the notebook; nothing
# in this cell calls a model to produce them.

# Query being evaluated. Note: this rebinds `user_input`, which held the NLP
# request used by the earlier retrieval cells.
user_input = "I want to learn how to build a RAG based chatbot"
# Answer stored under the name `gemini_answer`.
gemini_answer = '''
    **Here are a few excellent courses to consider for learning how to build a RAG-based chatbot:**

    **Online Courses**

    - Coursera's "Natural Language Processing Specialization": This comprehensive specialization covers the fundamentals of NLP, including text processing, machine learning for NLP, and building NLP applications. While it doesn't specifically focus on RAG, it provides a strong foundation for understanding the underlying concepts.
    - Udacity's "AI Programming with Python": This course introduces you to AI programming using Python, covering topics like machine learning, deep learning, and natural language processing. Although not specifically about RAG, it's a great starting point for those new to AI and Python.

    **Specialized Courses**

    - Krisolis' "Build a Custom Chatbot in a Day with RAG": Similar to the DeepLearning.AI course, this one-day course focuses on building a RAG-based chatbot. It covers the theoretical concepts and practical implementation using Python.

    By combining these resources and tailoring your learning to your specific needs, you can effectively learn how to build a RAG-based chatbot.

    **Would you like me to provide more information about any of these courses or resources?**
    '''

# Answer stored under the name `gaita_answer` (the only one of the three that
# contains course links).
gaita_answer = '''    
    **Here are some recommended courses for you, based on your interest in building a RAG-based chatbot:**
    
    1. [Intro to Mistral AI](https://www.coursera.org/learn/intro-to-mistral-ai): This course focuses on interacting with Mistral's AI models through JavaScript. It covers concepts that are applicable to various programming languages, making it beneficial for anyone looking to build intelligent applications, including chatbots that utilize retrieval-augmented generation (RAG) techniques.
    2. [Building AI Powered Chatbots Without Programming](https://www.coursera.org/learn/building-ai-powered-chatbots): This course teaches you how to create chatbots without needing to write code, using IBM Watson's Natural Language Processing capabilities. Given that RAG chatbots can be built with limited coding knowledge, this course serves as a great introduction to the concepts and planning necessary for chatbot development.
    3. [Learn to code with AI](https://www.coursera.org/learn/learn-to-code-with-ai): While this course is aimed at beginners, it empowers you to create functional projects with the help of AI tools like ChatGPT. Understanding how to work with AI will be beneficial as you develop your chatbot, especially if you plan to involve RAG methodologies in your design.
    
    **To better assist you, could you please share if you have any prior programming experience or familiarity with building chatbots? This will help me identify any gaps in your knowledge and suggest further courses to prepare you for these recommendations.**
'''

# Answer stored under the name `vanilla_gpt_answer`.
vanilla_gpt_answer = '''    
    To build a Retrieval-Augmented Generation (RAG) based chatbot, consider these online courses and resources:
    
    1. **Coursera: Natural Language Processing Specialization by Deeplearning.ai** - This series covers NLP fundamentals, which are essential for building chatbots.
    2. **edX: AI for Everyone by Andrew Ng** - A great introduction to AI concepts, including applications like chatbots.
    3. **Udemy: Building Chatbots with Python** - This course walks through creating chatbots and can provide foundational skills.
    
    **Choose one or a combination of these based on your current skill level and preferred learning style!**
'''



# Order matters: the evaluation prompt labels index 0/1/2 as
# Recommendation #1/#2/#3 (Gemini, Gaita, vanilla GPT respectively).
gen_answers = [gemini_answer, gaita_answer, vanilla_gpt_answer]

# Run evaluation: ask the model, acting as the student who issued the query,
# to rate each stored recommendation from 1 to 10.

# The trailing backslash on the question line is a line continuation, so
# "Recommendation #1" follows the question on the same line of the prompt.
prompt = f"""
How relevant are each of the following recommendations for your query, on a scale of 1 to 10? \
Recommendation #1: {gen_answers[0]}
Recommendation #2: {gen_answers[1]}
Recommendation #3: {gen_answers[2]}

"""

# Fresh conversation (independent of `chat_so_far`): the system message sets
# the student persona and embeds the query being evaluated.
new_chat = [
    {"role": "system", "content": f"You are a student who wants to learn Computer Science. You have a specific query that you've entered into three different course recommendation systems. Your query is '{user_input}'"},
    {"role": "user", "content": prompt},
]

try:
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        max_tokens=1000,
        messages=new_chat,
    )

    # Overwrites `chatgpt_response` with the evaluation text.
    chatgpt_response = response.choices[0].message.content
    print(chatgpt_response)

except openai.OpenAIError as err:
    print(f"OpenAI API error occurred: {err}")
except Exception as err:
    print(f"An error occurred: {err}")

Here’s how I would rate the relevance of each recommendation for my query on building a RAG-based chatbot:

**Recommendation #1: 8/10**
- This recommendation includes some specific courses aimed at foundational skills in NLP and AI. The course "Build a Custom Chatbot in a Day with RAG" directly addresses my query about RAG-based chatbots. However, the other courses provided are not specifically about RAG, though they do offer essential knowledge that could be beneficial.

**Recommendation #2: 9/10**
- This recommendation offers a more tailored selection for my specific interest in RAG-based chatbots, particularly the "Intro to Mistral AI" course and the chatbot development course without programming. These courses are directly applicable to building RAG-based chatbots and seem very relevant to what I want to learn, providing a practical approach alongside foundational knowledge.

**Recommendation #3: 7/10**
- While this recommendation provides general courses that cover the fundamental