In [130]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings
load_dotenv()

False

In [123]:
pc = Pinecone(os.getenv("PINECONE_API_KEY"))
# pc.create_index(
#     name = "rmp-rag-chatbot",
#     dimension = 1536,
#     metric = "cosine",
#     spec = ServerlessSpec(
#         cloud = "aws",
#         region = "us-east-1"
#     )
# )

In [124]:
import json
f = open("data.json")
data = json.load(f)



In [132]:
processed_data = []
client = OpenAI()

for review in data['reviews']:
    response = client.embeddings.create(
        input=review["review"],
        model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            "id": review["professorName"],
            "metadata": {
                "professorName": review["professorName"],
                "course": review["course"],
                "department": review["department"],
                "rating": review["rating"],
                "difficulty": review["difficulty"],
                "wouldTakeAgain": review["wouldTakeAgain"],
                "textbookUse": review["textbookUse"],
                "attendance": review["attendance"],
                "grade": review["grade"],
                "review": review["review"],
                "date": review["date"]
            }

        }
    )

In [126]:
index = pc.Index("rmp-rag-chatbot")
index.upsert(
    vectors = processed_data,
    namespace="ns1"
)

{'upserted_count': 30}

In [127]:
index.describe_index_stats()


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 30}},
 'total_vector_count': 30}

In [141]:
query = index.query(
    vector=embedding, 
    namespace="ns1", 
    top_k=30,
    include_values=True,
    include_metadata=True
)
query

{'matches': [{'id': 'Professor Joseph Murphy',
              'metadata': {'attendance': 'Mandatory',
                           'course': 'CHEM401',
                           'date': '2024-07-09',
                           'department': 'Chemistry',
                           'difficulty': 5.0,
                           'grade': 'A',
                           'professorName': 'Professor Joseph Murphy',
                           'rating': 4.8,
                           'review': "Professor Curie's course on nuclear "
                                     'chemistry is enlightening. Labs are '
                                     'engaging and informative. Her passion '
                                     'for the subject is contagious.',
                           'textbookUse': 'Required',
                           'wouldTakeAgain': 'no'},
              'score': 0.99923569,
              'values': [-0.0153068462,
                         -0.00427237945,
                         

In [143]:
context = ""
for match in query['matches']:
    context += f"Professor: {match['metadata']['professorName']}\n"
    context += f"Department: {match['metadata']['department']}\n"
    context += f"Course: {match['metadata']['course']}\n"
    context += f"Rating: {match['metadata']['rating']}\n"
    context += f"Difficulty: {match['metadata']['difficulty']}\n"
    context += f"Would Take Again: {match['metadata']['wouldTakeAgain']}\n"
    context += f"Textbook Use: {match['metadata']['textbookUse']}\n"
    context += f"Attendance: {match['metadata']['attendance']}\n"
    context += f"Grade: {match['metadata']['grade']}\n"
    context += f"Review: {match['metadata']['review']}\n"
    context += f"Date: {match['metadata']['date']}\n\n"


# Prepare the prompt with the retrieved context
prompt = f"""Based on the following information about professors:

{context}

Which professor is best for computer science based of rating?"""

# Set up the chat completion
primer = """You are a helpful assistant for rate my professor websites. Use the provided information to answer questions about professors and courses. If the information is not sufficient to determine a best professor, explain why and what factors might be important to consider."""

res = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": primer},
        {"role": "user", "content": prompt}
    ]
)

openai_answer = res.choices[0].message.content
print("OpenAI Answer:")
print(openai_answer)

OpenAI Answer:
Based on the ratings provided, Professor Ada Lovelace is the best professor for computer science, with a rating of 4.7 for her course CS210. It's also worth noting that she has a difficulty rating of 4.0 and students indicated they would take her course again. 

In comparison, other professors in the Computer Science department include:

- Professor Alan Turing: Rating 4.7 (CS450)
- Professor Ferran Foster: Rating 4.3 (CS101)
- Professor Michael Chen: Rating 3.8 (CS202)
- Professor Kevin Zhang: Rating 4.1 (CS350)

While both Professor Ada Lovelace and Professor Alan Turing have the same rating of 4.7, Lovelace's course has a lower difficulty rating of 4.0 compared to Turing's which is 5.0, making her potentially a better choice for students seeking a challenging yet rewarding Computer Science experience.
