## Do all necessary imports and client setup 
###### -- Use this section to set up the Python environment; install all packages from requirement.txt first

In [1]:
import os
from typing import List, Optional, Tuple

from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings


# Connection settings shared by both Azure OpenAI clients. Everything is read
# from environment variables so that no credentials live in the notebook.
_azure_settings = dict(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("OPENAI_API_VERSION"),
)

# Chat client; its model name is taken from the MODEL environment variable.
llm_call = AzureChatOpenAI(model=os.getenv("MODEL"), **_azure_settings)

# Embedding client; its model name is taken from the EMB_MODEL environment variable.
emd_call = AzureOpenAIEmbeddings(model=os.getenv("EMB_MODEL"), **_azure_settings)

## Load the dataset containing the course information

In [2]:
from langchain_community.document_loaders  import DataFrameLoader
import pandas as pd

doc_path = "assignment2dataset.csv"

# Step 1: read the course CSV and build a "text" column of the form
# "<title>: <description>". The raw "description" column is dropped once it
# has been folded into "text".
df = (
    pd.read_csv(doc_path)
    .assign(text=lambda frame: frame["title"] + ": " + frame["description"])
    .drop(columns=["description"])
)

# Step 2: convert the DataFrame into LangChain documents.
# NOTE(review): presumably the loader uses the "text" column as page content
# and keeps the remaining columns as metadata -- confirm against its defaults.
loader = DataFrameLoader(df)
documents = loader.load()

# Step 3: flatten the documents. Iterating a document yields its
# (field name, value) pairs, so `docs` is one flat list of such pairs.
docs = [pair for doc in documents for pair in doc]


In [3]:
# Page content of every loaded document, read from the document attribute
# directly instead of filtering flattened (field name, value) pairs by key.
page_contents = [doc.page_content for doc in documents]

# Metadata dict of every loaded document, in the same order as page_contents.
metadata = [doc.metadata for doc in documents]

In [5]:
# Mean number of characters per document's page content.
sum(map(len, page_contents)) / len(page_contents)

443.64

## Create vector store

In [4]:
# Sanity check: number of documents produced by the loader.
len(documents)

25

In [7]:
from langchain_chroma import Chroma
# Use each course's own id as its vector-store id. Without explicit ids the
# documents are inserted under newly generated ids on every run, so re-running
# this cell in the same kernel would store each course again and the search
# would return duplicate hits; with stable ids a re-run overwrites instead.
# NOTE(review): assumes every document has a unique "course_id" in its metadata.
vectorstore = Chroma.from_documents(
    collection_name="course_repo",
    documents=documents,
    embedding=emd_call,
    ids=[str(doc.metadata["course_id"]) for doc in documents],
)

python-dotenv could not parse statement starting at line 1
python-dotenv could not parse statement starting at line 2
python-dotenv could not parse statement starting at line 3
python-dotenv could not parse statement starting at line 5
python-dotenv could not parse statement starting at line 6
python-dotenv could not parse statement starting at line 8
python-dotenv could not parse statement starting at line 9
python-dotenv could not parse statement starting at line 1
python-dotenv could not parse statement starting at line 2
python-dotenv could not parse statement starting at line 3
python-dotenv could not parse statement starting at line 5
python-dotenv could not parse statement starting at line 6
python-dotenv could not parse statement starting at line 8
python-dotenv could not parse statement starting at line 9
python-dotenv could not parse statement starting at line 1
python-dotenv could not parse statement starting at line 2
python-dotenv could not parse statement starting at line

## Create the course recommendation function

In [11]:
def recommend_courses(profile: str,
                      top_k: int = 5,
                      completed_ids: Optional[List[str]] = None,
                      ) -> List[Tuple[str, str, float]]:
    """
    Recommend the courses whose text is closest to a free-text user profile.

    Args:
        profile: Free-text description of the user's background and
            interests; used as the query for the vector-store search.
        top_k: Maximum number of recommendations to return (default 5).
        completed_ids: Optional course ids the user has already completed;
            courses with these ids are left out of the result.

    Returns:
        Up to ``top_k`` tuples of ``(course_id, title, score)``, in the order
        returned by the vector store. The score is passed through unchanged
        from ``similarity_search_with_score``.
        NOTE(review): for Chroma this looks like a distance (lower = closer
        match) rather than a similarity -- confirm.
        An empty list is returned when the search yields no hits.
    """
    excluded = set(completed_ids or ())

    # Ask for extra hits so that dropping completed courses can still leave
    # top_k recommendations.
    hits = vectorstore.similarity_search_with_score(query=profile,
                                                    k=top_k + len(excluded))

    # Iterate over the (document, score) pairs directly: unpacking through
    # zip(*hits) raises ValueError when the search returns nothing.
    top_recommendations = [
        (doc.metadata["course_id"], doc.metadata["title"], score)
        for doc, score in hits
        if doc.metadata["course_id"] not in excluded
    ]
    return top_recommendations[:top_k]



## Test Evaluation report
###### For all 5 questions, it recommends courses similar to what the user needs

In [16]:
# Test query 1: learner who finished the Python data-science course and enjoys data visualization.
Q1 = "I’ve completed the ‘Python Programming for Data Science’ course and enjoy data visualization. What should I take next?"

In [17]:
# Show the recommendations for Q1; each tuple is (course_id, title, score).
# NOTE(review): the recorded output lists the already-completed course (C016) first.
recommend_courses(Q1)

[('C016', 'Python Programming for Data Science', 0.3308434784412384),
 ('C011', 'Big Data Analytics with Spark', 0.3907507658004761),
 ('C014', 'Data Visualization with Tableau', 0.39794573187828064),
 ('C004', 'Computer Vision and Image Processing', 0.41815078258514404),
 ('C017', 'R Programming and Statistical Analysis', 0.42748209834098816)]

In [14]:
# Test query 2: learner with Azure basics who wants containers and CI/CD pipelines.
Q2 = "I know Azure basics and want to manage containers and build CI/CD pipelines. Recommend courses"

In [18]:
# Show the recommendations for Q2; each tuple is (course_id, title, score).
recommend_courses(Q2)

[('C007', 'Cloud Computing with Azure', 0.31319066882133484),
 ('C009', 'Containerization with Docker and Kubernetes', 0.32507163286209106),
 ('C008', 'DevOps Practices and CI/CD', 0.3403666019439697),
 ('C010', 'APIs and Microservices Architecture', 0.39743226766586304),
 ('C025', 'MLOps: Productionizing Machine Learning', 0.40100717544555664)]

In [19]:
# Test query 3: learner with ML fundamentals who wants neural networks and production workflows.
Q3 = "My background is in ML fundamentals; I’d like to specialize in neural networks and production workflows."

In [20]:
# Show the recommendations for Q3; each tuple is (course_id, title, score).
recommend_courses(Q3)

[('C025', 'MLOps: Productionizing Machine Learning', 0.3238612115383148),
 ('C002', 'Deep Learning with TensorFlow and Keras', 0.3684665858745575),
 ('C004', 'Computer Vision and Image Processing', 0.378982812166214),
 ('C003', 'Natural Language Processing Fundamentals', 0.3809294104576111),
 ('C001', 'Foundations of Machine Learning', 0.38297075033187866)]

In [21]:
# Test query 4: learner who wants to build and deploy microservices with Kubernetes.
Q4 = "I want to learn to build and deploy microservices with Kubernetes—what courses fit best?"

In [22]:
# Show the recommendations for Q4; each tuple is (course_id, title, score).
recommend_courses(Q4)

[('C009', 'Containerization with Docker and Kubernetes', 0.2381223887205124),
 ('C010', 'APIs and Microservices Architecture', 0.32079559564590454),
 ('C008', 'DevOps Practices and CI/CD', 0.3699899911880493),
 ('C007', 'Cloud Computing with Azure', 0.3801954388618469),
 ('C025', 'MLOps: Productionizing Machine Learning', 0.38485318422317505)]

In [23]:
# Test query 5: beginner interested in blockchain and smart contracts.
Q5 = "I’m interested in blockchain and smart contracts but have no prior experience. Which courses do you suggest?"

In [24]:
# Show the recommendations for Q5; each tuple is (course_id, title, score).
recommend_courses(Q5)

[('C023', 'Blockchain Technology and Smart Contracts', 0.28837913274765015),
 ('C010', 'APIs and Microservices Architecture', 0.48249563574790955),
 ('C022', 'Internet of Things (IoT) Development', 0.48679864406585693),
 ('C013', 'NoSQL Databases and MongoDB', 0.4970172643661499),
 ('C009', 'Containerization with Docker and Kubernetes', 0.5001373887062073)]