In [None]:
import pandas as pd
from dotenv import load_dotenv
import os 
import weaviate
from weaviate.classes.init import Auth
from sentence_transformers import SentenceTransformer
import weaviate.classes as wvc
from weaviate.classes.config import Property, DataType, Configure, VectorDistances

In [5]:
courses_data = pd.read_csv('Assignment_02_data.csv')

In [11]:
# Load the .env file
load_dotenv()

True

In [15]:
# Best practice: store your weaviate credentials in environment variables
weaviate_url = os.getenv("WEAVIATE_URL")
weaviate_api_key = os.getenv("WEAVIATE_API_KEY")

In [22]:
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
)

print(client.is_ready())  # Should print: `True`

# client.close()

True


In [27]:
courses_data.head()

Unnamed: 0,course_id,title,description
0,C001,Foundations of Machine Learning,Understand foundational machine learning algor...
1,C002,Deep Learning with TensorFlow and Keras,Explore neural network architectures using Ten...
2,C003,Natural Language Processing Fundamentals,Dive into NLP techniques for processing and un...
3,C004,Computer Vision and Image Processing,Learn the principles of computer vision and im...
4,C005,Reinforcement Learning Basics,Get introduced to reinforcement learning parad...


In [30]:
client.collections.delete_all()


In [None]:

client.collections.create(
    name="Course",
    properties=[
        Property(name="course_id", data_type=DataType.TEXT),
        Property(name="title", data_type=DataType.TEXT),
        Property(name="description", data_type=DataType.TEXT),
    ],
    vectorizer_config=Configure.Vectorizer.none(),
    vector_index_config=Configure.VectorIndex.hnsw(
        distance_metric=VectorDistances.COSINE  
    ),
)

collection = client.collections.get("Course")

# Embed and upload
model = SentenceTransformer("all-MiniLM-L6-v2")

for _, row in courses_data.iterrows():
    text_input = f"{row['title']}. {row['description']}"
    vector = model.encode(text_input).tolist()
    
    collection.data.insert(
        properties={
            "course_id": str(row["course_id"]),
            "title": row["title"],
            "description": row["description"]
        },
        vector=vector
    )


In [None]:
from weaviate.collections.classes.grpc import MetadataQuery
from sentence_transformers import SentenceTransformer

# Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Get collection
collection = client.collections.get("Course")

def course_recommender():
    """
    Recommend courses based on user profile.
    
    Args:
        user_profile (str): User's learning interests and background.
        
    Returns:
        List of recommended courses.
    """

    user_profile = input("Enter your learning interests and background: ")
    # Ensure the user profile is not empty
    if not user_profile.strip():
        print("User profile cannot be empty.")
        return []
    
    print(f"\nUser Profile: {user_profile}\n")

    # Encode user profile
    query_vector = model.encode(user_profile).tolist()
    
    # Query Weaviate for similar courses
    results = collection.query.near_vector(
        near_vector=query_vector,
        limit=5,
        return_metadata=MetadataQuery(distance=True)
    )

    print("\nTop 5 Matching Courses:\n")
    for obj in results.objects:
        print(f"Title: {obj.properties['title']}")
        print(f"Description: {obj.properties['description']}")
        print(f"Course ID: {obj.properties['course_id']}")
        print(f"Cosine Distance: {obj.metadata.distance:.4f}")
        print("-" * 50)
    
    return results.objects


In [None]:
course_recommender()          


User Profile: I already know AWS basics and want to dive into cloud security and architecture.


Top 5 Matching Courses:

Title: Data Engineering on AWS
Description: Build scalable data pipelines using AWS services. This course covers S3 data lakes, AWS Glue ETL jobs, AWS Lambda for serverless transformations, Amazon Redshift for warehousing, and AWS Kinesis for streaming ingestion. You’ll design end-to-end pipelines, automate workflows with AWS Step Functions, and monitor performance using CloudWatch, enabling robust, cost-effective data engineering solutions on the AWS cloud.
Course ID: C006
Cosine Distance: 0.4411
--------------------------------------------------
Title: APIs and Microservices Architecture
Description: Design and implement RESTful and GraphQL APIs using Node.js, Express, or Python FastAPI. Learn microservices patterns: service discovery, circuit breakers, and API gateways. Topics include containerized deployment, versioning strategies, and security best practices (

[GenerativeObject(uuid=_WeaviateUUIDInt('726af503-c430-4c92-8da2-ee714ef4a217'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.4410693049430847, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'title': 'Data Engineering on AWS', 'description': 'Build scalable data pipelines using AWS services. This course covers S3 data lakes, AWS Glue ETL jobs, AWS Lambda for serverless transformations, Amazon Redshift for warehousing, and AWS Kinesis for streaming ingestion. You’ll design end-to-end pipelines, automate workflows with AWS Step Functions, and monitor performance using CloudWatch, enabling robust, cost-effective data engineering solutions on the AWS cloud.', 'course_id': 'C006'}, references=None, vector={}, collection='Course'),
 GenerativeObject(uuid=_WeaviateUUIDInt('83e5d808-ddcd-43a2-bb6a-7e19e19db3d4'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.5391038656234741, c

In [44]:
course_recommender()


User Profile: I'm curious about image recognition and deep learning models that work with visual data.


Top 5 Matching Courses:

Title: Computer Vision and Image Processing
Description: Learn the principles of computer vision and image processing. Topics include filtering, edge detection, feature extraction, image segmentation, object detection, and image classification using CNNs. Hands-on labs in Python leverage OpenCV, scikit-image, and TensorFlow. By project’s end, you will build a pipeline to analyze and classify images, detect objects, and perform real-time video processing.
Course ID: C004
Cosine Distance: 0.4464
--------------------------------------------------
Title: Deep Learning with TensorFlow and Keras
Description: Explore neural network architectures using TensorFlow and Keras frameworks. This course covers feedforward networks, convolutional neural networks, recurrent neural networks, and transfer learning. Learn to build, train, evaluate, and optimize deep learning m

[GenerativeObject(uuid=_WeaviateUUIDInt('4ed7dd79-e702-4d79-968c-7f55dcf110b3'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.4464082717895508, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'title': 'Computer Vision and Image Processing', 'description': 'Learn the principles of computer vision and image processing. Topics include filtering, edge detection, feature extraction, image segmentation, object detection, and image classification using CNNs. Hands-on labs in Python leverage OpenCV, scikit-image, and TensorFlow. By project’s end, you will build a pipeline to analyze and classify images, detect objects, and perform real-time video processing.', 'course_id': 'C004'}, references=None, vector={}, collection='Course'),
 GenerativeObject(uuid=_WeaviateUUIDInt('d7c27128-6210-47ae-8900-bcb0869a196c'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.4782135486602783, certa

In [45]:
course_recommender()


User Profile: I want to explore natural language understanding and text analytics.


Top 5 Matching Courses:

Title: Natural Language Processing Fundamentals
Description: Dive into NLP techniques for processing and understanding human language. You will learn tokenization, stemming, lemmatization, part-of-speech tagging, named entity recognition, and sentiment analysis. The course includes transformer architectures, attention mechanisms, and fine-tuning pre-trained language models. Hands-on Python labs use Hugging Face and spaCy for end-to-end natural language pipelines and projects.
Course ID: C003
Cosine Distance: 0.4399
--------------------------------------------------
Title: R Programming and Statistical Analysis
Description: Get introduced to R for statistical computing and graphics. Topics include data structures, control flow, and functional programming. Use tidyverse libraries—dplyr, ggplot2, tidyr—for data manipulation and visualization. Explore hypothesis testing, regressio

[GenerativeObject(uuid=_WeaviateUUIDInt('9432cf87-d3d9-4424-894a-dcf023a4b111'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.4398918151855469, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'title': 'Natural Language Processing Fundamentals', 'description': 'Dive into NLP techniques for processing and understanding human language. You will learn tokenization, stemming, lemmatization, part-of-speech tagging, named entity recognition, and sentiment analysis. The course includes transformer architectures, attention mechanisms, and fine-tuning pre-trained language models. Hands-on Python labs use Hugging Face and spaCy for end-to-end natural language pipelines and projects.', 'course_id': 'C003'}, references=None, vector={}, collection='Course'),
 GenerativeObject(uuid=_WeaviateUUIDInt('3c34a548-f976-49a3-b5bc-6038c394526e'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.6

In [46]:
course_recommender()


User Profile: I'm new to AI and just want a simple introduction to machine learning.


Top 5 Matching Courses:

Title: Reinforcement Learning Basics
Description: Get introduced to reinforcement learning paradigms, including Markov decision processes, Q-learning, policy gradients, and actor-critic methods. Learn to formulate environments, design reward functions, and implement agents using OpenAI Gym and TensorFlow. Through guided labs you’ll train agents for classic control tasks and grid-world scenarios, exploring exploration-exploitation trade-offs and model-free learning techniques.
Course ID: C005
Cosine Distance: 0.4542
--------------------------------------------------
Title: Deep Learning with TensorFlow and Keras
Description: Explore neural network architectures using TensorFlow and Keras frameworks. This course covers feedforward networks, convolutional neural networks, recurrent neural networks, and transfer learning. Learn to build, train, evaluate, and optimize deep learni

[GenerativeObject(uuid=_WeaviateUUIDInt('bb12767c-224f-4f76-82d7-34f771210ff3'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.4541811943054199, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'description': 'Get introduced to reinforcement learning paradigms, including Markov decision processes, Q-learning, policy gradients, and actor-critic methods. Learn to formulate environments, design reward functions, and implement agents using OpenAI Gym and TensorFlow. Through guided labs you’ll train agents for classic control tasks and grid-world scenarios, exploring exploration-exploitation trade-offs and model-free learning techniques.', 'title': 'Reinforcement Learning Basics', 'course_id': 'C005'}, references=None, vector={}, collection='Course'),
 GenerativeObject(uuid=_WeaviateUUIDInt('d7c27128-6210-47ae-8900-bcb0869a196c'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.5

In [47]:
course_recommender()


User Profile: I’m interested in how agents learn from actions in dynamic environments.


Top 5 Matching Courses:

Title: Reinforcement Learning Basics
Description: Get introduced to reinforcement learning paradigms, including Markov decision processes, Q-learning, policy gradients, and actor-critic methods. Learn to formulate environments, design reward functions, and implement agents using OpenAI Gym and TensorFlow. Through guided labs you’ll train agents for classic control tasks and grid-world scenarios, exploring exploration-exploitation trade-offs and model-free learning techniques.
Course ID: C005
Cosine Distance: 0.3669
--------------------------------------------------
Title: Deep Learning with TensorFlow and Keras
Description: Explore neural network architectures using TensorFlow and Keras frameworks. This course covers feedforward networks, convolutional neural networks, recurrent neural networks, and transfer learning. Learn to build, train, evaluate, and optimize deep lear

[GenerativeObject(uuid=_WeaviateUUIDInt('bb12767c-224f-4f76-82d7-34f771210ff3'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.366865873336792, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'title': 'Reinforcement Learning Basics', 'description': 'Get introduced to reinforcement learning paradigms, including Markov decision processes, Q-learning, policy gradients, and actor-critic methods. Learn to formulate environments, design reward functions, and implement agents using OpenAI Gym and TensorFlow. Through guided labs you’ll train agents for classic control tasks and grid-world scenarios, exploring exploration-exploitation trade-offs and model-free learning techniques.', 'course_id': 'C005'}, references=None, vector={}, collection='Course'),
 GenerativeObject(uuid=_WeaviateUUIDInt('d7c27128-6210-47ae-8900-bcb0869a196c'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=0.72