In [1]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Build a FAISS similarity index over the course catalogue and persist it,
# together with a position -> course mapping, for later retrieval.

# Load course data from JSON file (make sure courses.json exists)
with open("courses.json", "r", encoding="utf-8") as f:
    courses = json.load(f)

# Fail early with a clear message: without any course there is no embedding
# matrix to take a dimension from, and an empty index would be useless anyway.
if not courses:
    raise ValueError("courses.json contains no courses; nothing to index")

# Initialize the SentenceTransformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Combine fields to form a single text input for each course.
# NOTE(review): assumes every course has the keys 'Course Name',
# 'Course Description' and 'Course Curriculum', and that the curriculum is a
# list of strings (a plain string would be joined character by character) --
# confirm against the producer of courses.json.
texts = [
    f"{course['Course Name']} - {course['Course Description']} - {', '.join(course['Course Curriculum'])}"
    for course in courses
]

# Generate embeddings for all courses
embeddings = model.encode(texts, convert_to_numpy=True)
# FAISS only accepts C-contiguous float32 matrices; this is a no-op when the
# encoder already returns that layout and guards against other precisions.
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

# Create a FAISS index using L2 (Euclidean) distance
d = embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatL2(d)
index.add(embeddings)

# Save the FAISS index
faiss.write_index(index, "courses_faiss.index")

# Save metadata for later retrieval (mapping each index position to its course
# details). Positions follow the order in which the vectors were added above.
# JSON object keys are always strings, so the integer positions are written as
# "0", "1", ... and must be converted back (or looked up via str(i)) on load.
metadata = dict(enumerate(courses))
with open("courses_metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=4)

print("Embeddings generated, FAISS index and metadata saved!")


  from .autonotebook import tqdm as notebook_tqdm


Embeddings generated, FAISS index and metadata saved!
