In [1]:
from typing import Dict

from keras.src.losses import cosine_similarity

In [8]:
# Step 1: Define a dictionary of resources
# RESOURCE_DB maps a course code (upper-case key) to one record per course.
# Every record carries the same three fields:
#   "title" - human-readable course title (str)
#   "tags"  - topic keywords for the course (list of str)
#   "links" - URLs of study material for the course (list of str)
RESOURCE_DB = {
    "MAT201": {
        "title": "Integral Calculus for Engineers",
        "tags": ["integration", "calculus", "engineering math"],
        "links": ["https://youtu.be/calculus1", "https://drive.link/integration-pdf"]
    },
    "PHY101": {
        "title": "Mechanics Basics",
        "tags": ["force", "motion", "kinematics"],
        "links": ["https://youtu.be/mechanics", "https://pdf.link/mechanics"]
    },
    "CSC110": {
        "title": "Intro to Programming",
        "tags": ["python", "loops", "variables"],
        "links": ["https://youtu.be/python", "https://docs.link/programming"]
    },
}


In [9]:
# Step 2: Create the function
def get_resources(course_code: str) -> Dict[str, object]:
    """Look up the study resources registered for a course code.

    The code is upper-cased and stripped of surrounding whitespace before it
    is matched against the keys of ``RESOURCE_DB``, so ``" mat201 "`` finds
    the ``"MAT201"`` entry.

    Args:
        course_code: Course identifier as typed by the user.

    Returns:
        A new dict on every call.

        * Known course: the entry's fields (``"title"`` as a string,
          ``"tags"`` and ``"links"`` as lists).
        * Unknown course: empty ``"YouTube"`` and ``"PDFs"`` lists plus a
          ``"message"`` string naming the normalised code.

        NOTE(review): the not-found payload uses different keys than a real
        entry; callers that need to tell the two apart can test for
        ``"message"``.
    """
    course_code = course_code.upper().strip()
    entry = RESOURCE_DB.get(course_code)
    if entry is None:
        return {
            "YouTube": [],
            "PDFs": [],
            "message": f"No resources found for {course_code}. Try another course."
        }
    # Hand back copies of the entry and its lists so that a caller editing the
    # result cannot silently modify the shared RESOURCE_DB.
    return {
        field: list(value) if isinstance(value, list) else value
        for field, value in entry.items()
    }

In [11]:
# Step 3: Demo/test run
if __name__ == "__main__":
    # Ask for a course code and dump every field of the lookup result.
    user_input = input("Enter course code (e.g., MAT201): ")
    result = get_resources(user_input)
    print("\nResources Found:\n")
    for field, value in result.items():
        if not isinstance(value, list):
            # Scalar fields (e.g. the title or the not-found message) are
            # printed on their own line without a heading.
            print(value)
            continue
        # List fields get a heading followed by one bullet per element.
        print(f"{field} Links:")
        for item in value:
            print(f" - {item}")


Resources Found:

Integral Calculus for Engineers
tags Links:
 - integration
 - calculus
 - engineering math
links Links:
 - https://youtu.be/calculus1
 - https://drive.link/integration-pdf


In [12]:
from sentence_transformers import SentenceTransformer

In [13]:
# Identifier of the sentence-embedding model to load; kept in one place so it
# is easy to swap for another model.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [14]:
# Demo corpus for semantic search: one free-text description per resource.
# The position of a string in this list is the index used to map a similarity
# score back to its resource, so keep the order stable once embeddings exist.
resource_texts = [
    # 1. From a YouTube Video Title + Description (e.g., "Intro to Programming with Python")
    "Video: Learn Python from scratch! This tutorial covers variables, data types, control flow (loops, conditionals), and basic functions. Ideal for CSC101 and beginners.",

    # 2. From a PDF Lecture Note (e.g., "Linear Algebra for Engineers")
    # NOTE(review): this text labels MAT201 as Linear Algebra, while RESOURCE_DB
    # titles MAT201 "Integral Calculus for Engineers" - confirm which is intended.
    "Lecture Notes: MAT201 - Introduction to Linear Algebra. Topics include vectors, matrices, determinants, eigenvalues, and eigenvectors. Essential for engineering and computer science students.",

    # 3. From a Past Question Document (e.g., "CSC 411 - Operating Systems Exam Questions")
    "Past Questions: CSC411 - Operating Systems. Previous exam questions on process management, memory allocation, deadlocks, and file systems. University of Ibadan, 2023.",

    # 4. From an Article/Blog Post (e.g., "Career Paths in Data Science in Nigeria")
    "Article: Exploring Data Science careers in Nigeria. Discusses skills needed: Python, R, machine learning, statistics. Roles like Data Analyst, ML Engineer, AI Specialist. Useful for students planning their future.",

    # 5. From a Course Outline (e.g., "ECO305 - Principles of Econometrics")
    "Course Outline: ECO305 - Principles of Econometrics. Covers regression analysis, hypothesis testing, time series analysis, and panel data. Prerequisite: Statistics.",

    # 6. From a Scholarship Opportunity Description
    "Scholarship Alert: Shell Undergraduate Scholarship for Nigerian Students. Eligibility: 200L, good academic standing, studying Engineering, Sciences, or Social Sciences. Apply online.",

    # 7. From a SIWES/Internship Guideline Document
    "SIWES Guide: Industrial Training (IT) guidelines for students. How to find placements, logbook requirements, report writing, and supervisor responsibilities. For all engineering and science departments.",

    # 8. From a Specific Book Chapter Summary (e.g., "Calculus by James Stewart, Chapter 7")
    "Textbook Chapter: Calculus I - Techniques of Integration. Methods: substitution, integration by parts, trigonometric integrals, partial fractions. For MAT102/103 students.",

    # 9. From a Departmental Handout on Research Project
    "Research Project Guide: Guidelines for final year projects. Topic selection, literature review, methodology, data analysis, ethical considerations. For all 400L students.",

    # 10. From a YouTube Video Transcript Snippet (e.g., a specific section on "loops")
    "Video Snippet: This part of the Python tutorial explains 'for' loops and 'while' loops with practical examples. Understanding iterative statements is key for programming logic."
]

In [15]:
# Embed every resource description in one batch call, done once up front so
# each query only has to embed the query itself.
# convert_to_tensor=True yields a torch tensor (the recorded run reports
# torch.Size([10, 384]): one row per resource text).
resource_embeddings = model.encode(resource_texts, convert_to_tensor=True)

In [18]:
# Sanity check: the first dimension should equal the number of resource texts.
resource_embeddings.shape

torch.Size([10, 384])

In [20]:
# Peek at the first five components of the first resource's embedding.
resource_embeddings[0][:5]

tensor([-0.0816, -0.0238, -0.0686,  0.0283, -0.0629])

In [30]:
from torch.nn.functional import cosine_similarity
from scipy.spatial.distance import cosine

In [33]:
# Interactive semantic search: read a query, embed it, and print the resources
# whose embeddings are most similar. Type "exit" (any case) to stop.
#
# `cosine_similarity` must be torch.nn.functional's version (imported in the
# cell just above); an earlier cell imports a same-named Keras loss, so run
# the cells in order.

# How many best matches to show per query. Loop-invariant, so set it once.
top_n = 3

while True:
    # Strip the raw input so stray whitespace neither blocks "exit" nor
    # produces a blank query.
    user_input = input("Enter your query").strip()

    if user_input.lower() == "exit":
        print("Exiting...")
        break

    if not user_input:
        # Embedding an empty string would rank resources against nothing.
        print("Please enter a non-empty query.")
        continue

    query_embedding = model.encode(user_input, convert_to_tensor=True)

    # The query embedding broadcasts against the resource matrix, giving one
    # similarity score per resource text.
    similarities = cosine_similarity(query_embedding, resource_embeddings)

    # Never request more results than there are resources, and keep the
    # header honest about how many are actually shown.
    k = min(top_n, len(resource_texts))
    # topk returns the k largest scores in descending order together with
    # their positions, avoiding a full sort.
    top_scores, top_indices = similarities.topk(k)

    print(f"\nTop {k} resources for your query: '{user_input}'")
    print("--------------------------------------------------")

    # .tolist() converts to plain Python floats/ints, so the indices can be
    # used directly on the resource_texts list.
    ranked = zip(top_scores.tolist(), top_indices.tolist())
    for rank, (score, idx) in enumerate(ranked, start=1):
        print(f"Rank {rank} (Similarity: {score:.4f}):")
        print(f"  Resource Text: {resource_texts[idx]}")
        # In a real app, you'd show a link/title instead of the full text
        print("-" * 40)
    print("\n")


Top 3 resources for your query: 'data science'
--------------------------------------------------
Rank 1 (Similarity: 0.5425):
  Resource Text: Article: Exploring Data Science careers in Nigeria. Discusses skills needed: Python, R, machine learning, statistics. Roles like Data Analyst, ML Engineer, AI Specialist. Useful for students planning their future.
----------------------------------------
Rank 2 (Similarity: 0.3661):
  Resource Text: Video: Learn Python from scratch! This tutorial covers variables, data types, control flow (loops, conditionals), and basic functions. Ideal for CSC101 and beginners.
----------------------------------------
Rank 3 (Similarity: 0.2973):
  Resource Text: Past Questions: CSC411 - Operating Systems. Previous exam questions on process management, memory allocation, deadlocks, and file systems. University of Ibadan, 2023.
----------------------------------------


Exiting...
