 # Import Libraries

In [1]:
# Import libraries
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


Collecting streamlit
  Downloading streamlit-1.48.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.48.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m90.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.48.1


In [2]:
# Attach Google Drive so the project files stored there can be read
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)




Mounted at /content/drive


In [3]:
# Load students & courses data from the project folder on the mounted Drive
DATA_DIR = "/content/drive/MyDrive/Masai/MASAI-PROJECTS/LRS"
students = pd.read_csv(f"{DATA_DIR}/students.csv")
courses = pd.read_csv(f"{DATA_DIR}/courses.csv")

# Fail fast if a column that later cells rely on is missing from either file
_student_cols = {'student_id', 'name', 'background', 'skills', 'interests'}
_course_cols = {'course_id', 'course_name', 'category', 'difficulty', 'skills_required'}
assert _student_cols <= set(students.columns), f"students.csv is missing {_student_cols - set(students.columns)}"
assert _course_cols <= set(courses.columns), f"courses.csv is missing {_course_cols - set(courses.columns)}"

print("Students shape:", students.shape)
print("Courses shape:", courses.shape)

# display() renders each frame as its own table; a bare tuple of frames
# falls back to a truncated plain-text repr.
display(students.head(), courses.head())

Students shape: (1000, 5)
Courses shape: (20, 5)


(   student_id    name             background  \
 0           1  Quinn1                Physics   
 1           2    Eva2  Computer Applications   
 2           3  Frank3                     IT   
 3           4  Oscar4       Computer Science   
 4           5  Frank5  Computer Applications   
 
                                        skills                   interests  
 0                      Statistics, TensorFlow               Cybersecurity  
 1                              Cloud, R, Java                    Robotics  
 2     Java, Python, Cybersecurity, Statistics                   Analytics  
 3                   VHDL, Python, ML, R, HTML  VLSI, Software Development  
 4  C++, TensorFlow, Power BI, JavaScript, CSS         Robotics, Analytics  ,
    course_id                   course_name            category  \
 0        101  Introduction to Data Science        Data Science   
 1        102  Machine Learning with Python        Data Science   
 2        103  Deep Learning Specializat

# Preprocessing

In [7]:
# Combine course attributes into a single string for vectorization.
# Missing values are replaced with "" first: concatenating with `+` would turn
# the whole row into NaN, which TfidfVectorizer cannot process.
course_text_columns = ['category', 'difficulty', 'skills_required']
courses['combined'] = (
    courses[course_text_columns]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
)

# Student profile text (interests followed by skills), built the same way
student_text_columns = ['interests', 'skills']
students['profile'] = (
    students[student_text_columns]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
)


# Recommendation Model

In [26]:
import os

# Make sure the Drive-backed models directory is present (no error if it already is).
# NOTE(review): the cells that save/load artifacts use the relative "models/" path,
# not this directory — confirm which location is intended.
drive_models_dir = "/content/drive/MyDrive/Masai/MASAI-PROJECTS/LRS/models"
os.makedirs(drive_models_dir, exist_ok=True)



In [10]:
# TF-IDF Vectorization on courses: fit the vocabulary on the combined course text
# and keep the resulting course-by-term matrix for similarity scoring.
vectorizer = TfidfVectorizer(stop_words='english')
course_matrix = vectorizer.fit_transform(courses['combined'])

# Save vectorizer & course matrix.
# The artifacts go to the relative "models/" folder, so create it first;
# joblib.dump does not create missing parent directories.
os.makedirs("models", exist_ok=True)
joblib.dump(vectorizer, "models/vectorizer.pkl")
joblib.dump(course_matrix, "models/course_matrix.pkl")

print("Course matrix shape:", course_matrix.shape)


Course matrix shape: (20, 46)


# Recommendation Function

In [19]:
def recommend_courses(student_id, top_n=5):
    """Recommend the courses whose text is most similar to a student's profile.

    Depends on notebook-level globals: ``students`` and ``courses`` (DataFrames),
    ``vectorizer`` (fitted on the course text) and ``course_matrix`` (its output
    for the courses).

    Parameters
    ----------
    student_id
        Value matched against ``students['student_id']``.
    top_n : int or None, default 5
        Maximum number of courses to return. ``None`` returns every course;
        zero or negative values return an empty frame.

    Returns
    -------
    pandas.DataFrame or str
        Rows of ``courses`` (columns ``course_id``, ``course_name``,
        ``category``, ``difficulty``) ordered from most to least similar.
        When no student has the given id, an error message string is
        returned instead.

    Side effects
    ------------
    Prints the student's name, background, skills and interests.
    """
    # Get the student row
    student = students[students['student_id'] == student_id]
    if student.empty:
        return f"❌ No student found with ID {student_id}"

    def _as_text(value):
        # A missing cell is read by pandas as NaN (a float); treat it as empty text
        # so the string concatenation below cannot raise TypeError.
        return "" if pd.isna(value) else str(value)

    # Extract student profile (skills followed by interests)
    student_profile = (
        _as_text(student['skills'].iloc[0]) + " " + _as_text(student['interests'].iloc[0])
    )
    student_vector = vectorizer.transform([student_profile])

    # A negative slice bound would drop courses from the wrong end; clamp it to zero.
    limit = None if top_n is None else max(int(top_n), 0)

    # Compute similarity with all courses and keep the highest-scoring ones
    similarities = cosine_similarity(student_vector, course_matrix).flatten()
    course_indices = similarities.argsort()[::-1][:limit]

    # Select top courses
    recommended_courses = courses.iloc[course_indices][['course_id','course_name','category','difficulty']]

    # Display student info + recommendations
    print(f"🎓 Student: {student['name'].iloc[0]}")
    print(f"📌 Background: {student['background'].iloc[0]}")
    print(f"🛠 Skills: {student['skills'].iloc[0]}")
    print(f"💡 Interests: {student['interests'].iloc[0]}")
    print("\n✅ Recommended Courses:\n")

    return recommended_courses


# Test Recommendations

In [23]:
# Smoke-test the recommender on one known student and show the resulting table
sample_request = {"student_id": 1, "top_n": 5}
recommendations = recommend_courses(**sample_request)
display(recommendations)


🎓 Student: Quinn1
📌 Background: Physics
🛠 Skills: Statistics, TensorFlow
💡 Interests: Cybersecurity

✅ Recommended Courses:



Unnamed: 0,course_id,course_name,category,difficulty
2,103,Deep Learning Specialization,AI / Deep Learning,Advanced
14,115,R Programming for Data Science,Data Science,Beginner
0,101,Introduction to Data Science,Data Science,Beginner
13,114,Cloud Security,Security / Cloud,Advanced
16,117,Data Visualization with Tableau,Data Visualization,Beginner


In [24]:
# Persist the fitted objects the recommender needs rather than the function itself:
# pickle stores a function only as a reference to its module and name
# (here ``__main__.recommend_courses``), so a file holding just the function
# cannot be loaded from another process such as a separately launched app.
recommender_artifacts = {
    "vectorizer": vectorizer,
    "course_matrix": course_matrix,
    "courses": courses,
}

# joblib.dump does not create missing parent directories
os.makedirs("models", exist_ok=True)
joblib.dump(recommender_artifacts, "models/course_recommender.pkl")
print("Recommendation system saved in models/course_recommender.pkl")


Recommendation system saved in models/course_recommender.pkl


In [25]:
# app.py
# Streamlit front end for the course recommender.
# NOTE(review): intended to be launched with `streamlit run app.py`; executing it
# as a notebook cell only produces Streamlit's "without `streamlit run`" warnings.
import joblib
import pandas as pd
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity

TOP_N = 5  # number of courses shown per student

st.title("🎓 Learning Recommendation System")

students = pd.read_csv("/content/drive/MyDrive/Masai/MASAI-PROJECTS/LRS/students.csv")
courses = pd.read_csv("/content/drive/MyDrive/Masai/MASAI-PROJECTS/LRS/courses.csv")
vectorizer = joblib.load("models/vectorizer.pkl")
course_matrix = joblib.load("models/course_matrix.pkl")

student_id = st.selectbox("Select Student ID", students['student_id'].unique())

if st.button("Get Recommendations"):
    # Score in-process with the artifacts loaded above. The visible notebook never
    # creates a `recommend` module, and an imported function would not see the
    # `vectorizer` / `course_matrix` loaded in this script anyway.
    student = students[students['student_id'] == student_id].iloc[0]

    # Skip missing cells so a NaN skill/interest does not end up in the profile text
    profile = " ".join(
        str(student[col]) for col in ('skills', 'interests') if pd.notna(student[col])
    )

    similarities = cosine_similarity(vectorizer.transform([profile]), course_matrix).flatten()
    top_indices = similarities.argsort()[::-1][:TOP_N]
    recs = courses.iloc[top_indices][['course_id', 'course_name', 'category', 'difficulty']]
    st.dataframe(recs)


2025-08-22 10:47:17.834 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-08-22 10:47:17.902 Session state does not function when running a script without `streamlit run`
