In [1]:
# Required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import torch.nn as nn
import torch.optim as optim

# Deep Learning Model
class CourseEmbeddingNet(nn.Module):
    """Two-layer feed-forward network that maps a feature vector to an embedding.

    The stack is Linear(input_dim -> 256), ReLU, Dropout(p=0.3),
    Linear(256 -> embedding_dim), stored as ``self.model``.

    Args:
        input_dim: Width of the input feature vectors.
        embedding_dim: Width of the produced embedding (default 128).
    """

    def __init__(self, input_dim, embedding_dim=128):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the random generator in the same sequence as the stack.
        hidden_layer = nn.Linear(input_dim, 256)
        output_layer = nn.Linear(256, embedding_dim)
        self.model = nn.Sequential(
            hidden_layer,
            nn.ReLU(),
            nn.Dropout(0.3),
            output_layer,
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        embedding = self.model(x)
        return embedding

# Main Recommender
class CourseRecommender:
    """Content-based course recommender using TF-IDF and an optional neural embedding.

    Typical use: construct with a DataFrame that has a ``course_name`` column,
    call :meth:`fit`, optionally :meth:`fit_deep_model`, then query with
    :meth:`recommend` or :meth:`recommend_deep_learning`.

    Attributes set by the methods:
        df: Course table; after a successful ``fit`` it holds only rows with
            non-blank content, re-indexed from 0, plus a ``content`` column.
        content_matrix: TF-IDF matrix of ``df['content']`` (``None`` until fitted).
        course_indices: Series mapping course name -> row position in ``df``.
        tfidf_vectorizer: The fitted ``TfidfVectorizer``.
        deep_model: The trained ``CourseEmbeddingNet`` (``None`` until trained).
        embeddings: NumPy array of per-course embeddings from ``deep_model``.
    """

    # Metadata columns appended to recommendation output when the table has them.
    _OPTIONAL_COLUMNS = ('platform', 'difficulty', 'skills')

    def __init__(self, courses_df):
        self.df = courses_df
        self.content_matrix = None
        self.course_indices = None
        self.tfidf_vectorizer = None
        self.deep_model = None
        self.embeddings = None

    def fit(self):
        """Build the per-course text and fit the TF-IDF model on it.

        The text is the description repeated three times (when present), the
        skills (when present), the course name, and the difficulty (when
        present). Rows whose combined text is blank are dropped.

        Works on a copy, so the DataFrame passed to the constructor is not
        modified. Any previously trained deep model is discarded because its
        embeddings would no longer line up with the rows.

        Returns:
            self, to allow call chaining.

        Raises:
            KeyError: If the table has no ``course_name`` column.
        """
        if 'course_name' not in self.df.columns:
            raise KeyError("courses_df must contain a 'course_name' column")

        df = self.df.copy()

        def text(column):
            # Missing values become '' and everything is coerced to str so
            # numeric or mixed-type columns concatenate cleanly.
            return df[column].fillna('').astype(str)

        content = pd.Series('', index=df.index, dtype=object)
        if 'description' in df.columns:
            # Repeating the description triples its term counts, weighting it
            # more heavily than the other fields.
            content = content + (text('description') + ' ').str.repeat(3)
        if 'skills' in df.columns:
            content = content + text('skills') + ' '
        content = content + text('course_name')
        if 'difficulty' in df.columns:
            content = content + ' ' + text('difficulty')
        df['content'] = content

        # Embeddings from an earlier fit would be misaligned with the new rows.
        self.deep_model = None
        self.embeddings = None

        self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
        has_content = df['content'].str.strip() != ''
        if not has_content.any():
            print("Warning: No courses with content found")
            self.df = df
            self.content_matrix = None
            self.course_indices = None
            return self

        self.df = df.loc[has_content].reset_index(drop=True)
        self.content_matrix = self.tfidf_vectorizer.fit_transform(self.df['content'])
        self.course_indices = pd.Series(self.df.index, index=self.df['course_name'])
        return self

    def fit_deep_model(self, embedding_dim=128, epochs=20, lr=0.001):
        """Train a ``CourseEmbeddingNet`` on the TF-IDF matrix and store embeddings.

        Args:
            embedding_dim: Requested embedding width; reduced to the number of
                TF-IDF features when the vocabulary is smaller than this.
            epochs: Number of full-batch Adam steps.
            lr: Adam learning rate.

        Returns:
            None. Prints a message and returns early when ``fit`` has not
            produced a content matrix.
        """
        if self.content_matrix is None:
            print("Content matrix is empty. Run fit() first.")
            return

        X = torch.tensor(self.content_matrix.toarray(), dtype=torch.float32)
        if embedding_dim > X.shape[1]:
            # The target below is a column slice of X, so the output cannot be
            # wider than X without a shape mismatch in the loss.
            embedding_dim = X.shape[1]
            print(f"Vocabulary has only {X.shape[1]} features; using embedding_dim={embedding_dim}")

        model = CourseEmbeddingNet(X.shape[1], embedding_dim)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()

        # NOTE(review): the regression target is simply the first
        # `embedding_dim` TF-IDF columns; consider a proper reconstruction or
        # contrastive objective if embedding quality matters.
        target = X[:, :embedding_dim]

        model.train()
        for epoch in range(epochs):
            optimizer.zero_grad()
            output = model(X)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

        # Switch to eval mode so dropout is disabled; otherwise the stored
        # embeddings would carry random noise from the dropout layer.
        model.eval()
        with torch.no_grad():
            self.embeddings = model(X).numpy()
        self.deep_model = model

    def _resolve_index(self, course_name):
        """Return the row position for ``course_name``, or ``None`` if unknown.

        Tries an exact match first, then the first case-insensitive substring
        match. When several rows share a name, the first one is used.
        """
        names = self.course_indices.index
        if course_name not in names:
            needle = str(course_name).lower()
            # Non-string names (e.g. NaN) cannot be substring-matched; skip them.
            matches = [c for c in names if isinstance(c, str) and needle in c.lower()]
            if not matches:
                print(f"Course '{course_name}' not found")
                return None
            course_name = matches[0]
            print(f"Using closest match: '{course_name}'")

        position = self.course_indices.loc[course_name]
        if isinstance(position, pd.Series):
            # Duplicate course names yield several positions; take the first.
            position = position.iloc[0]
        return int(position)

    @staticmethod
    def _top_indices(sim_scores, idx, n):
        """Return positions of the ``n`` best scores, best first, never ``idx``."""
        positions = np.arange(len(sim_scores))
        candidates = positions[positions != idx]
        n = max(0, min(int(n), len(candidates)))
        if n == 0:
            return candidates[:0]
        order = np.argsort(sim_scores[candidates], kind='stable')[::-1][:n]
        return candidates[order]

    def _build_result(self, sim_scores, idx, n, score_column):
        """Assemble the recommendation table for the query row ``idx``."""
        top = self._top_indices(sim_scores, idx, n)
        result = self.df.iloc[top].copy()
        result[score_column] = sim_scores[top]
        columns = ['course_name', score_column]
        columns += [col for col in self._OPTIONAL_COLUMNS if col in result.columns]
        return result[columns]

    def recommend(self, course_name, n=10):
        """Recommend courses by cosine similarity of TF-IDF vectors.

        Args:
            course_name: Exact name, or a case-insensitive substring of one.
            n: Maximum number of recommendations; the query course itself is
                never included.

        Returns:
            DataFrame with ``course_name``, ``similarity_score`` and any of
            ``platform``/``difficulty``/``skills`` that exist, best match
            first. An empty DataFrame when the model is not fitted or the
            course cannot be found.
        """
        if self.content_matrix is None:
            print("Model not fitted.")
            return pd.DataFrame()

        idx = self._resolve_index(course_name)
        if idx is None:
            return pd.DataFrame()

        sim_scores = cosine_similarity(self.content_matrix[idx], self.content_matrix).flatten()
        return self._build_result(sim_scores, idx, n, 'similarity_score')

    def recommend_deep_learning(self, course_name, n=10):
        """Recommend courses by cosine similarity of the learned embeddings.

        Args:
            course_name: Exact name, or a case-insensitive substring of one.
            n: Maximum number of recommendations; the query course itself is
                never included.

        Returns:
            DataFrame with ``course_name``, ``deep_similarity_score`` and any
            of ``platform``/``difficulty``/``skills`` that exist, best match
            first. An empty DataFrame when the deep model is not trained or
            the course cannot be found.
        """
        if self.embeddings is None or self.deep_model is None:
            print("Deep model not trained. Run fit_deep_model() first.")
            return pd.DataFrame()

        idx = self._resolve_index(course_name)
        if idx is None:
            return pd.DataFrame()

        course_vec = self.embeddings[idx].reshape(1, -1)
        sim_scores = cosine_similarity(course_vec, self.embeddings).flatten()
        return self._build_result(sim_scores, idx, n, 'deep_similarity_score')

# CLI Demo
def run_recommender_cli(csv_path="integrated_educational_courses.csv"):
    """Interactive command-line demo of the course recommender.

    Loads the course table from ``csv_path``, renames the
    ``integrated_22_Course Name`` column to ``course_name``, fits the TF-IDF
    model, optionally trains the deep model, then repeatedly prompts for a
    course name and prints the top five recommendations until the user
    types ``exit``.

    Args:
        csv_path: Path of the CSV file to load; defaults to the file name the
            demo was written against.
    """
    df = pd.read_csv(csv_path, low_memory=False)
    df = df.rename(columns={"integrated_22_Course Name": "course_name"})
    df = df.reset_index(drop=True)

    recommender = CourseRecommender(df)
    recommender.fit()

    # Answers are stripped so that stray whitespace ("y ") still counts as yes.
    train_dl = input("Do you want to train the deep learning model? (y/n): ").strip().lower()
    if train_dl == 'y':
        print("Training deep learning model...")
        recommender.fit_deep_model()

    while True:
        course_input = input("\nEnter a course name (or 'exit' to quit): ").strip()
        if course_input.lower() == 'exit':
            break

        method = input("Use deep learning? (y/n): ").strip().lower()
        if method == 'y':
            results = recommender.recommend_deep_learning(course_input, n=5)
        else:
            results = recommender.recommend(course_input, n=5)

        if not results.empty:
            print("\nTop Recommendations:")
            print(results.to_string(index=False))
        else:
            print("No recommendations found.")

# Run the CLI only when this file is executed as the main program;
# importing it as a module does not start the interactive prompt.
if __name__ == "__main__":
    run_recommender_cli()


Training deep learning model...
Epoch 5/20, Loss: 0.0009
Epoch 10/20, Loss: 0.0004
Epoch 15/20, Loss: 0.0004
Epoch 20/20, Loss: 0.0003
Using closest match: 'Python Programming Essentials'

Top Recommendations:
                                                     course_name  deep_similarity_score
                                              The Unix Workbench               0.583148
                                   Intellectual Humility: Theory               0.580663
Essentials in Clinical Simulations Across the Health Professions               0.578897
                      Python Programming: A Concise Introduction               0.577355
                                   Moral Foundations of Politics               0.554505
Using closest match: 'Python Programming Essentials'

Top Recommendations:
                                                  course_name  similarity_score
                                                R Programming          0.538950
                           