In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder
import random
from collections import defaultdict
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    precision_recall_curve,
    auc  # Include this
)


# Load Data (enhanced data): the course catalogue and the per-user enrolment records.
courses = pd.read_csv("courses_final.csv")
enrollments = pd.read_csv("enrollments_final.csv")

# --- Knowledge Graph ---
# Undirected graph in which course IDs and user IDs live in the same node namespace.
kg = nx.Graph()

# Register one node per catalogue row, annotated with the row's category and skills.
kg.add_nodes_from(
    (course_row['CourseID'], {'category': course_row['category'], 'skills': course_row['skills']})
    for _, course_row in courses.iterrows()
)

# Connect each user to every course they enrolled in; the edge keeps the
# enrolment's grade and its `completed` value.
kg.add_edges_from(
    (
        enrollment_row['UserID'],
        enrollment_row['CourseID'],
        {'grade': enrollment_row['grade'], 'completed': enrollment_row['completed']},
    )
    for _, enrollment_row in enrollments.iterrows()
)

# --- Collaborative Filtering ---
# Implicit feedback: every enrolment is recorded as a rating of 1.
interactions = pd.DataFrame({
    'user': enrollments['UserID'],
    'course': enrollments['CourseID'],
    'rating': 1,
})

# Integer codes for the raw IDs; the fitted encoders are kept so codes can be
# translated back to the original IDs later.
user_encoder, course_encoder = LabelEncoder(), LabelEncoder()
interactions['user_id'] = user_encoder.fit_transform(interactions['user'])
interactions['course_id'] = course_encoder.fit_transform(interactions['course'])

# Encoded-user x encoded-course table; pairs without an enrolment are filled with 0.
interaction_matrix = (
    interactions
    .pivot_table(values='rating', index=['user_id'], columns=['course_id'])
    .fillna(0)
)


In [None]:
# --- Function to Recommend Using Collaborative Filtering ---
def collaborative_filtering_recommendations(user_id, interaction_matrix, num_recommendations=5):
    """Recommend courses with user-based collaborative filtering.

    Every course is scored by summing, over all *other* users, that user's
    cosine similarity to the target user multiplied by their interaction
    value for the course. Courses the target user already interacted with
    are never returned.

    Parameters
    ----------
    user_id :
        Row label of the user in ``interaction_matrix`` (the label-encoded
        user id, not the raw one).
    interaction_matrix : pandas.DataFrame
        Users x courses table whose column labels are the codes produced by
        the module-level ``course_encoder``.
    num_recommendations : int, default 5
        Maximum number of course IDs to return.

    Returns
    -------
    list
        Original (decoded) course IDs, best first. For a user missing from
        the matrix a random sample of the catalogue is returned instead.
    """
    if num_recommendations <= 0:
        return []

    if user_id not in interaction_matrix.index:
        # Cold start: nothing is known about this user, so suggest random courses.
        course_ids = courses['CourseID'].unique().tolist()
        return course_ids if num_recommendations >= len(course_ids) else random.sample(course_ids, num_recommendations)

    ratings = interaction_matrix.to_numpy(dtype=float)
    user_vector = interaction_matrix.loc[user_id].to_numpy(dtype=float)

    # One similarity value per *user* (row) of the matrix -- these are not course scores.
    similarity_to_users = np.array(
        cosine_similarity(user_vector.reshape(1, -1), ratings)[0], dtype=float
    )
    # A user is always perfectly similar to themself; drop that self-vote.
    similarity_to_users[interaction_matrix.index.get_loc(user_id)] = 0.0

    # Turn user similarities into course scores: similarity-weighted sum of
    # what the other users interacted with.
    course_scores = similarity_to_users @ ratings
    already_taken = user_vector > 0

    # A stable sort keeps column order among equally scored courses.
    ranked_positions = np.argsort(-course_scores, kind="stable")
    top_positions = [int(pos) for pos in ranked_positions if not already_taken[pos]][:num_recommendations]
    if not top_positions:
        return []

    # Decode through the real column labels instead of assuming label == position.
    recs_encoded = interaction_matrix.columns.to_numpy()[top_positions]
    return course_encoder.inverse_transform(recs_encoded).tolist()

# --- Ant Colony Optimization (ACO) Based Recommendations ---
def aco_recommendations(user_id, kg, num_recommendations=5, num_ants=10, num_iterations=5, alpha=1, beta=2, evaporation_rate=0.5):
    """Rank not-yet-taken courses with an ant-colony search over the knowledge graph.

    Candidates are the catalogue courses that are not neighbours of
    ``user_id`` in ``kg``. Each candidate gets a fixed desirability derived
    from the enrolments of "peers" (other nodes adjacent to one of the
    user's courses). In every iteration each ant samples a path of distinct
    candidates with probability proportional to
    ``pheromone ** alpha * desirability ** beta``; afterwards pheromone
    evaporates by ``evaporation_rate`` and every sampled course receives a
    deposit proportional to its desirability.

    Sampling uses NumPy's global random state, so results are reproducible
    only after ``np.random.seed``.

    Parameters
    ----------
    user_id :
        Raw user identifier as used for the graph nodes.
    kg :
        Graph exposing ``in``, ``neighbors(node)`` and ``kg[u][v]`` edge
        attribute access (e.g. ``networkx.Graph``).
    num_recommendations : int, default 5
        Maximum number of course IDs to return.
    num_ants, num_iterations : int
        Size of the colony and number of pheromone update rounds.
    alpha, beta : float
        Exponents weighting pheromone and desirability respectively.
    evaporation_rate : float
        Fraction of pheromone lost per iteration.

    Returns
    -------
    list
        Course IDs ordered by final pheromone (desirability breaks ties);
        never contains a course the user is already linked to.
    """
    if num_recommendations <= 0:
        return []

    taken_courses = list(kg.neighbors(user_id)) if user_id in kg else []
    taken_lookup = set(taken_courses)
    # Catalogue order is preserved so that a seeded run is reproducible.
    candidates = [cid for cid in courses["CourseID"].unique().tolist() if cid not in taken_lookup]
    if not candidates:
        return []

    desirability = _aco_peer_desirability(user_id, kg, taken_courses, candidates)
    heuristic = np.array([desirability[cid] for cid in candidates], dtype=float)
    pheromone = np.ones(len(candidates), dtype=float)
    path_length = min(num_recommendations, len(candidates))
    # Floor keeps every candidate selectable even after heavy evaporation.
    min_pheromone = 1e-6

    for _ in range(num_iterations):
        probabilities = _aco_selection_probabilities(pheromone, heuristic, alpha, beta)
        deposits = np.zeros(len(candidates), dtype=float)
        for _ant in range(num_ants):
            # Each ant picks `path_length` distinct courses in one draw.
            path = np.random.choice(len(candidates), size=path_length, replace=False, p=probabilities)
            deposits[path] += heuristic[path]
        # Evaporate, then add the average deposit per ant.
        pheromone = np.maximum(
            (1 - evaporation_rate) * pheromone + deposits / max(num_ants, 1),
            min_pheromone,
        )

    ranking = sorted(
        range(len(candidates)),
        key=lambda pos: (pheromone[pos], heuristic[pos]),
        reverse=True,
    )
    return [candidates[pos] for pos in ranking[:num_recommendations]]


def _aco_numeric_grade(value):
    """Return ``value`` as a positive finite float, or None when it is missing or unusable."""
    try:
        grade = float(value)
    except (TypeError, ValueError):
        return None
    return grade if np.isfinite(grade) and grade > 0 else None


def _aco_peer_desirability(user_id, kg, taken_courses, candidates):
    """Map each candidate course to a desirability in (0, 1.01] based on peer enrolments."""
    candidate_lookup = set(candidates)

    # Peers: every other node adjacent to one of the user's courses.
    # dict.fromkeys de-duplicates while keeping graph order.
    peers = dict.fromkeys(
        peer
        for course in taken_courses
        for peer in kg.neighbors(course)
        if peer != user_id
    )

    peer_enrolments = []
    for peer in peers:
        for course in kg.neighbors(peer):
            if course in candidate_lookup:
                peer_enrolments.append((course, _aco_numeric_grade(kg[peer][course].get('grade'))))

    # The grade scale is not visible here, so grades are normalised by the
    # largest one observed rather than by an assumed maximum.
    top_grade = max((grade for _, grade in peer_enrolments if grade is not None), default=0.0)

    support = dict.fromkeys(candidates, 0.0)
    for course, grade in peer_enrolments:
        bonus = grade / top_grade if grade is not None and top_grade > 0 else 0.0
        # Every peer enrolment counts once; a good grade can at most double it.
        support[course] += 1.0 + bonus

    strongest = max(support.values(), default=0.0)
    scale = strongest if strongest > 0 else 1.0
    # Small baseline so courses without peer evidence can still be explored.
    baseline = 0.01
    return {course: baseline + value / scale for course, value in support.items()}


def _aco_selection_probabilities(pheromone, heuristic, alpha, beta):
    """Return strictly positive selection probabilities, or None for a uniform draw."""
    with np.errstate(over='ignore', under='ignore', invalid='ignore', divide='ignore'):
        weights = (pheromone ** alpha) * (heuristic ** beta)
        if not np.isfinite(weights).all():
            return None
        peak = weights.max()
        if peak <= 0:
            return None
        # Rescale by the peak and clamp so that sampling without replacement
        # never runs out of non-zero entries.
        weights = np.maximum(weights / peak, 1e-12)
        return weights / weights.sum()


In [None]:
def combined_recommendations(user_id, num_recommendations=5):
    """Merge the collaborative-filtering and ACO recommendations for one user.

    Prints the courses the user has completed and the titles of the
    recommended courses, then returns the recommended course IDs.

    The two ranked lists are merged without losing their order: courses
    proposed by both recommenders come first, then the lists are
    interleaved rank by rank. Courses the user is already enrolled in are
    dropped and duplicates are removed.

    Parameters
    ----------
    user_id :
        Raw user identifier as it appears in ``enrollments['UserID']``.
    num_recommendations : int, default 5
        Maximum number of course IDs to return.

    Returns
    -------
    list
        Up to ``num_recommendations`` course IDs, best first. For a user the
        encoder has never seen, a random sample of the catalogue (or the
        whole catalogue when it is not larger than the request).
    """
    try:
        user_id_encoded = user_encoder.transform([user_id])[0]
    except (ValueError, TypeError):
        # The encoder rejects labels it was not fitted on; treat that as a
        # cold start instead of hiding every possible error behind a bare except.
        print("UserID not present")
        all_course_ids = courses['CourseID'].unique().tolist()
        if num_recommendations >= len(all_course_ids):
            return all_course_ids
        return random.sample(all_course_ids, max(num_recommendations, 0))

    # Previous Courses of the user
    user_enrollments = enrollments[enrollments['UserID'] == user_id]
    previous_courses_ids = user_enrollments[user_enrollments['completed'] == 1]['CourseID'].tolist()
    previous_courses_info = courses[courses['CourseID'].isin(previous_courses_ids)][['title', 'category']]

    print(f"Previous courses completed by User {user_id}:")
    for title, category in zip(previous_courses_info['title'], previous_courses_info['category']):
        print(f"- {title} (category: {category})")

    # Get recommendations
    collab_recs = collaborative_filtering_recommendations(user_id_encoded, interaction_matrix, num_recommendations=num_recommendations)
    aco_recs = aco_recommendations(user_id, kg, num_recommendations=num_recommendations)

    # Never recommend something the user is already enrolled in, completed or not.
    already_enrolled = set(user_enrollments['CourseID'])
    final_recommendations = _merge_ranked_recommendations(
        aco_recs, collab_recs, already_enrolled, num_recommendations
    )

    # Show titles in the same order as the returned ranking.
    print("\nRecommended courses for the user:")
    for course_id in final_recommendations:
        matches = courses.loc[courses['CourseID'] == course_id, ['title', 'category']]
        for title, category in zip(matches['title'], matches['category']):
            print(f"- {title} (category: {category})")

    return final_recommendations


def _merge_ranked_recommendations(first, second, excluded, limit):
    """Merge two ranked lists: shared items first, then rank-wise interleaving, minus `excluded`."""
    second_lookup = set(second)
    # Agreement between both recommenders is the strongest signal.
    queue = [course for course in first if course in second_lookup]
    for rank in range(max(len(first), len(second))):
        for source in (first, second):
            if rank < len(source):
                queue.append(source[rank])

    merged = []
    seen = set(excluded)
    for course in queue:
        if course not in seen:
            seen.add(course)
            merged.append(course)
    return merged[:max(limit, 0)]

# Example call: request five recommendations for user 'EMP156'. The function
# prints the user's completed courses and the recommended titles; the returned
# list of course IDs is the last expression, so it is shown as the cell output.
combined_recommendations('EMP156', num_recommendations=5)


Previous courses completed by User EMP156:
- Mathematics for Machine Learning Specialization (category: Machine Learning)
- Managing Cybersecurity Specialization (category: Security)
- Google IT Support Professional Certificate (category: Support and Operations)
- Suporte em TI do Google Professional Certificate (category: Operações e suporte)
- Instructional Design Foundations and Applications (category: Education)
- Introduction to Philosophy (category: Philosophy)
- Introduction to Economic Theories (category: Economics)
- Health Behavior Change: From Evidence to Action (category: Public Health)
- Introduction to Hardware and Operating Systems (category: Support and Operations)
- Introduction to Agile Development and Scrum (category: Software Development)
- Fundamentals of Finance (category: Finance)
- Introduction to Data, Signal, and Image Analysis with MATLAB (category: Data Analysis)
- Machine Learning for Accounting with Python (category: Machine Learning)
- Data Science Capsto

  not_taken_courses = np.where(user_interactions == 0)[1] if len(user_interactions) > 1 else np.where(user_interactions == 0)[0]


['COR0301', 'COR0130', 'COR0210', 'COR0170', 'COR0116']