In [3]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
users_df = pd.read_csv("moocuserfinal.csv")
courses_df = pd.read_csv("MOOC.csv")

# Select a user
selected_user_id = "U00002"  # <-- Change this to any valid User_ID
user_row = users_df[users_df["User_ID"] == selected_user_id].iloc[0]
user_profile = {
    "preferred_topics": user_row["Preferred Topics"].lower().split(", "),
    "learning_goals": user_row["Learning Goals"].lower(),
    "difficulty": user_row["Preferred Difficulty Level"].lower(),
    "past_courses": user_row["Previous Learning History (Courses Completed)"].lower().split(", "),
    "skills": user_row["Skills Acquired"].lower().split(", "),
}

# Prepare and clean course dataset
course_pool = courses_df.dropna(subset=["Course Description", "all_skill", "Difficulty Level"]).copy()
course_pool["combined_text"] = course_pool["Course Description"].str.lower() + " " + course_pool["all_skill"].str.lower()
course_pool.reset_index(drop=True, inplace=True)

# Vectorize course descriptions
vectorizer = TfidfVectorizer(stop_words="english")
course_vectors = vectorizer.fit_transform(course_pool["combined_text"])

# Vectorize user preferences
user_text = " ".join(user_profile["preferred_topics"]) + " " + user_profile["learning_goals"]
user_vector = vectorizer.transform([user_text])
# Combine all relevant user inputs
user_text = (
    str(user_profile["skills"]).lower() + " " +
    str(user_profile.get("Learning Goals", "")).lower() + " " +
    str(user_profile.get("Preferred Topics", "")).lower()
)

# Vectorize combined text
user_vector = vectorizer.transform([user_text])

# Fitness function
def fitness(individual):
    indices = individual
    course_vecs = course_vectors[indices]
    similarity_scores = cosine_similarity(course_vecs, user_vector).flatten()
    difficulty_scores = [
        1.0 if user_profile["difficulty"] in course_pool.iloc[i]["Difficulty Level"].lower() else 0.5
        for i in indices
    ]
    return np.mean(similarity_scores * difficulty_scores)

# GA Parameters
POP_SIZE = 30
GENS = 20
MUTATION_RATE = 0.2
COURSE_COUNT = 5
course_indices = list(range(len(course_pool)))  # fixed index range

# GA Functions
def generate_population():
    return [random.sample(course_indices, COURSE_COUNT) for _ in range(POP_SIZE)]

def crossover(p1, p2):
    child = list(set(p1[:2] + p2[2:]))
    while len(child) < COURSE_COUNT:
        extra = random.choice(course_indices)
        if extra not in child:
            child.append(extra)
    return child

def mutate(individual):
    if random.random() < MUTATION_RATE:
        idx_to_replace = random.randint(0, COURSE_COUNT - 1)
        new_idx = random.choice(course_indices)
        while new_idx in individual:
            new_idx = random.choice(course_indices)
        individual[idx_to_replace] = new_idx
    return individual

# Run Genetic Algorithm
population = generate_population()
for gen in range(GENS):
    scored_pop = [(ind, fitness(ind)) for ind in population]
    scored_pop.sort(key=lambda x: x[1], reverse=True)
    top = [ind for ind, _ in scored_pop[:10]]
    next_gen = top[:]
    while len(next_gen) < POP_SIZE:
        p1, p2 = random.sample(top, 2)
        child = mutate(crossover(p1, p2))
        next_gen.append(child)
    population = next_gen

# Output top 5 recommended courses
best_individual = scored_pop[0][0]
recommended_courses = course_pool.iloc[best_individual][["Course Name", "Course URL", "Difficulty Level", "Course Rating"]]
print(f"🎯 Top 5 Recommended Courses for {selected_user_id}:\n")
print(recommended_courses.reset_index(drop=True))


🎯 Top 5 Recommended Courses for U00002:

                                         Course Name  \
0                      3D Models for Virtual Reality   
1  Advanced Valuation and Strategy - M&A, Private...   
2           Engagement & Monetization | Mobile Games   
3  Create Your First Application with Java Using ...   
4  Interpersonal, Developmental, and Evolutionary...   

                                          Course URL Difficulty Level  \
0  https://www.coursera.org/learn/3d-models-virtu...         Advanced   
1  https://www.coursera.org/learn/Advanced-valuat...         Advanced   
2  https://www.udacity.com//course/engagement-mon...         Beginner   
3  https://www.coursera.org/learn/create-your-fir...         Beginner   
4  https://www.coursera.org/learn/mind-machine-pe...         Advanced   

  Course Rating  
0           4.6  
1           4.6  
2           NaN  
3           3.9  
4           4.1  
