<a href="https://colab.research.google.com/github/Ayman-queen/data-science-internship-advanced-tasks/blob/main/Task_2_NLP_%2B_Recommendation_Personalized_Learning_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# ✅ Libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# ✅ 1. Sample Student Data (Preferences)
# Maps student_id -> free-text interest statement; insertion order is the row order.
preference_by_student = {
    1: "I like solving math problems and logic puzzles",
    2: "Interested in AI, neural networks and coding",
    3: "Enjoys data visualization and exploring patterns",
    4: "Prefers practical work like coding and hands-on AI",
}
students = pd.DataFrame({
    'student_id': list(preference_by_student),
    'preferences': list(preference_by_student.values()),
})

# ✅ 3. Topics list
# Defined before the score table so the same names can label its columns.
topics = [
    'Linear Algebra',
    'Data Visualization',
    'Machine Learning',
    'Python Basics',
    'AI Basics',
    'Statistics',
    'Data Analysis',
    'Deep Learning',
]

# ✅ 2. Past Scores (0–100) for key topics
# One record per student: student_id followed by one score per entry of `topics`.
score_records = [
    (1, 95, 60, 30, 80, 20, 92, 77, 15),
    (2, 40, 75, 85, 95, 90, 45, 80, 60),
    (3, 50, 90, 65, 60, 40, 88, 82, 40),
    (4, 70, 40, 95, 85, 88, 60, 70, 85),
]
scores = pd.DataFrame(score_records, columns=['student_id', *topics])

# ✅ 4. Content-based filtering with TF-IDF
# Preferences and topic names are vectorized together so that both share one
# vocabulary and one set of IDF weights.
n_students = len(students)
combined_text = pd.Series(students['preferences'].tolist() + topics)

vectorizer = TfidfVectorizer()
preference_matrix = vectorizer.fit_transform(combined_text)

# The first n_students rows are the students; the remaining rows are the topics.
student_vecs, topic_vecs = preference_matrix[:n_students], preference_matrix[n_students:]

# One row per student, one column per topic.
content_scores = cosine_similarity(student_vecs, topic_vecs)

# ✅ 5. Collaborative filtering (based on scores)
# Select score rows by student_id, in the order of `students`, rather than
# trusting row position. This keeps each row of collab_scores paired with the
# same student as the matching row of content_scores even when `scores` is
# ordered differently or holds extra students. A student with no score row
# raises KeyError instead of being silently mismatched.
topic_only = (
    scores.set_index('student_id')
    .loc[students['student_id'].tolist(), topics]
    .reset_index(drop=True)  # keep the positional index the rest of the script expects
)
scaler = MinMaxScaler()
# Each topic column is rescaled independently across students to [0, 1], so
# the result measures standing relative to the other students on that topic.
normalized_scores = scaler.fit_transform(topic_only)

collab_scores = 1 - normalized_scores  # Recommend topics with lower scores

# ✅ 6. Combine hybrid recommendation (60% content + 40% collaborative)
CONTENT_WEIGHT = 0.6
COLLAB_WEIGHT = 0.4
hybrid_scores = CONTENT_WEIGHT * content_scores + COLLAB_WEIGHT * collab_scores

# ✅ 7. Get top 3 recommendations per student
# Sort each student's row ascending, reverse it, and keep the first three
# positions: the three highest-scoring topics, best first.
recommendations = [
    [topics[col] for col in student_row.argsort()[::-1][:3]]
    for student_row in hybrid_scores
]

# ✅ 8. Final Recommendation Table
# One row per student: the id followed by the three recommended topics, best first.
topic_columns = ['Topic 1', 'Topic 2', 'Topic 3']
result = pd.DataFrame(recommendations, columns=topic_columns)
result["Student ID"] = students['student_id']
result = result[["Student ID", *topic_columns]]  # move the id column to the front

# The header already ends in a newline; sep adds the blank line before the table.
print("📚 Top 3 Personalized Learning Recommendations:\n", result, sep="\n")

📚 Top 3 Personalized Learning Recommendations:

   Student ID        Topic 1             Topic 2             Topic 3
0           1  Deep Learning           AI Basics    Machine Learning
1           2     Statistics      Linear Algebra       Deep Learning
2           3  Python Basics      Linear Algebra  Data Visualization
3           4  Data Analysis  Data Visualization          Statistics
