In [None]:
!pip install Faker
from faker import Faker
import random
import pandas as pd
import numpy as np



In [None]:
# Seed every random source used below so that "Restart Kernel -> Run All"
# regenerates exactly the same synthetic students, materials and engagements.
SEED = 42
fake = Faker()
Faker.seed(SEED)    # seeds the generator shared by Faker instances
random.seed(SEED)   # seeds the stdlib generator used for choice/randint/uniform

# Define available options for courses, interests, and difficulty levels
courses = ['Computer Science', 'Mechanical Engineering', 'Civil Engineering', 'Electrical Engineering', 'Biotechnology', 'Computational Biology', 'Physics']
# NOTE: the last entry used to carry a trailing space ('Computational Biology '),
# which leaked into every generated record; the label is now clean.
interests = ['AI', 'Blockchain', 'Environmental Science', 'Robotics', 'Data Science', 'Bioinformatics', 'Fintech', 'Sustainability', 'Computational Biology']
difficulty_levels = ['Easy', 'Medium', 'Hard']


In [None]:
# Step 1: Generating Synthetic Student Data using Faker
N_STUDENTS = 100  # number of synthetic students to generate

# `fake.unique` remembers every value it has ever returned. Clearing it makes
# this cell safe to re-run: without it, repeated runs keep shrinking the pool of
# available ids until Faker raises UniquenessException.
fake.unique.clear()

# One record per student; keys become the DataFrame columns.
student_data = [
    {
        'student_id': fake.unique.random_int(min=1, max=1000),
        'name': fake.name(),
        'course': random.choice(courses),
        'year': random.randint(1, 4),  # Year of study
        'interests': random.choice(interests),
        'average_quiz_score': random.uniform(50, 100),  # Quiz scores between 50 and 100
    }
    for _ in range(N_STUDENTS)
]

student_df = pd.DataFrame(student_data)

In [None]:
# Preview the first rows of the generated student table (rendered as the cell output).
student_df.head()

Unnamed: 0,student_id,name,course,year,interests,average_quiz_score
0,962,Mrs. Jessica Marks,Electrical Engineering,3,Blockchain,74.03873
1,145,Cheryl Sampson,Biotechnology,2,Computational Biology,88.98308
2,903,Victoria Thomas,Civil Engineering,2,Robotics,68.15446
3,548,William White,Civil Engineering,3,Blockchain,91.518573
4,113,Bradley Carlson,Computer Science,3,Computational Biology,79.680061


In [None]:
# Step 2: Generating Synthetic Study Material Data using Faker
N_MATERIALS = 50  # number of synthetic study materials to generate

# `fake.unique` keeps its history across cell executions; reset it so re-running
# this cell cannot exhaust the 1..500 id range and raise UniquenessException.
fake.unique.clear()

# One record per study material; subjects are drawn from the same `interests`
# list as the students so that interest/subject matches are possible.
material_data = [
    {
        'material_id': fake.unique.random_int(min=1, max=500),
        'subject': random.choice(interests),
        'difficulty_level': random.choice(difficulty_levels),
        'popularity_score': random.uniform(1, 100),  # Popularity score between 1 and 100
        'content_length': random.randint(5, 20),  # Content length between 5 and 20 pages
    }
    for _ in range(N_MATERIALS)
]

material_df = pd.DataFrame(material_data)
material_df.head()

Unnamed: 0,material_id,subject,difficulty_level,popularity_score,content_length
0,161,Environmental Science,Easy,33.73475,14
1,410,Fintech,Easy,11.370131,8
2,181,Sustainability,Hard,61.804948,19
3,54,Blockchain,Hard,81.814577,20
4,141,Fintech,Easy,71.847689,17


In [None]:
# Step 3: Generating Synthetic Engagement Data
N_ENGAGEMENTS = 200  # number of random student/material interactions

# Draw from plain lists rather than from the Series themselves:
# random.choice(seq) evaluates seq[i] for a random position i, and on a pandas
# Series that is a *label* lookup, which only works while the index happens to be
# exactly 0..n-1. Converting once up front is also cheaper than indexing the
# Series on every iteration.
student_ids = student_df['student_id'].tolist()
material_ids = material_df['material_id'].tolist()

engagement_data = [
    {
        'student_id': random.choice(student_ids),
        'material_id': random.choice(material_ids),
        'rating': random.randint(1, 5),  # Rating between 1 and 5
        'viewed': random.choice([True, False]),  # Randomly indicating if the student viewed the material
    }
    for _ in range(N_ENGAGEMENTS)
]

engagement_df = pd.DataFrame(engagement_data)
engagement_df.head()

Unnamed: 0,student_id,material_id,rating,viewed
0,607,135,4,False
1,286,446,1,False
2,31,74,5,True
3,189,12,5,False
4,720,275,3,False


In [None]:
# Print a plain-text preview of each generated table, one heading per table.
# The leading "\n" on the later headings separates consecutive previews.
sample_previews = [
    ("Student Data Sample:", student_df),
    ("\nMaterial Data Sample:", material_df),
    ("\nEngagement Data Sample:", engagement_df),
]
for heading, frame in sample_previews:
    print(heading)
    print(frame.head())

Student Data Sample:
   student_id                name                  course  year  \
0         962  Mrs. Jessica Marks  Electrical Engineering     3   
1         145      Cheryl Sampson           Biotechnology     2   
2         903     Victoria Thomas       Civil Engineering     2   
3         548       William White       Civil Engineering     3   
4         113     Bradley Carlson        Computer Science     3   

                interests  average_quiz_score  
0              Blockchain           74.038730  
1  Computational Biology            88.983080  
2                Robotics           68.154460  
3              Blockchain           91.518573  
4  Computational Biology            79.680061  

Material Data Sample:
   material_id                subject difficulty_level  popularity_score  \
0          161  Environmental Science             Easy         33.734750   
1          410                Fintech             Easy         11.370131   
2          181         Sustainability

In [None]:
def recommendation_score(student, material, engagement_df, weights=(0.3, 0.3, 0.2, 0.2)):
    """Score how well a study material suits a student (higher is better).

    The score is a weighted sum of four components, each on a 0-1 scale for the
    value ranges generated in this notebook:

    1. interest match    - 1 if the student's interest equals the material's subject, else 0
    2. quiz performance  - the student's average quiz score divided by 100
    3. past engagement   - mean rating the student gave this material divided by 5
                           (0 when there is no recorded interaction)
    4. popularity        - the material's popularity score divided by 100

    Note that component 2 depends only on the student, so it shifts every
    material's score for that student by the same amount and does not change
    the relative ranking of materials for a single student.

    Parameters
    ----------
    student : mapping or pandas.Series
        Must provide 'student_id', 'interests' and 'average_quiz_score'.
    material : mapping or pandas.Series
        Must provide 'material_id', 'subject' and 'popularity_score'.
    engagement_df : pandas.DataFrame
        Interaction log with 'student_id', 'material_id' and 'rating' columns.
    weights : sequence of four numbers, optional
        Weights for (interest, performance, engagement, popularity), in that
        order. Defaults to (0.3, 0.3, 0.2, 0.2) - i.e. the components are NOT
        weighted equally by default.

    Returns
    -------
    float
        The weighted score.

    Raises
    ------
    ValueError
        If `weights` does not contain exactly four values.
    """
    interest_weight, performance_weight, engagement_weight, popularity_weight = weights

    # 1. Interest Match
    interest_score = 1 if student['interests'] == material['subject'] else 0

    # 2. Quiz Performance
    performance_score = student['average_quiz_score'] / 100  # Normalized performance score

    # 3. Engagement Score (If the student previously interacted with the material)
    past_engagement = engagement_df[
        (engagement_df['student_id'] == student['student_id']) &
        (engagement_df['material_id'] == material['material_id'])
    ]
    # Guard on .empty: the mean of an empty selection would be NaN and poison the sum.
    engagement_score = past_engagement['rating'].mean() / 5 if not past_engagement.empty else 0  # Normalize rating

    # 4. Popularity Score
    popularity_score = material['popularity_score'] / 100  # Normalize popularity

    # Final Score: weighted sum of the four components
    total_score = (
        (interest_weight * interest_score)
        + (performance_weight * performance_score)
        + (engagement_weight * engagement_score)
        + (popularity_weight * popularity_score)
    )

    return total_score

# Step 5: Generate Recommendations
TOP_N = 5  # number of materials recommended per student

# Materialise every material row once. The rows do not change between students,
# so there is no need to re-filter material_df with a boolean mask for each
# (student, material) pair.
material_rows = [material for _, material in material_df.iterrows()]

recommendations = []

# Iterate over the student rows directly instead of looking each row up again
# by id (ids are generated to be unique, so this visits the same rows).
for _, student in student_df.iterrows():
    material_scores = [
        (material['material_id'], recommendation_score(student, material, engagement_df))
        for material in material_rows
    ]

    # Sort materials by score and keep the best TOP_N; sorted() is stable, so
    # materials with equal scores keep their material_df order.
    top_materials = sorted(material_scores, key=lambda pair: pair[1], reverse=True)[:TOP_N]

    for material_id, score in top_materials:
        recommendations.append({'student_id': student['student_id'], 'material_id': material_id, 'score': score})

# Step 6: Output the Recommendations
recommendation_df = pd.DataFrame(recommendations)

In [None]:
# Preview the first rows of the recommendations (student_id, material_id, score).
recommendation_df.head()


Unnamed: 0,student_id,material_id,score
0,962,54,0.685745
1,962,280,0.632818
2,962,193,0.623444
3,962,278,0.596096
4,962,309,0.5421


In [None]:
def knowledge_based_recommendation(student, material):
    """Rule-based score for how well `material` fits `student`.

    Combines three signals:
      * subject fit    (weight 0.4): 1 when the student's interest equals the
        material's subject, otherwise 0.
      * difficulty fit (weight 0.4): 1 when the material's difficulty level is
        appropriate for the student's average quiz score, otherwise 0:
          - score above 85       -> 'Medium' or 'Hard'
          - score above 65       -> 'Medium'
          - anything else        -> 'Easy'
      * popularity     (weight 0.2): the material's popularity score divided by 100.

    `student` must provide 'interests' and 'average_quiz_score'; `material`
    must provide 'subject', 'difficulty_level' and 'popularity_score'.
    Returns the weighted sum as a float.
    """
    # Rule 1: does the material cover the student's interest?
    subject_fit = int(student['interests'] == material['subject'])

    # Rule 2: pick the difficulty levels that suit the student's quiz performance
    quiz_score = student['average_quiz_score']
    if quiz_score > 85:
        suitable_levels = ('Medium', 'Hard')
    elif quiz_score > 65:
        suitable_levels = ('Medium',)
    else:
        suitable_levels = ('Easy',)
    difficulty_fit = int(material['difficulty_level'] in suitable_levels)

    # Rule 3: popularity rescaled to 0-1
    popularity = material['popularity_score'] / 100

    # Weighted combination of the three rules
    return (0.4 * subject_fit) + (0.4 * difficulty_fit) + (0.2 * popularity)

In [None]:
# Generate knowledge-based recommendations: score every material for every
# student with knowledge_based_recommendation and keep the best few.
TOP_N = 5  # number of materials recommended per student

# Pull the material rows out once; they are identical for every student, so
# re-filtering material_df by id inside the student loop is wasted work.
material_rows = [material for _, material in material_df.iterrows()]

recommendations = []

# Walk the student rows directly rather than re-selecting each row by its id
# (ids are generated to be unique, so the same rows are visited).
for _, student in student_df.iterrows():
    material_scores = [
        (material['material_id'], knowledge_based_recommendation(student, material))
        for material in material_rows
    ]

    # Sort materials by score and keep the best TOP_N; sorted() is stable, so
    # equally scored materials stay in material_df order.
    top_materials = sorted(material_scores, key=lambda pair: pair[1], reverse=True)[:TOP_N]

    for material_id, score in top_materials:
        recommendations.append({'student_id': student['student_id'], 'material_id': material_id, 'score': score})

In [None]:
# Collect the knowledge-based recommendations into a DataFrame. This rebinds
# `recommendation_df`, replacing the table built earlier from recommendation_score.
recommendation_df = pd.DataFrame(recommendations)
# Preview the first rows (student_id, material_id, score).
recommendation_df.head()

Unnamed: 0,student_id,material_id,score
0,781,461,0.978146
1,781,94,0.969268
2,781,194,0.807009
3,781,318,0.597407
4,781,100,0.596632
