<a href="https://colab.research.google.com/github/SagnikChunder/AI-Based-Study-Planner/blob/main/AI_Based_Study_Planner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from dataclasses import dataclass, field
from typing import List, Dict
import datetime

@dataclass
class Topic:
    name: str
    difficulty: int  # 1 (easy) to 5 (hard)
    estimated_hours: float
    priority: float  # exam weight

@dataclass
class Subject:
    name: str
    topics: List[Topic]

@dataclass
class StudentProfile:
    name: str
    daily_study_hours: float
    energy_level: str  # morning / evening / night
    focus_score: float  # 0 to 1

@dataclass
class StudySession:
    subject: str
    topic: str
    duration: float
    date: datetime.date

@dataclass
class StudyPlan:
    sessions: List[StudySession] = field(default_factory=list)

Synthetic Data Generator

In [2]:
import random
import numpy as np

In [4]:
#Generate subject and topics
def generate_subjects(num_subjects=3, topics_per_subject=6):
    subjects = []

    for i in range(num_subjects):
        topics = []
        for j in range(topics_per_subject):
            difficulty = random.randint(1, 5)
            estimated_hours = round(np.random.uniform(1.5, 4.0) * difficulty, 2)
            priority = round(np.random.uniform(0.5, 1.5), 2)

            topics.append(
                Topic(
                    name=f"Topic_{j+1}",
                    difficulty=difficulty,
                    estimated_hours=estimated_hours,
                    priority=priority
                )
            )

        subjects.append(
            Subject(
                name=f"Subject_{i+1}",
                topics=topics
            )
        )

    return subjects

In [5]:
#Generate Student Profile
def generate_student_profile():
    return StudentProfile(
        name="Demo Student",
        daily_study_hours=round(np.random.uniform(3, 6), 1),
        energy_level=random.choice(["morning", "evening", "night"]),
        focus_score=round(np.random.uniform(0.6, 0.95), 2)
    )

In [6]:
#Generate exam date
def generate_exam_date(days_from_now=45):
    return datetime.date.today() + datetime.timedelta(days=days_from_now)

In [7]:
subjects = generate_subjects()
student = generate_student_profile()
exam_date = generate_exam_date()

print("Student Profile:", student)
print("\nExam Date:", exam_date)

for sub in subjects:
    print(f"\n{sub.name}")
    for t in sub.topics:
        print(vars(t))


Student Profile: StudentProfile(name='Demo Student', daily_study_hours=4.7, energy_level='evening', focus_score=0.8)

Exam Date: 2026-02-09

Subject_1
{'name': 'Topic_1', 'difficulty': 3, 'estimated_hours': 7.32, 'priority': 1.23}
{'name': 'Topic_2', 'difficulty': 3, 'estimated_hours': 5.87, 'priority': 1.11}
{'name': 'Topic_3', 'difficulty': 2, 'estimated_hours': 3.96, 'priority': 0.71}
{'name': 'Topic_4', 'difficulty': 1, 'estimated_hours': 3.27, 'priority': 0.9}
{'name': 'Topic_5', 'difficulty': 3, 'estimated_hours': 11.6, 'priority': 1.46}
{'name': 'Topic_6', 'difficulty': 4, 'estimated_hours': 8.94, 'priority': 1.02}

Subject_2
{'name': 'Topic_1', 'difficulty': 1, 'estimated_hours': 2.99, 'priority': 0.53}
{'name': 'Topic_2', 'difficulty': 3, 'estimated_hours': 7.01, 'priority': 1.24}
{'name': 'Topic_3', 'difficulty': 5, 'estimated_hours': 19.79, 'priority': 0.9}
{'name': 'Topic_4', 'difficulty': 4, 'estimated_hours': 6.95, 'priority': 0.84}
{'name': 'Topic_5', 'difficulty': 1, 'e

Rule-Based Study Planner

In [8]:
def flatten_topics(subjects):
    all_topics = []
    for subject in subjects:
        for topic in subject.topics:
            all_topics.append({
                "subject": subject.name,
                "topic": topic.name,
                "difficulty": topic.difficulty,
                "estimated_hours": topic.estimated_hours,
                "priority": topic.priority
            })
    return all_topics

In [9]:
def compute_priority_score(topic):
    # Higher difficulty & priority â†’ scheduled earlier
    return topic["difficulty"] * topic["priority"]

In [11]:
# Rule Based Planner Core
def generate_rule_based_plan(subjects, student, exam_date):
    topics = flatten_topics(subjects)

    # Sort topics by priority score (descending)
    topics.sort(key=compute_priority_score, reverse=True)

    today = datetime.date.today()
    total_days = (exam_date - today).days

    plan = StudyPlan()
    day_pointer = 0
    daily_hours_left = student.daily_study_hours

    for topic in topics:
        hours_needed = topic["estimated_hours"]

        while hours_needed > 0 and day_pointer < total_days:
            session_hours = min(hours_needed, daily_hours_left)

            plan.sessions.append(
                StudySession(
                    subject=topic["subject"],
                    topic=topic["topic"],
                    duration=round(session_hours, 2),
                    date=today + datetime.timedelta(days=day_pointer)
                )
            )

            hours_needed -= session_hours
            daily_hours_left -= session_hours

            # Move to next day if no hours left
            if daily_hours_left <= 0:
                day_pointer += 1
                daily_hours_left = student.daily_study_hours

    return plan

In [12]:
#Test the Planner
study_plan = generate_rule_based_plan(subjects, student, exam_date)

for session in study_plan.sessions[:10]:
    print(session)

StudySession(subject='Subject_2', topic='Topic_3', duration=4.7, date=datetime.date(2025, 12, 26))
StudySession(subject='Subject_2', topic='Topic_3', duration=4.7, date=datetime.date(2025, 12, 27))
StudySession(subject='Subject_2', topic='Topic_3', duration=4.7, date=datetime.date(2025, 12, 28))
StudySession(subject='Subject_2', topic='Topic_3', duration=4.7, date=datetime.date(2025, 12, 29))
StudySession(subject='Subject_2', topic='Topic_3', duration=0.99, date=datetime.date(2025, 12, 30))
StudySession(subject='Subject_3', topic='Topic_6', duration=3.71, date=datetime.date(2025, 12, 30))
StudySession(subject='Subject_3', topic='Topic_6', duration=4.7, date=datetime.date(2025, 12, 31))
StudySession(subject='Subject_3', topic='Topic_6', duration=4.7, date=datetime.date(2026, 1, 1))
StudySession(subject='Subject_3', topic='Topic_6', duration=2.33, date=datetime.date(2026, 1, 2))
StudySession(subject='Subject_1', topic='Topic_5', duration=2.37, date=datetime.date(2026, 1, 2))


In [13]:
!pip install pulp



Optimization Engine

In [14]:
import pulp
def prepare_optimization_data(subjects):
    topics = []
    for subject in subjects:
        for topic in subject.topics:
            topics.append({
                "id": f"{subject.name}_{topic.name}",
                "subject": subject.name,
                "topic": topic.name,
                "difficulty": topic.difficulty,
                "priority": topic.priority,
                "hours": topic.estimated_hours
            })
    return topics


In [15]:
def optimize_study_plan(subjects, student, exam_date):
    topics = prepare_optimization_data(subjects)

    today = datetime.date.today()
    days = [(today + datetime.timedelta(days=i))
            for i in range((exam_date - today).days)]

    model = pulp.LpProblem("Study_Planner_Optimization", pulp.LpMaximize)

    # Decision variables
    x = pulp.LpVariable.dicts(
        "study_hours",
        ((t["id"], d) for t in topics for d in days),
        lowBound=0,
        cat="Continuous"
    )

    # Objective function
    model += pulp.lpSum(
        x[(t["id"], d)] * t["priority"] * t["difficulty"]
        for t in topics for d in days
    )

    # Daily hour constraints
    for d in days:
        model += pulp.lpSum(
            x[(t["id"], d)] for t in topics
        ) <= student.daily_study_hours

    # Topic completion constraints
    for t in topics:
        model += pulp.lpSum(
            x[(t["id"], d)] for d in days
        ) == t["hours"]

    model.solve(pulp.PULP_CBC_CMD(msg=False))

    # Build study plan
    plan = StudyPlan()

    for t in topics:
        for d in days:
            hours = x[(t["id"], d)].value()
            if hours is not None and hours > 0:
                plan.sessions.append(
                    StudySession(
                        subject=t["subject"],
                        topic=t["topic"],
                        duration=round(hours, 2),
                        date=d
                    )
                )

    return plan

In [16]:
def optimize_study_plan(subjects, student, exam_date):
    topics = prepare_optimization_data(subjects)

    today = datetime.date.today()
    days = [(today + datetime.timedelta(days=i))
            for i in range((exam_date - today).days)]

    model = pulp.LpProblem("Study_Planner_Optimization", pulp.LpMaximize)

    # Decision variables
    x = pulp.LpVariable.dicts(
        "study_hours",
        ((t["id"], d) for t in topics for d in days),
        lowBound=0,
        cat="Continuous"
    )

    # Objective function
    model += pulp.lpSum(
        x[(t["id"], d)] * t["priority"] * t["difficulty"]
        for t in topics for d in days
    )

    # Daily hour constraints
    for d in days:
        model += pulp.lpSum(
            x[(t["id"], d)] for t in topics
        ) <= student.daily_study_hours

    # Topic completion constraints
    for t in topics:
        model += pulp.lpSum(
            x[(t["id"], d)] for d in days
        ) == t["hours"]

    model.solve(pulp.PULP_CBC_CMD(msg=False))

    # Build study plan
    plan = StudyPlan()

    for t in topics:
        for d in days:
            hours = x[(t["id"], d)].value()
            if hours is not None and hours > 0:
                plan.sessions.append(
                    StudySession(
                        subject=t["subject"],
                        topic=t["topic"],
                        duration=round(hours, 2),
                        date=d
                    )
                )

    return plan

In [17]:
optimized_plan = optimize_study_plan(subjects, student, exam_date)

for session in optimized_plan.sessions[:10]:
    print(session)

StudySession(subject='Subject_1', topic='Topic_1', duration=4.7, date=datetime.date(2026, 1, 1))
StudySession(subject='Subject_1', topic='Topic_1', duration=0.57, date=datetime.date(2026, 1, 3))
StudySession(subject='Subject_1', topic='Topic_1', duration=2.05, date=datetime.date(2026, 1, 30))
StudySession(subject='Subject_1', topic='Topic_2', duration=4.7, date=datetime.date(2026, 1, 11))
StudySession(subject='Subject_1', topic='Topic_2', duration=1.17, date=datetime.date(2026, 1, 31))
StudySession(subject='Subject_1', topic='Topic_3', duration=1.12, date=datetime.date(2026, 1, 15))
StudySession(subject='Subject_1', topic='Topic_3', duration=2.84, date=datetime.date(2026, 1, 19))
StudySession(subject='Subject_1', topic='Topic_4', duration=3.27, date=datetime.date(2026, 1, 16))
StudySession(subject='Subject_1', topic='Topic_5', duration=4.7, date=datetime.date(2025, 12, 28))
StudySession(subject='Subject_1', topic='Topic_5', duration=0.34, date=datetime.date(2026, 1, 2))


Feedback Model

In [18]:
# Phase 5: Feedback & Adaptation

@dataclass
class StudyFeedback:
    subject: str
    topic: str
    planned_hours: float
    actual_hours: float
    understanding: int  # 1 (poor) to 5 (excellent)


In [19]:
def simulate_feedback(plan):
    feedback_list = []

    for session in plan.sessions:
        completion_factor = np.random.uniform(0.6, 1.1)
        actual_hours = round(session.duration * completion_factor, 2)
        understanding = random.randint(2, 5)

        feedback_list.append(
            StudyFeedback(
                subject=session.subject,
                topic=session.topic,
                planned_hours=session.duration,
                actual_hours=actual_hours,
                understanding=understanding
            )
        )

    return feedback_list

In [20]:
def analyze_feedback(feedback_list):
    insights = {
        "time_ratio": [],
        "low_understanding_topics": []
    }

    for fb in feedback_list:
        ratio = fb.actual_hours / fb.planned_hours
        insights["time_ratio"].append(ratio)

        if fb.understanding <= 2:
            insights["low_understanding_topics"].append(fb.topic)

    insights["avg_time_ratio"] = np.mean(insights["time_ratio"])
    return insights

In [21]:
def adapt_student_profile(student, insights):
    # Adjust focus score based on time deviation
    student.focus_score *= (1 / insights["avg_time_ratio"])
    student.focus_score = min(max(student.focus_score, 0.5), 1.0)
    return student

In [22]:
feedback = simulate_feedback(optimized_plan)
insights = analyze_feedback(feedback)
student = adapt_student_profile(student, insights)

adapted_plan = optimize_study_plan(subjects, student, exam_date)

In [29]:
# Phase 6: Evaluation & Comparison

from collections import defaultdict


def syllabus_coverage(plan, subjects):
    planned_hours = sum(s.duration for s in plan.sessions)
    total_hours = sum(
        t.estimated_hours for sub in subjects for t in sub.topics
    )
    return round(planned_hours / total_hours, 3)

In [30]:
def workload_variance(plan):
    daily_hours = defaultdict(float)

    for s in plan.sessions:
        daily_hours[s.date] += s.duration

    return round(np.var(list(daily_hours.values())), 3)

In [31]:
def priority_alignment(plan, subjects):
    priority_map = {}
    for sub in subjects:
        for t in sub.topics:
            priority_map[t.name] = t.priority * t.difficulty

    weighted_day_sum = 0
    total_weight = 0

    start_date = min(s.date for s in plan.sessions)

    for s in plan.sessions:
        day_index = (s.date - start_date).days + 1
        weight = priority_map[s.topic]
        weighted_day_sum += weight * day_index
        total_weight += weight

    return round(weighted_day_sum / total_weight, 2)

In [32]:
def time_prediction_error(feedback_list):
    errors = [
        abs(fb.actual_hours - fb.planned_hours)
        for fb in feedback_list
    ]
    return round(np.mean(errors), 3)

RUN FULL EVALUATION

In [34]:
# Rule-based plan
rule_plan = generate_rule_based_plan(subjects, student, exam_date)

# Optimized + adaptive plan
opt_plan = adapted_plan

# Simulated feedback
rule_feedback = simulate_feedback(rule_plan)
opt_feedback = simulate_feedback(opt_plan)

results = {
    "Rule-Based": {
        "Coverage": syllabus_coverage(rule_plan, subjects),
        "Workload Variance": workload_variance(rule_plan),
        "Priority Alignment": priority_alignment(rule_plan, subjects),
        "Time Error": time_prediction_error(rule_feedback)
    },
    "Optimized + Adaptive": {
        "Coverage": syllabus_coverage(opt_plan, subjects),
        "Workload Variance": workload_variance(opt_plan),
        "Priority Alignment": priority_alignment(opt_plan, subjects),
        "Time Error": time_prediction_error(opt_feedback)
    }
}

results

{'Rule-Based': {'Coverage': 1.0,
  'Workload Variance': np.float64(0.568),
  'Priority Alignment': 17.16,
  'Time Error': np.float64(0.389)},
 'Optimized + Adaptive': {'Coverage': 1.0,
  'Workload Variance': np.float64(0.568),
  'Priority Alignment': 21.42,
  'Time Error': np.float64(0.506)}}

ML- Based Study Time prediction

In [35]:
import pandas as pd

In [36]:
def build_ml_dataset(feedback_list, student, subjects):
    difficulty_map = {}
    priority_map = {}

    for sub in subjects:
        for t in sub.topics:
            difficulty_map[t.name] = t.difficulty
            priority_map[t.name] = t.priority

    data = []

    for fb in feedback_list:
        data.append({
            "difficulty": difficulty_map[fb.topic],
            "priority": priority_map[fb.topic],
            "planned_hours": fb.planned_hours,
            "focus_score": student.focus_score,
            "actual_hours": fb.actual_hours
        })

    return pd.DataFrame(data)

In [37]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [38]:
df = build_ml_dataset(opt_feedback, student, subjects)

X = df.drop("actual_hours", axis=1)
y = df["actual_hours"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [39]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [40]:
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

preds = model.predict(X_test)
mae = mean_absolute_error(y_test, preds)

print("MAE:", round(mae, 3))

MAE: 0.511


In [41]:
def update_topic_hours_with_ml(subjects, student, model):
    for sub in subjects:
        for t in sub.topics:
            features = pd.DataFrame([{
                "difficulty": t.difficulty,
                "priority": t.priority,
                "planned_hours": t.estimated_hours,
                "focus_score": student.focus_score
            }])

            predicted_hours = model.predict(features)[0]
            t.estimated_hours = round(predicted_hours, 2)

In [42]:
update_topic_hours_with_ml(subjects, student, model)

ml_optimized_plan = optimize_study_plan(subjects, student, exam_date)