In [6]:
# Import Libraries
import pandas as pd
import numpy as np
import joblib
import os

# Create necessary folders
os.makedirs('logs', exist_ok=True)
os.makedirs('models', exist_ok=True)

print("Libraries imported successfully!")

Libraries imported successfully!


In [7]:
# FIXED: SMART FILE LOADING (Works everywhere!)
print("LOADING DATA AND CREATING CLUSTER PROFILES - FIXED VERSION")

def load_models_smart():

    # Possible locations for files
    possible_paths = [
        '',            # Current directory (Colab uploads here)
        '/content/',   # Google Colab
        './',          # Local current directory
        '../',         # Parent directory
        './models/',   # Models folder
    ]

    scaler = None
    kmeans = None
    df = None

    # Try to load scaler
    scaler_files = ['/content/scaler.pkl', 'scaler.pkl', 'scaler_model.pkl']
    for path in possible_paths:
        for scaler_file in scaler_files:
            try:
                full_path = os.path.join(path, scaler_file)
                if os.path.exists(full_path):
                    scaler = joblib.load(full_path)
                    print(f"Loaded scaler from: {full_path}")
                    break
            except:
                continue
        if scaler is not None:
            break

    # Try to load kmeans
    kmeans_files = ['/content/kmeans_model.pkl', 'kmeans.pkl']
    for path in possible_paths:
        for kmeans_file in kmeans_files:
            try:
                full_path = os.path.join(path, kmeans_file)
                if os.path.exists(full_path):
                    kmeans = joblib.load(full_path)
                    print(f"Loaded KMeans from: {full_path}")
                    break
            except:
                continue
        if kmeans is not None:
            break

    # Try to load data
    csv_files = ['/content/student_study_hours_preprocessed (1).csv', 'preprocessed_data.csv']
    for path in possible_paths:
        for csv_file in csv_files:
            try:
                full_path = os.path.join(path, csv_file)
                if os.path.exists(full_path):
                    df = pd.read_csv(full_path)
                    print(f"Loaded data from: {full_path}")
                    break
            except:
                continue
        if df is not None:
            break

    if scaler is None or kmeans is None or df is None:
        print("\ WARNING: Some files not found!")
        print("Please upload these files:")
        if scaler is None:
            print("  scaler.pkl or scaler (1).pkl")
        if kmeans is None:
            print("  kmeans_model.pkl")
        if df is None:
            print("  student_study_hours_preprocessed.csv")
        print("\nUpload them to this notebook and run this cell again.")
    else:
        print("\nAll files loaded successfully!")

    return scaler, kmeans, df

# Load models
scaler, kmeans, df = load_models_smart()

# Create cluster profiles (if data loaded)
if df is not None:
    print("CREATING CLUSTER PROFILES")

    # Create cluster profiles from successful students
    successful_students = df[df['dropout_risk'] == 'No'].copy()
    df_sorted = successful_students.sort_values(by="previous_gpa", ascending=False)

    # FIXED: Added missing closing parenthesis!
    top_df = df_sorted.groupby("habit_cluster").head(
        int(0.2 * len(successful_students) / df['habit_cluster'].nunique())
    )

    # Define features
    habit_columns = [
        "study_hours_per_day",
        "sleep_hours",
        "social_media_hours",
        "exercise_frequency",
        "attendance_percentage",
    ]

    onehot_columns = [col for col in df.columns if "learning_style" in col or "study_environment" in col]
    cluster_profile = top_df.groupby("habit_cluster")[habit_columns + onehot_columns].mean()

    print(f"Loaded {len(df)} student records")
    print(f"Using {len(successful_students)} successful students")
    print(f"Created {len(cluster_profile)} cluster profiles")
    print(f"Habit columns: {len(habit_columns)}")
    print(f"One-hot columns: {len(onehot_columns)}")
    print("\nSETUP COMPLETE! Ready to generate recommendations!")
else:
    print("\nSkipping cluster profile creation (data not loaded)")
    cluster_profile = None

LOADING DATA AND CREATING CLUSTER PROFILES - FIXED VERSION
Loaded scaler from: /content/scaler.pkl
Loaded KMeans from: /content/kmeans_model.pkl




Loaded data from: /content/student_study_hours_preprocessed (1).csv

All files loaded successfully!
CREATING CLUSTER PROFILES
Loaded 80000 student records
Using 67723 successful students
Created 3 cluster profiles
Habit columns: 5
One-hot columns: 7

SETUP COMPLETE! Ready to generate recommendations!


In [8]:
# Helper Functions

def get_study_technique(cluster_id):
    """Return study technique based on cluster"""
    techniques = {
        0: "Pomodoro Technique (25 min focus + 5 min break)",
        1: "Time Blocking (90 min sessions)",
        2: "Spaced Repetition (Regular review intervals)"
    }
    return techniques.get(cluster_id, "Active Recall")


def get_detailed_techniques(cluster_id, student_profile):
    """Generate detailed study techniques"""
    techniques = []

    techniques.append({
        "name": "Active Recall",
        "description": "Test yourself instead of re-reading",
        "how_to": "After studying, close your book and write down everything you remember",
        "frequency": "After each study session"
    })

    techniques.append({
        "name": "Spaced Repetition",
        "description": "Review material at increasing intervals",
        "how_to": "Review today's material tomorrow, then in 3 days, then in 7 days, then in 14 days",
        "frequency": "Follow the schedule strictly"
    })

    techniques.append({
        "name": "Pomodoro Technique",
        "description": "Study in focused time blocks",
        "how_to": "25 minutes intense focus + 5 minutes break. After 4 rounds, take 15-30 min break",
        "frequency": "Use for all study sessions"
    })

    techniques.append({
        "name": "Feynman Technique",
        "description": "Teach concepts to learn deeply",
        "how_to": "Explain the concept in simple terms as if teaching a 10-year-old",
        "frequency": "Use for difficult concepts"
    })

    if student_profile.get('attendance', 100) < 70:
        techniques.append({
            "name": "Pre-Class Preparation",
            "description": "Skim materials before class",
            "how_to": "Spend 15 minutes reviewing topics before each class",
            "frequency": "Before every class"
        })

    return techniques


def generate_time_management_tips(student_profile, ideal):
    """Generate time management recommendations"""
    tips = []

    tips.append("Use a daily planner or digital app (Google Calendar, Notion, Todoist)")
    tips.append("Wake up at consistent time (recommended: 6:00 AM)")
    tips.append("Use apps like Forest or Freedom to block distracting websites during study time")

    if student_profile.get('social_media', 0) > ideal['social_media_hours']:
        tips.append(f"Reduce social media to {ideal['social_media_hours']:.1f} hours/day (currently {student_profile.get('social_media', 0):.1f} hours)")
        tips.append("Keep phone in another room during study sessions")

    tips.append("Use the 2-Minute Rule: If a task takes less than 2 minutes, do it immediately")
    tips.append("Prioritize tasks using Eisenhower Matrix (Urgent/Important quadrants)")
    tips.append("Set specific daily goals (not just 'study more' but '3 chapters of Physics')")

    return tips


def generate_wellness_routine(student_profile, ideal):
    """Generate wellness and self-care recommendations"""
    routine = {}

    # Sleep routine
    target_sleep = ideal['sleep_hours']
    current_sleep = student_profile.get('sleep_hours', 7)

    routine['sleep'] = {
        "target": f"{target_sleep:.1f} hours",
        "current": f"{current_sleep:.1f} hours",
        "tips": [
            f"Sleep by 10:30 PM to wake up at 6:00 AM ({target_sleep:.0f} hours)",
            "No screens 1 hour before bed",
            "Keep room cool and dark",
            "Avoid caffeine after 4 PM"
        ]
    }

    # Exercise routine
    target_exercise = ideal['exercise_frequency']
    current_exercise = student_profile.get('exercise', 3)

    routine['exercise'] = {
        "target": f"{target_exercise:.0f} times/week",
        "current": f"{current_exercise:.0f} times/week",
        "recommended_activities": [
            "Morning jog/walk (30 min)",
            "Yoga or stretching (20 min)",
            "Gym workout (45 min)",
            "Cycling or sports (1 hour)",
            "Swimming (30 min)"
        ],
        "tips": [
            "Exercise before studying to boost focus",
            "Even 10-minute walks help during study breaks"
        ]
    }

    # Nutrition
    routine['nutrition'] = {
        "tips": [
            "Eat protein-rich breakfast (eggs, oats, nuts)",
            "Drink 8 glasses of water daily",
            "Healthy snacks during study (fruits, nuts)",
            "Avoid heavy meals before studying",
            "Limit coffee to 1-2 cups/day"
        ]
    }

    # Mental health
    routine['mental_health'] = {
        "stress_management": [
            "5-minute meditation before studying",
            "Journal for 10 minutes daily",
            "Talk to friends/family when stressed",
            "Listen to focus music (lo-fi, classical)",
            "Spend time outdoors daily"
        ]
    }

    return routine

print("Helper functions defined successfully!")

Helper functions defined successfully!


In [9]:
# Study Routine Generator

def generate_study_routine(student_profile, cluster_id):
    """
    Generate a personalized weekly study routine

    Args:
        student_profile: Dictionary with current student stats
        cluster_id: Student's cluster assignment

    Returns:
        Dictionary with detailed study routine
    """

    ideal = cluster_profile.loc[cluster_id]
    target_study_hours = ideal['study_hours_per_day']

    # Determine optimal study times based on cluster
    if cluster_id == 0:  # High-performing balanced students
        study_blocks = [
            {"time": "06:00 AM - 08:00 AM", "duration": 2, "type": "Morning Deep Work", "subjects": "Core subjects (Math, Science)"},
            {"time": "02:00 PM - 04:00 PM", "duration": 2, "type": "Afternoon Review", "subjects": "Review and practice"},
            {"time": "07:00 PM - 09:00 PM", "duration": 2, "type": "Evening Study", "subjects": "Assignments and projects"}
        ]
    elif cluster_id == 1:  # Moderate performers
        study_blocks = [
            {"time": "08:00 AM - 10:00 AM", "duration": 2, "type": "Morning Study", "subjects": "Priority subjects"},
            {"time": "04:00 PM - 06:00 PM", "duration": 2, "type": "Evening Study", "subjects": "Review and homework"}
        ]
    else:  # Cluster 2 - Need improvement
        study_blocks = [
            {"time": "09:00 AM - 11:00 AM", "duration": 2, "type": "Focused Study", "subjects": "Weak areas"},
            {"time": "05:00 PM - 07:00 PM", "duration": 2, "type": "Practice Session", "subjects": "Problem-solving"}
        ]

    # Generate weekly routine
    days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    weekly_routine = {}

    for day in days:
        if day in ["Saturday", "Sunday"]:
            daily_hours = min(target_study_hours * 0.7, 5)
            daily_blocks = study_blocks[:2]
        else:
            daily_hours = target_study_hours
            daily_blocks = study_blocks

        weekly_routine[day] = {
            "total_hours": round(daily_hours, 1),
            "study_blocks": daily_blocks,
            "breaks": "10 min break after each hour",
            "focus_technique": get_study_technique(cluster_id)
        }

    # Get additional components
    study_techniques = get_detailed_techniques(cluster_id, student_profile)
    time_management = generate_time_management_tips(student_profile, ideal)
    wellness_routine = generate_wellness_routine(student_profile, ideal)

    return {
        "weekly_schedule": weekly_routine,
        "study_techniques": study_techniques,
        "time_management_tips": time_management,
        "wellness_routine": wellness_routine,
        "target_metrics": {
            "daily_study_hours": round(target_study_hours, 1),
            "weekly_study_hours": round(target_study_hours * 7, 1),
            "sleep_hours": round(ideal['sleep_hours'], 1),
            "exercise_per_week": int(ideal['exercise_frequency']),
            "social_media_limit": round(ideal['social_media_hours'], 1)
        }
    }

print("Study routine generator defined!")

Study routine generator defined!


In [10]:
# Action Plan & Progress Tracking

def generate_action_plan(priority_score):
    """Generate week-by-week action plan"""

    if priority_score >= 10:
        timeline = "8-12 weeks"
    elif priority_score >= 6:
        timeline = "6-8 weeks"
    else:
        timeline = "4-6 weeks"

    action_plan = {
        "timeline": timeline,
        "phases": []
    }

    # Phase 1
    action_plan["phases"].append({
        "week": "1-2",
        "focus": "Foundation Building",
        "goals": [
            "Establish consistent sleep schedule",
            "Increase study time by 1 hour/day",
            "Attend all classes",
            "Reduce social media by 30 minutes/day"
        ]
    })

    # Phase 2
    action_plan["phases"].append({
        "week": "3-4",
        "focus": "Habit Reinforcement",
        "goals": [
            "Maintain Phase 1 improvements",
            "Add 1 more hour of study time",
            "Implement Pomodoro technique",
            "Start exercise routine (3x/week)"
        ]
    })

    # Phase 3
    action_plan["phases"].append({
        "week": "5-6",
        "focus": "Optimization",
        "goals": [
            "Reach target study hours",
            "Master active recall technique",
            "Achieve 90%+ attendance",
            "Balance all wellness metrics"
        ]
    })

    # Phase 4
    action_plan["phases"].append({
        "week": "7+",
        "focus": "Maintenance & Excellence",
        "goals": [
            "Maintain all target metrics",
            "Help peers with study strategies",
            "Continuously improve weak subjects"
        ]
    })

    return action_plan


def generate_progress_metrics(current, ideal):
    """Generate trackable progress metrics"""

    metrics = []

    metrics.append({
        "metric": "Study Hours/Day",
        "current": round(current['study_hours'], 1),
        "target": round(ideal['study_hours_per_day'], 1),
        "unit": "hours",
        "tracking_method": "Daily log in planner"
    })

    metrics.append({
        "metric": "Sleep Hours/Day",
        "current": round(current['sleep_hours'], 1),
        "target": round(ideal['sleep_hours'], 1),
        "unit": "hours",
        "tracking_method": "Sleep tracking app"
    })

    metrics.append({
        "metric": "Attendance Rate",
        "current": round(current['attendance'], 1),
        "target": round(ideal['attendance_percentage'], 1),
        "unit": "%",
        "tracking_method": "Class attendance record"
    })

    metrics.append({
        "metric": "Social Media Usage",
        "current": round(current['social_media'], 1),
        "target": round(ideal['social_media_hours'], 1),
        "unit": "hours/day",
        "tracking_method": "Screen time app"
    })

    metrics.append({
        "metric": "Exercise Frequency",
        "current": round(current['exercise'], 0),
        "target": round(ideal['exercise_frequency'], 0),
        "unit": "times/week",
        "tracking_method": "Fitness tracker"
    })

    return metrics

print("Action plan and progress tracking functions defined!")

Action plan and progress tracking functions defined!


In [11]:
# Main Function: Get Personalized Study Plan (Existing Students)

def get_personalized_study_plan(student_id):
    """
    Generate complete personalized study plan for existing student

    Args:
        student_id: Student ID from database

    Returns:
        Complete study plan dictionary
    """

    if student_id not in df['student_id'].values:
        return {"error": "Student ID not found"}

    # Get student data
    student = df[df['student_id'] == student_id].iloc[0]
    cluster = student['habit_cluster']
    ideal = cluster_profile.loc[cluster]

    # Create student profile
    student_profile = {
        'study_hours': student['study_hours_per_day'],
        'sleep_hours': student['sleep_hours'],
        'attendance': student['attendance_percentage'],
        'social_media': student['social_media_hours'],
        'exercise': student['exercise_frequency']
    }

    # Generate gap analysis
    recommendations = []
    priority_score = 0

    if student['study_hours_per_day'] < ideal['study_hours_per_day']:
        diff = round(ideal['study_hours_per_day'] - student['study_hours_per_day'], 1)
        recommendations.append({
            'category': 'Study Time',
            'priority': 'High',
            'current': round(student['study_hours_per_day'], 1),
            'target': round(ideal['study_hours_per_day'], 1),
            'gap': diff,
            'message': f"Increase study hours by {diff} hours/day"
        })
        priority_score += 3

    if student['sleep_hours'] < ideal['sleep_hours']:
        diff = round(ideal['sleep_hours'] - student['sleep_hours'], 1)
        recommendations.append({
            'category': 'Sleep',
            'priority': 'High',
            'current': round(student['sleep_hours'], 1),
            'target': round(ideal['sleep_hours'], 1),
            'gap': diff,
            'message': f"Get {diff} more hours of sleep"
        })
        priority_score += 3

    if student['attendance_percentage'] < ideal['attendance_percentage']:
        diff = round(ideal['attendance_percentage'] - student['attendance_percentage'], 1)
        recommendations.append({
            'category': 'Attendance',
            'priority': 'High',
            'current': round(student['attendance_percentage'], 1),
            'target': round(ideal['attendance_percentage'], 1),
            'gap': diff,
            'message': f"Improve attendance by {diff}%"
        })
        priority_score += 3

    if student['social_media_hours'] > ideal['social_media_hours']:
        diff = round(student['social_media_hours'] - ideal['social_media_hours'], 1)
        recommendations.append({
            'category': 'Digital Wellness',
            'priority': 'Medium',
            'current': round(student['social_media_hours'], 1),
            'target': round(ideal['social_media_hours'], 1),
            'gap': diff,
            'message': f"Reduce social media by {diff} hours/day"
        })
        priority_score += 2

    if student['exercise_frequency'] < ideal['exercise_frequency']:
        diff = round(ideal['exercise_frequency'] - student['exercise_frequency'], 1)
        recommendations.append({
            'category': 'Physical Health',
            'priority': 'Medium',
            'current': round(student['exercise_frequency'], 1),
            'target': round(ideal['exercise_frequency'], 1),
            'gap': diff,
            'message': f"Exercise {diff} more times/week"
        })
        priority_score += 2

    # Generate complete study routine
    study_routine = generate_study_routine(student_profile, cluster)

    # Risk assessment
    risk_status = "At Risk" if student['dropout_risk'] == 'Yes' else "Low Risk"

    # Complete response
    return {
        'student_info': {
            'student_id': int(student_id),
            'cluster': int(cluster),
            'cluster_name': f"Cluster {cluster}",
            'risk_status': risk_status,
            'priority_score': priority_score
        },
        'current_habits': student_profile,
        'gap_analysis': recommendations,
        'personalized_study_routine': study_routine,
        'action_plan': generate_action_plan(priority_score),
        'progress_tracking': generate_progress_metrics(student_profile, ideal)
    }

print("Main recommendation function defined!")

Main recommendation function defined!


In [12]:
# Predict for New Students

def predict_for_new_student(new_student_data):
    """
    Accept new student data and provide predictions + recommendations

    Args:
        new_student_data: Dictionary with student information

    Returns:
        Complete analysis and personalized study plan
    """

    # Required fields
    required_fields = [
        'study_hours_per_day', 'social_media_hours', 'netflix_hours',
        'attendance_percentage', 'sleep_hours', 'screen_time',
        'time_management_score', 'exercise_frequency'
    ]

    # Validate input
    for field in required_fields:
        if field not in new_student_data:
            return {"error": f"Missing required field: {field}"}

    # Check if models are loaded
    if scaler is None or kmeans is None:
        return {"error": "Models not loaded. Please ensure scaler.pkl and kmeans_model.pkl exist."}

    try:
        # Prepare data for clustering
        clustering_features = [
            'study_hours_per_day', 'social_media_hours', 'netflix_hours',
            'attendance_percentage', 'sleep_hours', 'screen_time', 'time_management_score'
        ]

        X_cluster = np.array([[new_student_data[f] for f in clustering_features]])
        X_cluster_scaled = scaler.transform(X_cluster)

        # Predict cluster
        cluster = kmeans.predict(X_cluster_scaled)[0]

        # Get ideal profile for cluster
        ideal = cluster_profile.loc[cluster]

        # Create student profile
        student_profile = {
            'study_hours': new_student_data['study_hours_per_day'],
            'sleep_hours': new_student_data['sleep_hours'],
            'attendance': new_student_data['attendance_percentage'],
            'social_media': new_student_data['social_media_hours'],
            'exercise': new_student_data['exercise_frequency']
        }

        # Generate recommendations
        recommendations = []
        priority_score = 0

        if new_student_data['study_hours_per_day'] < ideal['study_hours_per_day']:
            diff = round(ideal['study_hours_per_day'] - new_student_data['study_hours_per_day'], 1)
            recommendations.append({
                'category': 'Study Time',
                'priority': 'High',
                'current': round(new_student_data['study_hours_per_day'], 1),
                'target': round(ideal['study_hours_per_day'], 1),
                'gap': diff,
                'message': f"Increase study hours by {diff} hours/day"
            })
            priority_score += 3

        if new_student_data['sleep_hours'] < ideal['sleep_hours']:
            diff = round(ideal['sleep_hours'] - new_student_data['sleep_hours'], 1)
            recommendations.append({
                'category': 'Sleep',
                'priority': 'High',
                'current': round(new_student_data['sleep_hours'], 1),
                'target': round(ideal['sleep_hours'], 1),
                'gap': diff,
                'message': f"Get {diff} more hours of sleep"
            })
            priority_score += 3

        if new_student_data['attendance_percentage'] < ideal['attendance_percentage']:
            diff = round(ideal['attendance_percentage'] - new_student_data['attendance_percentage'], 1)
            recommendations.append({
                'category': 'Attendance',
                'priority': 'High',
                'current': round(new_student_data['attendance_percentage'], 1),
                'target': round(ideal['attendance_percentage'], 1),
                'gap': diff,
                'message': f"Improve attendance by {diff}%"
            })
            priority_score += 3

        if new_student_data['social_media_hours'] > ideal['social_media_hours']:
            diff = round(new_student_data['social_media_hours'] - ideal['social_media_hours'], 1)
            recommendations.append({
                'category': 'Digital Wellness',
                'priority': 'Medium',
                'current': round(new_student_data['social_media_hours'], 1),
                'target': round(ideal['social_media_hours'], 1),
                'gap': diff,
                'message': f"Reduce social media by {diff} hours/day"
            })
            priority_score += 2

        if new_student_data['exercise_frequency'] < ideal['exercise_frequency']:
            diff = round(ideal['exercise_frequency'] - new_student_data['exercise_frequency'], 1)
            recommendations.append({
                'category': 'Physical Health',
                'priority': 'Medium',
                'current': round(new_student_data['exercise_frequency'], 1),
                'target': round(ideal['exercise_frequency'], 1),
                'gap': diff,
                'message': f"Exercise {diff} more times/week"
            })
            priority_score += 2

        # Generate study routine
        study_routine = generate_study_routine(student_profile, cluster)

        # Estimate risk
        if priority_score >= 10:
            risk_status = "High Risk"
        elif priority_score >= 6:
            risk_status = "Moderate Risk"
        else:
            risk_status = "Low Risk"

        return {
            'student_info': {
                'cluster': int(cluster),
                'cluster_name': f"Cluster {cluster}",
                'risk_status': risk_status,
                'priority_score': priority_score
            },
            'current_habits': student_profile,
            'gap_analysis': recommendations,
            'personalized_study_routine': study_routine,
            'action_plan': generate_action_plan(priority_score),
            'progress_tracking': generate_progress_metrics(student_profile, ideal)
        }

    except Exception as e:
        return {"error": f"Prediction failed: {str(e)}"}

print("New student prediction function defined!")

New student prediction function defined!


In [13]:
# Display Function

def display_study_plan(plan):
    """Display the study plan in a formatted way"""

    if 'error' in plan:
        print(f"Error: {plan['error']}")
        return

    print("PERSONALIZED STUDY PLAN - STUDYTRACK AI")


    # Student Info
    info = plan['student_info']
    student_id_str = str(info.get('student_id', 'New Student'))
    print(f"\nStudent ID: {student_id_str}")
    print(f"Cluster: {info['cluster_name']}")
    print(f"Risk Status: {info['risk_status']}")
    print(f"Priority Score: {info['priority_score']}/15")

    # Current vs Target
    if plan['gap_analysis']:
        print("CURRENT HABITS vs TARGETS")

        for rec in plan['gap_analysis']:
            print(f"\n{rec['category']}")
            print(f"   Current: {rec['current']} | Target: {rec['target']} | Gap: {rec['gap']}")

    # Weekly Schedule
    print("WEEKLY STUDY SCHEDULE")

    routine = plan['personalized_study_routine']
    print(f"\nTarget: {routine['target_metrics']['daily_study_hours']} hours/day")
    print(f"Weekly Total: {routine['target_metrics']['weekly_study_hours']} hours")

    print("\nSample Daily Schedule (Monday-Friday):")
    sample_day = routine['weekly_schedule']['Monday']
    for block in sample_day['study_blocks']:
        print(f" {block['time']}: {block['type']} ({block['duration']} hrs)")
        print(f" Focus: {block['subjects']}")

    print(f"\nBreaks: {sample_day['breaks']}")
    print(f"Technique: {sample_day['focus_technique']}")

    # Study Techniques
    print("RECOMMENDED STUDY TECHNIQUES")

    for i, tech in enumerate(routine['study_techniques'][:3], 1):
        print(f"\n{i}. {tech['name']}")
        print(f"   What: {tech['description']}")
        print(f"   How: {tech['how_to']}")
        print(f"   When: {tech['frequency']}")

    # Time Management
    print("TIME MANAGEMENT TIPS")

    for tip in routine['time_management_tips'][:5]:
        print(f"\n  {tip}")

    # Wellness
    print("WELLNESS ROUTINE")

    wellness = routine['wellness_routine']
    print(f"\nSleep: {wellness['sleep']['target']} (currently {wellness['sleep']['current']})")
    for tip in wellness['sleep']['tips'][:3]:
        print(f"   • {tip}")

    print(f"\nExercise: {wellness['exercise']['target']} (currently {wellness['exercise']['current']})")
    for activity in wellness['exercise']['recommended_activities'][:3]:
        print(f"   • {activity}")

    # Action Plan
    print("ACTION PLAN")

    action = plan['action_plan']
    print(f"\nTimeline: {action['timeline']}")

    for phase in action['phases'][:2]:
        print(f"\n Week {phase['week']}: {phase['focus']}")
        for goal in phase['goals']:
            print(f"   - {goal}")

    print("Your success journey starts now! Track daily progress.")

print("Display function defined!")

Display function defined!


In [14]:
import pandas as pd
import numpy as np
import os

print("LOADING DATA FOR RECOMMENDATION ENGINE")

# Load CSV data
csv_files = [
    '/content/student_study_hours_preprocessed (1).csv',
    '/content/student_study_hours_preprocessed.csv',
    '/content/student_study_hours_preprocessed (1).csv'
]

df = None
for csv_file in csv_files:
    if os.path.exists(csv_file):
        df = pd.read_csv(csv_file)
        print(f"Loaded: {csv_file} ({len(df)} records)")
        break

if df is None:
    print("ERROR: Upload 'student_study_hours_preprocessed.csv' first!")
    print("   1. Click folder icon on left")
    print("   2. Upload your CSV file")
    print("   3. Run this cell again")
else:
    # Create cluster profiles
    print("Creating cluster profiles...")
    successful_students = df[df['dropout_risk'] == 'No'].copy()
    df_sorted = successful_students.sort_values(by="previous_gpa", ascending=False)

    top_df = df_sorted.groupby("habit_cluster").head(
        int(0.2 * len(successful_students) / df['habit_cluster'].nunique())
    )

    habit_columns = [
        "study_hours_per_day", "sleep_hours", "social_media_hours",
        "exercise_frequency", "attendance_percentage"
    ]

    onehot_columns = [col for col in df.columns if "learning_style" in col or "study_environment" in col]
    cluster_profile = top_df.groupby("habit_cluster")[habit_columns + onehot_columns].mean()

    print(f"Created {len(cluster_profile)} cluster profiles")
    print("READY! Now run your test code.")


LOADING DATA FOR RECOMMENDATION ENGINE
Loaded: /content/student_study_hours_preprocessed (1).csv (80000 records)
Creating cluster profiles...
Created 3 cluster profiles
READY! Now run your test code.


In [15]:
# Test with existing student
print("TESTING WITH EXISTING STUDENT")

test_student_id = 100059
complete_plan = get_personalized_study_plan(test_student_id)
display_study_plan(complete_plan)

TESTING WITH EXISTING STUDENT
PERSONALIZED STUDY PLAN - STUDYTRACK AI

Student ID: 100059
Cluster: Cluster 1
Risk Status: At Risk
Priority Score: 13/15
CURRENT HABITS vs TARGETS

Study Time
   Current: 2.2 | Target: 3.7 | Gap: 1.5

Sleep
   Current: 6.6 | Target: 7.1 | Gap: 0.5

Attendance
   Current: 49.3 | Target: 70.9 | Gap: 21.6

Digital Wellness
   Current: 1.7 | Target: 1.1 | Gap: 0.6

Physical Health
   Current: 2 | Target: 3.8 | Gap: 1.8
WEEKLY STUDY SCHEDULE

Target: 3.7 hours/day
Weekly Total: 25.9 hours

Sample Daily Schedule (Monday-Friday):
 08:00 AM - 10:00 AM: Morning Study (2 hrs)
 Focus: Priority subjects
 04:00 PM - 06:00 PM: Evening Study (2 hrs)
 Focus: Review and homework

Breaks: 10 min break after each hour
Technique: Time Blocking (90 min sessions)
RECOMMENDED STUDY TECHNIQUES

1. Active Recall
   What: Test yourself instead of re-reading
   How: After studying, close your book and write down everything you remember
   When: After each study session

2. Spaced R

In [16]:
# Test with new student
print("TESTING WITH NEW STUDENT DATA")

# New student data
new_student = {
    'age': 20,
    'study_hours_per_day': 3.5,
    'social_media_hours': 4.0,
    'netflix_hours': 2.0,
    'attendance_percentage': 65.0,
    'sleep_hours': 6.0,
    'exercise_frequency': 2,
    'mental_health_rating': 6.5,
    'stress_level': 7.0,
    'time_management_score': 4.5,
    'screen_time': 9.5
}

new_plan = predict_for_new_student(new_student)

if 'error' in new_plan:
    print(f"\nError: {new_plan['error']}")
else:
    print("\nNew student analysis complete!")
    print(f"Cluster: {new_plan['student_info']['cluster']}")
    print(f"Risk: {new_plan['student_info']['risk_status']}")
    print(f"Priority Score: {new_plan['student_info']['priority_score']}/15")
    print(f"Recommendations: {len(new_plan['gap_analysis'])} areas to improve")

    print("DISPLAYING COMPLETE PLAN")
    display_study_plan(new_plan)

TESTING WITH NEW STUDENT DATA

New student analysis complete!
Cluster: 2
Risk: High Risk
Priority Score: 13/15
Recommendations: 5 areas to improve
DISPLAYING COMPLETE PLAN
PERSONALIZED STUDY PLAN - STUDYTRACK AI

Student ID: New Student
Cluster: Cluster 2
Risk Status: High Risk
Priority Score: 13/15
CURRENT HABITS vs TARGETS

Study Time
   Current: 3.5 | Target: 3.6 | Gap: 0.1

Sleep
   Current: 6.0 | Target: 7.1 | Gap: 1.1

Attendance
   Current: 65.0 | Target: 72.5 | Gap: 7.5

Digital Wellness
   Current: 4.0 | Target: 3.7 | Gap: 0.3

Physical Health
   Current: 2 | Target: 3.7 | Gap: 1.7
WEEKLY STUDY SCHEDULE

Target: 3.6 hours/day
Weekly Total: 25.2 hours

Sample Daily Schedule (Monday-Friday):
 09:00 AM - 11:00 AM: Focused Study (2 hrs)
 Focus: Weak areas
 05:00 PM - 07:00 PM: Practice Session (2 hrs)
 Focus: Problem-solving

Breaks: 10 min break after each hour
Technique: Spaced Repetition (Regular review intervals)
RECOMMENDED STUDY TECHNIQUES

1. Active Recall
   What: Test you



In [19]:
# Save K-Means model
import joblib
import os

os.makedirs('models', exist_ok=True)

joblib.dump(kmeans, 'models/kmeans_model.pkl')
print(" K-Means model saved to models/kmeans_model.pkl")


 K-Means model saved to models/kmeans_model.pkl
