In [1]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np

# ============================================================================
# PART 1: RECOMMENDATION SYSTEM CLASS
# ============================================================================

class RecommendationSystem:
    def __init__(self):
        self.model = None
        self.scaler = None
        self.cluster_labels = None
        self.recommendations = {
            0: {
                "cluster_name": "High Performers / Balanced",
                "title": "Keep your current routine – it's working perfectly.",
                "description": "The ideal students: High study + High score + Good sleep + Low stress + Low social media",
                "main_tips": [
                    "Continue Same study routine + 7–8h sleep + minimal distractions",
                    "Maintain your current study routine - it's working perfectly.",
                    "Ensure 7-8 hours of sleep nightly for optimal memory consolidation.",
                    "Balance study and leisure effectively to avoid burnout.",
                    "Keep managing stress well - your low stress levels are a strength.",
                    "Track your progress weekly and celebrate your achievements."
                ],
                "priority": "LOW"
            },
            1: {
                "cluster_name": "Stressed Overworkers",
                "title": "Reduce Stress & Improve Sleep - You're Burning Out!",
                "description": "Study too much, burn out: Very high study hours + High stress + Decent score + Low sleep",
                "main_tips": [
                    "Use Pomodoro (25 min study + 5 min break)",
                    "Add 10-min daily meditation",
                    "Protect sleep (7.5h minimum)",
                    "Take regular breaks every 30 minutes during study sessions.",
                    "Consider exercise or yoga to manage your high stress levels.",
                    "Review your study material before bed instead of right before exams."
                ],
                "priority": "HIGH"
            },
            2: {
                "cluster_name": "Distracted",
                "title": "Minimize Distractions - Focus on What Matters!",
                "description": "Heavy phone usage, low productivity: High social media hours + Low study + Low score + Normal sleep",
                "main_tips": [
                    "Block Instagram/TikTok during study hours (use Freedom or Cold Turkey app)",
                    "Study with phone in another room",
                    "Use app blockers to prevent access to social media apps.",
                    "Increase study hours gradually - aim for 1-2 more hours per week.",
                    "Create a dedicated study space free from distractions and notifications.",
                    "Set specific study goals for each session to stay motivated and focused."
                ],
                "priority": "HIGH"
            },
            3: {
                "cluster_name": "Sleep-Deprived",
                "title": "Critical: Sleep Deprivation - Fix Your Sleep NOW!",
                "description": "Stay up late, crash before exams: Low sleep + High stress + Irregular study + Poor score",
                "main_tips": [
                    "Fixed bedtime (before 11 PM)",
                    "Aim for 7–9 hours sleep nightly",
                    "Create a consistent daily schedule with fixed wake-up and sleep times.",
                    "Sleep helps consolidate learning - don't sacrifice sleep for more study time.",
                    "Reduce caffeine and screens 1 hour before bed for better sleep quality.",
                    "Stabilize your sleep routine first, then gradually increase study hours."
                ],
                "priority": "CRITICAL"
            }
        }

    def fit(self, df):
        """Train the K-Means model on student data"""
        features = ['study_hours', 'sleep_hours', 'social_media_hours', 'stress_level', 'exam_score']
        X = df[features].values
        self.scaler = StandardScaler()
        X_scaled = self.scaler.fit_transform(X)
        self.model = KMeans(n_clusters=4, random_state=42, n_init=10)
        self.cluster_labels = self.model.fit_predict(X_scaled)
        return self.cluster_labels

    def predict_cluster(self, student_data):
        """Predict cluster for a student"""
        if self.model is None:
            raise ValueError("Model not trained. Call fit() first.")
        data_array = np.array(student_data).reshape(1, -1)
        data_scaled = self.scaler.transform(data_array)
        cluster = self.model.predict(data_scaled)[0]
        return int(cluster)

    def get_recommendation(self, cluster_id):
        """Get full recommendation for a cluster"""
        return self.recommendations.get(cluster_id, {})

    def analyze_student(self, student_data):
        """Analyze a student and return their recommendation"""
        cluster_id = self.predict_cluster(student_data)
        rec = self.get_recommendation(cluster_id)
        return {
            "cluster_id": cluster_id,
            "cluster_name": rec.get("cluster_name", "Unknown"),
            "title": rec.get("title", "No title"),
            "description": rec.get("description", ""),
            "main_tips": rec.get("main_tips", []),
            "priority": rec.get("priority", "NORMAL"),
            "recommendations": rec.get("main_tips", [])
        }

    def get_cluster_stats(self, df, clusters):
        """Calculate statistics for each cluster"""
        df_temp = df.copy()
        df_temp['cluster'] = clusters
        stats = {}

        for cluster_id in range(4):
            cluster_data = df_temp[df_temp['cluster'] == cluster_id]
            rec = self.get_recommendation(cluster_id)

            stats[cluster_id] = {
                'cluster_name': rec.get('cluster_name', 'Unknown'),
                'count': len(cluster_data),
                'percentage': round((len(cluster_data) / len(df_temp)) * 100, 1),
                'avg_study_hours': round(cluster_data['study_hours'].mean(), 1),
                'avg_sleep_hours': round(cluster_data['sleep_hours'].mean(), 1),
                'avg_social_media_hours': round(cluster_data['social_media_hours'].mean(), 1),
                'avg_stress_level': round(cluster_data['stress_level'].mean(), 1),
                'avg_exam_score': round(cluster_data['exam_score'].mean(), 1),
            }

        return stats

    def print_recommendation(self, cluster_id):
        """Print a formatted recommendation"""
        rec = self.get_recommendation(cluster_id)

        print("=" * 70)
        print(f"Cluster {cluster_id}: {rec.get('cluster_name', 'Unknown')}")
        print("=" * 70)
        print(f"\nTitle: {rec.get('title', 'No title')}")
        print(f"Description: {rec.get('description', '')}")
        print(f"Priority: {rec.get('priority', 'NORMAL')}")
        print("\nRecommendations:")
        for i, tip in enumerate(rec.get('main_tips', []), 1):
            print(f" {i}. {tip}")
        print("\n")


# ============================================================================
# PART 2: CREATE SAMPLE DATA
# ============================================================================

data = {
    'study_hours': [7, 10, 3, 4, 8, 6, 9, 2, 5, 7],
    'sleep_hours': [8, 4, 7, 3, 7, 8, 4, 6, 3, 8],
    'social_media_hours': [1, 1, 5, 2, 2, 1, 1, 6, 3, 1],
    'stress_level': [3, 8, 6, 8, 4, 3, 7, 5, 9, 2],
    'exam_score': [90, 75, 55, 50, 85, 88, 72, 48, 45, 92]
}

df = pd.DataFrame(data)
rec_sys = RecommendationSystem()
clusters = rec_sys.fit(df)
df["cluster"] = clusters

# ============================================================================
# OUTPUT 1: HEADER AND INITIALIZATION
# ============================================================================

print("\n" + "="*70)
print("RECOMMENDATION SYSTEM - COMBINED OUTPUT FROM BOTH FILES")
print("="*70 + "\n")

print("HIGH PERFORMER STUDENT")
print("-" * 70)

# ============================================================================
# OUTPUT 2: FIRST EXAMPLE - HIGH PERFORMER
# ============================================================================

student = [7, 8, 1, 3, 90]
result = rec_sys.analyze_student(student)
print(f"Cluster: {result['cluster_name']}")
print(f"Title: {result['title']}")
print(f"Priority: {result['priority']}")
print("\nRecommendations:")
for i, tip in enumerate(result['recommendations'], 1):
    print(f" {i}. {tip}")

print("\n")

# ============================================================================
# OUTPUT 3: SECOND EXAMPLE - STRESSED OVERWORKER
# ============================================================================

print("STRESSED OVERWORKER STUDENT")
print("-" * 70)

student = [10, 4, 1, 8, 75]
result = rec_sys.analyze_student(student)
print(f"Cluster: {result['cluster_name']}")
print(f"Title: {result['title']}")
print(f"Priority: {result['priority']}")
print("\nRecommendations:")
for i, tip in enumerate(result['recommendations'], 1):
    print(f" {i}. {tip}")

print("\n")

# ============================================================================
# OUTPUT 4: THIRD EXAMPLE - DISTRACTED STUDENT
# ============================================================================

print("DISTRACTED STUDENT")
print("-" * 70)

student = [3, 7, 5, 6, 55]
result = rec_sys.analyze_student(student)
print(f"Cluster: {result['cluster_name']}")
print(f"Title: {result['title']}")
print(f"Priority: {result['priority']}")
print("\nRecommendations:")
for i, tip in enumerate(result['recommendations'], 1):
    print(f" {i}. {tip}")

print("\n")

# ============================================================================
# OUTPUT 5: FOURTH EXAMPLE - SLEEP-DEPRIVED STUDENT
# ============================================================================

print("SLEEP-DEPRIVED STUDENT")
print("-" * 70)

student = [5, 3, 2, 8, 50]
result = rec_sys.analyze_student(student)
print(f"Cluster: {result['cluster_name']}")
print(f"Title: {result['title']}")
print(f"Priority: {result['priority']}")
print("\nRecommendations:")
for i, tip in enumerate(result['recommendations'], 1):
    print(f" {i}. {tip}")

print("\n")

# ============================================================================
# OUTPUT 6: ALL CLUSTERS COMPREHENSIVE VIEW
# ============================================================================

print("\n" + "="*70)
print("ALL 4 CLUSTERS - COMPREHENSIVE VIEW")
print("="*70 + "\n")

for cluster_id in range(4):
    rec_sys.print_recommendation(cluster_id)

# ============================================================================
# OUTPUT 7: CLUSTER STATISTICS
# ============================================================================

print("\n" + "="*70)
print("CLUSTER STATISTICS")
print("="*70 + "\n")

stats = rec_sys.get_cluster_stats(df, clusters)

for cluster_id, stat in stats.items():
    print("-" * 70)
    print(f"Cluster {cluster_id}: {stat['cluster_name']}")
    print("-" * 70)
    print(f"  Count: {stat['count']} students ({stat['percentage']}%)")
    print(f"  Avg Study Hours: {stat['avg_study_hours']} hrs/day")
    print(f"  Avg Sleep Hours: {stat['avg_sleep_hours']} hrs/night")
    print(f"  Avg Social Media: {stat['avg_social_media_hours']} hrs/day")
    print(f"  Avg Stress Level: {stat['avg_stress_level']}/10")
    print(f"  Avg Exam Score: {stat['avg_exam_score']}%")
    print()

# ============================================================================
# OUTPUT 8: SUMMARY TABLE
# ============================================================================

print("\n" + "="*70)
print("SUMMARY TABLE - ALL 4 CLUSTERS")
print("="*70 + "\n")

print(f"{'Cluster':<10} {'Name':<25} {'Priority':<10} {'Count':<8} {'Avg Score':<10}")
print("-" * 70)

for cluster_id, stat in stats.items():
    rec = rec_sys.get_recommendation(cluster_id)
    print(f"{cluster_id:<10} {stat['cluster_name']:<25} {rec['priority']:<10} {stat['count']:<8} {stat['avg_exam_score']:<10}")

# ============================================================================
# COMPLETION MESSAGE
# ============================================================================

print("\n" + "="*70)
print("✓ COMPLETE OUTPUT GENERATED")
print("="*70)
print("\nBoth files merged successfully!")
print("All recommendations and statistics displayed above.")
print("\nTotal Clusters: 4")
print("Total Recommendations: 24 (6 per cluster)")
print("\n" + "="*70)



RECOMMENDATION SYSTEM - COMBINED OUTPUT FROM BOTH FILES

HIGH PERFORMER STUDENT
----------------------------------------------------------------------
Cluster: Stressed Overworkers
Title: Reduce Stress & Improve Sleep - You're Burning Out!
Priority: HIGH

Recommendations:
 1. Use Pomodoro (25 min study + 5 min break)
 2. Add 10-min daily meditation
 3. Protect sleep (7.5h minimum)
 4. Take regular breaks every 30 minutes during study sessions.
 5. Consider exercise or yoga to manage your high stress levels.
 6. Review your study material before bed instead of right before exams.


STRESSED OVERWORKER STUDENT
----------------------------------------------------------------------
Cluster: Distracted
Title: Minimize Distractions - Focus on What Matters!
Priority: HIGH

Recommendations:
 1. Block Instagram/TikTok during study hours (use Freedom or Cold Turkey app)
 2. Study with phone in another room
 3. Use app blockers to prevent access to social media apps.
 4. Increase study hours gra