# Education Effectiveness Attribution
## Analyzing Course Impact Across All Disciplines

This notebook analyzes the effectiveness of courses across:
- STEM (Math, Science, Technology)
- Humanities (Social Sciences, Languages, History)
- Arts (Visual, Performing, Creative Writing)

It also examines the interdisciplinary connections between these domains, using synthetically generated student data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from unified_attribution import CompleteUnifiedFramework

# Title banner: a 70-character rule above and below the notebook title.
print('='*70, 'EDUCATION EFFECTIVENESS ATTRIBUTION', '='*70, sep='\n')

## Define Educational Taxonomy

In [None]:
# Three-level taxonomy: domain -> category -> list of course names.
EDUCATION_TAXONOMY = dict(
    STEM=dict(
        Mathematics=['Algebra', 'Calculus', 'Statistics', 'Geometry'],
        Science=['Physics', 'Chemistry', 'Biology', 'Earth_Science'],
        Technology=['Computer_Science', 'Engineering', 'Data_Science'],
    ),
    Humanities=dict(
        Social_Sciences=['Psychology', 'Sociology', 'Economics', 'Political_Science'],
        Languages=['English', 'Spanish', 'Mandarin', 'Writing'],
        History=['World_History', 'US_History', 'Ancient_Civilizations'],
    ),
    Arts=dict(
        Visual_Arts=['Painting', 'Sculpture', 'Photography', 'Design'],
        Performing_Arts=['Music', 'Theater', 'Dance'],
        Creative_Writing=['Poetry', 'Fiction', 'Non_Fiction'],
    ),
)

# Flat list of every course, in taxonomy order (domain, then category,
# then position within the category).
all_courses = [
    course_name
    for domain_categories in EDUCATION_TAXONOMY.values()
    for category_courses in domain_categories.values()
    for course_name in category_courses
]

print(f'Total courses: {len(all_courses)}')
print(f'Domains: {list(EDUCATION_TAXONOMY.keys())}')

## Generate Student Data

In [None]:
# Simulate enrollment and grades for a synthetic student cohort.
n_students = 5000

# Seed a dedicated generator so that Restart & Run All reproduces the same
# cohort, and therefore the same attribution results and exported files.
SEED = 42
rng = np.random.default_rng(SEED)

# Course enrollment: one 0/1 column per course. Core courses are drawn with
# probability 0.9, every other course with probability 0.3.
core_courses = {'Algebra', 'English', 'Biology'}
enrollment_columns = {}
for course in all_courses:
    base_rate = 0.9 if course in core_courses else 0.3
    enrollment_columns[course] = rng.binomial(1, base_rate, n_students)
# Build the frame once instead of inserting columns one at a time.
student_data = pd.DataFrame(enrollment_columns)

# Generate grades with interdisciplinary effects.
# NOTE(review): a grade is drawn for every student in every course, whether
# or not the student is enrolled, so GPA and per-course averages below also
# include courses a student never took -- confirm this is intended.
course_load = student_data[all_courses].sum(axis=1).to_numpy()
grade_columns = {}
for course in all_courses:
    base_grade = rng.normal(75, 15, n_students)

    # Math helps science: +3 points for each of Algebra / Calculus taken.
    if course in ('Physics', 'Chemistry'):
        math_boost = (student_data['Algebra'] + student_data['Calculus']) * 3
        base_grade = base_grade + math_boost.to_numpy()

    # Breadth helps writing: the Writing grade gains 0.5 points per course
    # the student is enrolled in.
    # NOTE(review): the original comment read "Writing helps everything",
    # which is the reverse of what this code does -- confirm the intent.
    if course == 'Writing':
        base_grade = base_grade + course_load * 0.5

    grade_columns[course] = np.clip(base_grade, 0, 100)
grades = pd.DataFrame(grade_columns)

# Overall GPA: mean of the 0-100 grades rescaled by 1/25 (100 -> 4.0).
student_data['GPA'] = grades.mean(axis=1) / 25

print(f'Generated {n_students} student records')
print(f'Average GPA: {student_data["GPA"].mean():.2f}')
print(f'Courses per student: {student_data[all_courses].sum(axis=1).mean():.1f}')

## Compute Course Attribution

In [None]:
# Create student journeys
journeys = []
for idx, row in student_data[all_courses].iterrows():
    journey = [course for course in all_courses if row[course] == 1]
    journeys.append(journey)

# Run attribution
framework = CompleteUnifiedFramework(
    journeys=journeys,
    data=student_data[all_courses],
    epsilon=1.0
)

results, elapsed = framework.compute_complete_attribution()

print(f'Attribution computed in {elapsed:.2f}s')

## Analyze Course Effectiveness

In [None]:
# Use the 'hybrid' entry of the attribution results as a
# course -> weight mapping.
course_attribution = results['hybrid']

# (course, weight) pairs ordered from highest to lowest weight.
ranked_courses = list(course_attribution.items())
ranked_courses.sort(key=lambda entry: entry[1], reverse=True)

print('\n' + '='*70)
print('TOP 10 MOST IMPACTFUL COURSES')
print('='*70)
for rank, (course, impact) in enumerate(ranked_courses[:10], start=1):
    print(f'{rank:2d}. {course:25s}: {impact:.4f}')

## Interdisciplinary Connections

In [None]:
# Pairwise correlation of grades between all courses.
correlation_matrix = grades[all_courses].corr()

# Collect every unordered course pair whose correlation exceeds 0.5.
# Pairing each course only with the courses after it visits each pair once.
connections = []
for position, first_course in enumerate(all_courses):
    for second_course in all_courses[position + 1:]:
        strength = correlation_matrix.loc[first_course, second_course]
        if strength > 0.5:
            connections.append((first_course, second_course, strength))

# Strongest connections first.
connections = sorted(connections, key=lambda link: link[2], reverse=True)

print('\n' + '='*70)
print('TOP 10 INTERDISCIPLINARY CONNECTIONS')
print('='*70)
for rank, (left, right, strength) in enumerate(connections[:10], start=1):
    print(f'{rank:2d}. {left:20s} <-> {right:20s}: {strength:.3f}')

## Visualizations

In [None]:
# Four-panel summary figure: course impact, knowledge network, impact by
# domain, and a sample correlation heatmap. Saved to disk as a PNG.
fig, axes = plt.subplots(2, 2, figsize=(20, 16))
fig.suptitle('Education Attribution Analysis', fontsize=16, fontweight='bold')

# Plot 1: Course impact
ax = axes[0, 0]
top_20 = pd.DataFrame(ranked_courses[:20], columns=['Course', 'Impact'])
ax.barh(top_20['Course'], top_20['Impact'], color='steelblue')
# barh draws the first row at the bottom; flip the axis so that the
# highest-ranked course appears at the top of the chart.
ax.invert_yaxis()
ax.set_xlabel('Attribution Weight')
ax.set_title('Top 20 Most Impactful Courses')
ax.grid(axis='x', alpha=0.3)

# Plot 2: Knowledge network
# Nodes are the first 15 courses; edges come from the 20 strongest
# connections, kept only when both endpoints are among those nodes.
ax = axes[0, 1]
network_courses = all_courses[:15]
network_course_set = set(network_courses)  # hoisted: avoids re-slicing per edge
G = nx.Graph()
G.add_nodes_from(network_courses)
for c1, c2, corr in connections[:20]:
    if c1 in network_course_set and c2 in network_course_set:
        G.add_edge(c1, c2, weight=corr)

# Fixed seed so the layout (and the saved image) is the same on every run.
pos = nx.spring_layout(G, k=0.5, seed=42)
nx.draw_networkx(G, pos, ax=ax, node_color='lightblue',
                node_size=500, font_size=8, alpha=0.8)
ax.set_title('Interdisciplinary Knowledge Network')
ax.axis('off')

# Plot 3: Domain impact
# Sum the attribution of every course in a domain; courses without an
# attribution entry contribute 0.
ax = axes[1, 0]
domain_impacts = {
    domain: sum(
        course_attribution.get(c, 0)
        for courses in categories.values()
        for c in courses
    )
    for domain, categories in EDUCATION_TAXONOMY.items()
}

# Pass concrete lists rather than dict views to matplotlib.
ax.bar(list(domain_impacts.keys()), list(domain_impacts.values()), color='coral')
ax.set_ylabel('Total Attribution')
ax.set_title('Impact by Educational Domain')
ax.grid(axis='y', alpha=0.3)

# Plot 4: Correlation heatmap (first 10 courses only, to keep it readable)
ax = axes[1, 1]
sample_courses = all_courses[:10]
sample_corr = correlation_matrix.loc[sample_courses, sample_courses]
sns.heatmap(sample_corr, annot=True, fmt='.2f', cmap='coolwarm',
           ax=ax, square=True, cbar_kws={'label': 'Correlation'})
ax.set_title('Course Correlation Matrix (Sample)')

# Operate on this figure explicitly instead of pyplot's "current figure".
fig.tight_layout()
fig.savefig('education_attribution.png', dpi=300, bbox_inches='tight')
print('Saved: education_attribution.png')

## Export Results

In [None]:
# Course effectiveness report: one row per attributed course with its
# attribution weight, enrollment rate and mean grade, highest impact first.
attributed_courses = list(course_attribution.keys())
enrollment_rates = [student_data[name].mean() for name in attributed_courses]
average_grades = [grades[name].mean() for name in attributed_courses]
report = pd.DataFrame({
    'Course': attributed_courses,
    'Impact': list(course_attribution.values()),
    'Enrollment_Rate': enrollment_rates,
    'Avg_Grade': average_grades,
}).sort_values('Impact', ascending=False)
report.to_csv('course_effectiveness.csv', index=False)

# Connections: the strong course pairs found earlier, one row per pair.
connection_columns = ['Course1', 'Course2', 'Correlation']
connections_df = pd.DataFrame(connections, columns=connection_columns)
connections_df.to_csv('interdisciplinary_connections.csv', index=False)

print('\n' + '='*70)
print('ANALYSIS COMPLETE')
print('='*70)
for saved_message in (
    'Saved: course_effectiveness.csv',
    'Saved: interdisciplinary_connections.csv',
    'Saved: education_attribution.png',
):
    print(saved_message)