# Day 5 Module 1: Core Quantum ML Foundations 🧬

## **Welcome to Day 5 - Quantum Machine Learning Integration!**

This is **Module 1 of 3** for Day 5, focusing on fundamental quantum ML concepts and the QM9 dataset.

### **📍 Module Navigation:**
- **Current**: Module 1 - Core Quantum ML Foundations (this notebook)
- **Next**: Module 2 - Advanced Quantum ML Architectures
- **Final**: Module 3 - Production Integration & Applications

### **Module 1 Learning Objectives:**
- Master the QM9 dataset and quantum property prediction
- Learn the basic concepts behind the SchNet architecture (implemented in Module 2)
- Understand quantum feature engineering principles
- Complete foundational assessment checkpoints

### **Prerequisites from Previous Days:**
- Day 1: ML & Cheminformatics foundations
- Day 2: Deep learning for molecules
- Day 3: Molecular analysis pipelines
- Day 4: Quantum chemistry calculations

---

## **Section 1: QM9 Dataset Mastery & Quantum Feature Engineering** 🧬

Let's start by mastering the QM9 dataset - one of the most important quantum ML benchmarks!

In [None]:
# Essential imports for Quantum ML
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional, Union, Any
import warnings
warnings.filterwarnings('ignore')

# Core scientific computing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch_geometric.data import Data, Batch
from torch_geometric.nn import MessagePassing, global_add_pool, global_mean_pool
from torch_geometric.utils import add_self_loops, degree

# Chemistry and quantum computing
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, rdMolDescriptors
import deepchem as dc
from ase import Atoms
from ase.io import read, write

# ML and optimization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import optuna

# Visualization and analysis
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import joblib
import pickle
from pathlib import Path
import json
import time
from datetime import datetime
import logging

# Configure root logging at INFO level and create the logger that the
# classes defined later in this notebook write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment banner: reaching this point means the imports above succeeded;
# the versions are printed to make runs easier to reproduce.
print(
    "🎯 Quantum ML Integration Environment Ready!",
    f"📊 PyTorch version: {torch.__version__}",
    f"🧪 RDKit available: {Chem is not None}",
    f"🔬 DeepChem version: {dc.__version__}",
    sep="\n",
)

In [None]:
# 🎓 **MODULE 1 ASSESSMENT FRAMEWORK INITIALIZATION**
#
# Sets up progress tracking for this module. After this cell runs, the names
# `create_assessment`, `create_widget`, `create_dashboard`, `student_id` and
# `assessment` are defined; `assessment` is None if initialization failed.

print("🎓 MODULE 1 ASSESSMENT FRAMEWORK INITIALIZATION")
print("="*70)

# Prefer the course-provided framework. If it is not importable, define no-op
# stand-ins exposing the same functions so the rest of the notebook still runs.
try:
    from assessment_framework import create_assessment, create_widget, create_dashboard
    print("✅ Assessment framework loaded successfully")
except ImportError:
    print("⚠️ Assessment framework not found. Please ensure assessment_framework.py is available.")
    print("📁 Expected location: same directory as this notebook")
    # Create a basic assessment object for fallback
    class BasicAssessment:
        # Every method is a no-op (or returns an empty result) on purpose.
        def start_section(self, section): pass
        def end_section(self, section): pass
        def record_activity(self, activity, result, metadata=None): pass
        def get_progress_summary(self): return {"overall_score": 0.0, "section_scores": {}}
        def get_comprehensive_report(self): return {"activities": []}
        def save_final_report(self, filename): pass
    
    class BasicWidget:
        def display(self): print("📋 Assessment widget would appear here")
    
    def create_assessment(student_id, day=5, track="quantum_ml"):
        return BasicAssessment()
    
    def create_widget(assessment, section, concepts, activities):
        return BasicWidget()
    
    def create_dashboard(assessment):
        return BasicWidget()

# Student Information Collection
print("\n📝 Student Assessment Setup:")
try:
    student_id = input("Enter your student ID: ").strip()
except (EOFError, OSError, NotImplementedError):
    # No interactive stdin is available (closed or captured stdin; Jupyter
    # front-ends without input support are expected to raise a
    # NotImplementedError subclass - TODO confirm). Treat it like an empty
    # answer so the cell does not abort and an ID is generated below.
    student_id = ""
if not student_id:
    student_id = f"student_day5_mod1_{np.random.randint(1000, 9999)}"
    print(f"Generated ID: {student_id}")

# Track Selection for Day 5 Module 1
print("\n🎯 Module 1 Focus: Core Quantum ML Foundations")
print("   • QM9 dataset mastery")
print("   • Quantum feature engineering")
print("   • Basic SchNet concepts")

# Initialize Assessment System
try:
    assessment = create_assessment(student_id=student_id, day=5, track="quantum_ml_foundations")
    print("✅ Module 1 assessment initialized")
    print(f"👤 Student ID: {student_id}")
    
    # Start Module 1 assessment
    assessment.start_section("day_5_module_1_foundations")
    print("\n🎯 Day 5 Module 1 Assessment: Core Quantum ML Foundations")
    print("📊 Progress tracking enabled - All activities will be recorded")
    
except Exception as e:
    # Deliberately best-effort: tracking is optional, so any failure only
    # disables it. Later cells check `if assessment:` before recording.
    print(f"⚠️ Assessment initialization warning: {e}")
    assessment = None

print("\n" + "="*70)
print("🚀 Ready to begin Module 1: Core Quantum ML Foundations!")
print("="*70)

### **1.1 QM9 Dataset Handler - Professional Implementation**

In [None]:
class QM9DatasetHandler:
    """
    Load, cache, summarise and plot a QM9-style table of quantum properties.

    The QM9 dataset contains ~134k small organic molecules with quantum chemical properties
    computed at the B3LYP/6-31G(2df,p) level of theory.

    Instance fields:
      * cache_dir        - directory holding pickled, preprocessed tables
      * qm9_properties   - per-property metadata (display name, unit, index)
      * data             - the loaded DataFrame, or None before loading
      * molecular_graphs - starts empty; nothing in this class fills it
      * statistics       - result of the last compute_statistics() call
    """
    
    def __init__(self, cache_dir: str = "./qm9_cache"):
        """Create the handler and make sure ``cache_dir`` exists."""
        self.cache_dir = Path(cache_dir)
        # parents=True so a nested cache path works on a fresh checkout.
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        
        # QM9 property definitions with units and descriptions.
        # 'index' is the position within this table; it is not used to locate
        # columns in the DeepChem arrays (those are matched by task name).
        self.qm9_properties = {
            'mu': {'name': 'Dipole moment', 'unit': 'Debye', 'index': 0},
            'alpha': {'name': 'Polarizability', 'unit': 'Bohr^3', 'index': 1},
            'homo': {'name': 'HOMO energy', 'unit': 'Hartree', 'index': 2},
            'lumo': {'name': 'LUMO energy', 'unit': 'Hartree', 'index': 3},
            'gap': {'name': 'HOMO-LUMO gap', 'unit': 'Hartree', 'index': 4},
            'r2': {'name': 'Electronic spatial extent', 'unit': 'Bohr^2', 'index': 5},
            'zpve': {'name': 'Zero-point vibrational energy', 'unit': 'Hartree', 'index': 6},
            'u0': {'name': 'Internal energy at 0K', 'unit': 'Hartree', 'index': 7},
            'u298': {'name': 'Internal energy at 298K', 'unit': 'Hartree', 'index': 8},
            'h298': {'name': 'Enthalpy at 298K', 'unit': 'Hartree', 'index': 9},
            'g298': {'name': 'Free energy at 298K', 'unit': 'Hartree', 'index': 10},
            'cv': {'name': 'Heat capacity at 298K', 'unit': 'cal/(mol*K)', 'index': 11}
        }
        
        self.data = None
        self.molecular_graphs = []
        self.statistics = {}
        
    def load_qm9_dataset(self, subset_size: Optional[int] = None) -> pd.DataFrame:
        """
        Load and preprocess QM9 dataset with caching.

        Resolution order: a pickled cache file for this ``subset_size``, then
        DeepChem's MoleculeNet loader, then (if anything in the DeepChem path
        raises) synthetic demonstration data.

        Args:
            subset_size: number of molecules to sample (seeded with 42) from
                the full table; ``None`` keeps everything. On the synthetic
                fallback it is the number of rows generated (1000 if None).

        Returns:
            DataFrame with a ``smiles`` column plus one column per property;
            the same object is stored on ``self.data``.
        """
        cache_file = self.cache_dir / f"qm9_processed_{subset_size or 'full'}.pkl"
        
        if cache_file.exists():
            logger.info(f"Loading cached QM9 data from {cache_file}")
            # NOTE(security): unpickling can execute arbitrary code. Only point
            # cache_dir at a directory whose contents you wrote yourself.
            with open(cache_file, 'rb') as f:
                self.data = pickle.load(f)
            return self.data
        
        logger.info("Loading QM9 dataset from DeepChem...")
        try:
            # MoleculeNet loaders return (task names, (train, valid, test),
            # transformers). The splits are Dataset objects, so SMILES and
            # labels are read through their `ids` / `y` attributes.
            tasks, datasets, transformers = dc.molnet.load_qm9(featurizer='ECFP', split='random')
            
            # Combine all data
            # NOTE(review): `ids` is expected to hold SMILES strings for this
            # loader - confirm against the installed DeepChem version.
            all_smiles = np.concatenate([split.ids for split in datasets])
            all_properties = np.concatenate([split.y for split in datasets])
            
            # The loader may have transformed the labels (e.g. normalised
            # them); undo that so columns hold the values as stored in QM9.
            all_properties = dc.trans.undo_transforms(all_properties, transformers)
            
            # Create DataFrame, matching columns by task NAME: DeepChem's task
            # order is not guaranteed to equal the order of qm9_properties.
            column_of_task = {task: col for col, task in enumerate(tasks)}
            data_dict = {'smiles': all_smiles}
            for prop in self.qm9_properties:
                if prop in column_of_task:
                    data_dict[prop] = all_properties[:, column_of_task[prop]]
            
            self.data = pd.DataFrame(data_dict)
            
            # Apply subset if requested
            if subset_size and subset_size < len(self.data):
                self.data = self.data.sample(n=subset_size, random_state=42).reset_index(drop=True)
            
            # Cache the processed data
            with open(cache_file, 'wb') as f:
                pickle.dump(self.data, f)
            
            logger.info(f"QM9 dataset loaded: {len(self.data)} molecules")
            return self.data
            
        except Exception as e:
            # Deliberately broad: any failure (missing download, API change,
            # unwritable cache) degrades to demo data instead of stopping.
            logger.error(f"Error loading QM9 dataset: {e}")
            # Fallback: create synthetic QM9-like data for demonstration
            return self._create_synthetic_qm9(subset_size or 1000)
    
    def _create_synthetic_qm9(self, n_samples: int = 1000) -> pd.DataFrame:
        """
        Create synthetic QM9-like data for demonstration purposes.

        SMILES are drawn from a small fixed list and every property is drawn
        independently and uniformly from a fixed range, so properties carry no
        relationship to the molecules or to each other. The result is stored
        on ``self.data`` and returned; it is not written to the cache.
        """
        logger.warning("Creating synthetic QM9-like data for demonstration")
        
        # Generate simple organic molecules
        simple_smiles = [
            'C', 'CC', 'CCC', 'CCCC', 'CCCCC',  # Alkanes
            'C=C', 'CC=C', 'C=CC=C',  # Alkenes
            'C#C', 'CC#C',  # Alkynes
            'c1ccccc1', 'Cc1ccccc1',  # Aromatics
            'CO', 'CCO', 'CCCO',  # Alcohols
            'C=O', 'CC=O', 'CCC=O',  # Aldehydes/Ketones
            'CN', 'CCN', 'CCCN',  # Amines
        ]
        
        # A private generator seeded with 42 yields the same draws as seeding
        # the global legacy RNG, without resetting global random state for
        # every other cell in the notebook.
        rng = np.random.RandomState(42)
        smiles_list = rng.choice(simple_smiles, n_samples)
        
        # Generate synthetic properties
        data_dict = {'smiles': smiles_list}
        
        # Sampling range (low, high) per property, in the unit noted.
        property_ranges = {
            'mu': (0, 5),  # Debye
            'alpha': (10, 100),  # Bohr^3
            'homo': (-0.3, -0.1),  # Hartree
            'lumo': (-0.1, 0.1),  # Hartree
            'gap': (0.05, 0.3),  # Hartree
            'r2': (20, 200),  # Bohr^2
            'zpve': (0.01, 0.3),  # Hartree
            'u0': (-500, -100),  # Hartree
            'u298': (-500, -100),  # Hartree
            'h298': (-500, -100),  # Hartree
            'g298': (-500, -100),  # Hartree
            'cv': (5, 50)  # cal/(mol*K)
        }
        
        for prop, (low, high) in property_ranges.items():
            data_dict[prop] = rng.uniform(low, high, n_samples)
        
        self.data = pd.DataFrame(data_dict)
        return self.data
    
    def compute_statistics(self) -> Dict[str, Any]:
        """
        Compute comprehensive statistics for QM9 properties.

        Returns:
            ``{property: {mean, std, min, max, median, q25, q75, skewness,
            kurtosis}}`` for every known property present as a column. Also
            stored on ``self.statistics``.

        Raises:
            ValueError: if no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded. Call load_qm9_dataset first.")
        
        stats = {}
        
        for prop in self.qm9_properties.keys():
            if prop in self.data.columns:
                values = self.data[prop].values
                stats[prop] = {
                    'mean': np.mean(values),
                    'std': np.std(values),
                    'min': np.min(values),
                    'max': np.max(values),
                    'median': np.median(values),
                    'q25': np.percentile(values, 25),
                    'q75': np.percentile(values, 75),
                    'skewness': self._compute_skewness(values),
                    'kurtosis': self._compute_kurtosis(values)
                }
        
        self.statistics = stats
        return stats
    
    def _compute_skewness(self, values: np.ndarray) -> float:
        """Compute skewness (mean cubed z-score, population std) of the distribution.

        A zero-variance input has no defined skewness; 0.0 is reported instead
        of dividing by zero and returning NaN.
        """
        mean = np.mean(values)
        std = np.std(values)
        if std == 0:
            return 0.0
        return np.mean(((values - mean) / std) ** 3)
    
    def _compute_kurtosis(self, values: np.ndarray) -> float:
        """Compute excess kurtosis (mean fourth-power z-score minus 3) of the distribution.

        A zero-variance input has no defined kurtosis; 0.0 is reported instead
        of dividing by zero and returning NaN.
        """
        mean = np.mean(values)
        std = np.std(values)
        if std == 0:
            return 0.0
        return np.mean(((values - mean) / std) ** 4) - 3
    
    def visualize_property_distributions(self, properties: Optional[List[str]] = None):
        """
        Create comprehensive visualization of QM9 property distributions.

        Draws one 50-bin histogram per property on a 3-column subplot grid,
        shows the figure, and returns it.

        Args:
            properties: property keys to plot; defaults to every key in
                ``qm9_properties``. Keys missing from the data are skipped.

        Raises:
            ValueError: if no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded. Call load_qm9_dataset first.")
        
        if properties is None:
            properties = list(self.qm9_properties.keys())
        
        # Filter available properties
        available_props = [p for p in properties if p in self.data.columns]
        
        n_props = len(available_props)
        n_cols = 3
        # Ceiling division: enough rows to hold every subplot.
        n_rows = (n_props + n_cols - 1) // n_cols
        
        fig = make_subplots(
            rows=n_rows, cols=n_cols,
            subplot_titles=[f"{prop} ({self.qm9_properties[prop]['unit']})" 
                          for prop in available_props],
            vertical_spacing=0.1
        )
        
        for i, prop in enumerate(available_props):
            # Plotly subplot coordinates are 1-based.
            row = i // n_cols + 1
            col = i % n_cols + 1
            
            values = self.data[prop].values
            
            fig.add_trace(
                go.Histogram(
                    x=values,
                    name=prop,
                    nbinsx=50,
                    showlegend=False,
                    # Cycle through the palette if there are more plots than colours.
                    marker_color=px.colors.qualitative.Set3[i % len(px.colors.qualitative.Set3)]
                ),
                row=row, col=col
            )
        
        fig.update_layout(
            title="QM9 Property Distributions",
            height=300 * n_rows,
            showlegend=False
        )
        
        fig.show()
        
        return fig

# Shared handler instance for the cells below (default cache directory).
qm9_handler = QM9DatasetHandler()
print(
    "\n🎯 QM9 Dataset Handler initialized!",
    "📊 Ready to load and analyze quantum chemical properties",
    sep="\n",
)

### **1.2 Load and Explore QM9 Dataset**

In [None]:
# Load a 2,000-molecule subset: large enough to explore, small enough to
# keep Module 1 fast.
print("Loading QM9 dataset...")
qm9_data = qm9_handler.load_qm9_dataset(subset_size=2000)

print(
    "\n📊 QM9 Dataset Overview:",
    f"   • Total molecules: {len(qm9_data)}",
    f"   • Properties: {len(qm9_handler.qm9_properties)}",
    f"   • Data shape: {qm9_data.shape}",
    sep="\n",
)

# Peek at the first rows of the table.
print("\n🔍 Sample data:")
display(qm9_data.head())

# Per-property summary statistics.
print("\nComputing property statistics...")
stats = qm9_handler.compute_statistics()

# Summary table: one row per property, numbers formatted to four decimals,
# with the unit taken from the handler's property metadata.
stats_df = pd.DataFrame({
    prop: {
        **{
            label: f"{summary[key]:.4f}"
            for label, key in (('Mean', 'mean'), ('Std', 'std'), ('Min', 'min'), ('Max', 'max'))
        },
        'Unit': qm9_handler.qm9_properties[prop]['unit'],
    }
    for prop, summary in stats.items()
}).T

print("\n📈 QM9 Property Statistics:")
display(stats_df)

# Histograms for a selection of the properties.
print("\nGenerating property distribution plots...")
fig = qm9_handler.visualize_property_distributions(
    ['mu', 'alpha', 'homo', 'lumo', 'gap', 'cv']
)

print("\n✅ QM9 dataset successfully loaded and analyzed!")

### **1.3 Basic Quantum Feature Engineering**

In [None]:
class BasicQuantumFeatureEngineer:
    """
    Basic quantum feature engineering for Module 1 - focusing on core concepts.

    Turns SMILES strings into three groups of RDKit-derived descriptors
    (constitutional, electronic, aromatic), stacks them into one matrix and
    standard-scales it.

    Instance fields:
      * feature_cache   - empty dict; not used by the methods of this class
      * scalers         - fitted scalers, keyed by name ('standard')
      * valid_molecules - SMILES parsed successfully by the most recent
                          extract_basic_features() call, in input order
    """
    
    def __init__(self):
        self.feature_cache = {}
        self.scalers = {}
        # Defined up front so it can be read before any extraction has run.
        self.valid_molecules: List[str] = []
        
    def extract_basic_features(self, smiles_list: List[str]) -> Dict[str, np.ndarray]:
        """
        Extract basic molecular features for quantum property prediction.

        SMILES that RDKit cannot parse are skipped, so the returned arrays may
        have fewer rows than ``smiles_list``; ``self.valid_molecules`` records
        which inputs were kept.

        Returns:
            Dict with keys 'constitutional' (6 columns), 'electronic'
            (4 columns) and 'aromatic' (4 columns), each a 2-D array with one
            row per valid molecule. If nothing parsed, each value is an empty
            array of shape (0, 0).
        """
        features = {
            'constitutional': [],
            'electronic': [],
            'aromatic': []
        }
        
        valid_molecules = []
        
        for smiles in smiles_list:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                continue
                
            valid_molecules.append(smiles)
            
            # Constitutional descriptors (basic structure)
            const_features = [
                mol.GetNumAtoms(),
                mol.GetNumBonds(),
                mol.GetNumHeavyAtoms(),
                Descriptors.MolWt(mol),
                Descriptors.NumHeteroatoms(mol),
                Descriptors.NumRotatableBonds(mol)
            ]
            features['constitutional'].append(const_features)
            
            # Electronic descriptors (quantum-relevant)
            elec_features = [
                Descriptors.NumValenceElectrons(mol),
                sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6),  # Carbon count
                sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 7),  # Nitrogen count
                sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 8),  # Oxygen count
            ]
            features['electronic'].append(elec_features)
            
            # Aromatic descriptors (important for HOMO/LUMO)
            aromatic_features = [
                Descriptors.NumAromaticRings(mol),
                sum(1 for atom in mol.GetAtoms() if atom.GetIsAromatic()),
                sum(1 for bond in mol.GetBonds() if bond.GetIsAromatic()),
                self._fraction_csp3(mol)
            ]
            features['aromatic'].append(aromatic_features)
        
        # Convert to numpy arrays
        for key in features:
            if features[key]:
                features[key] = np.array(features[key])
            else:
                features[key] = np.array([]).reshape(0, 0)
        
        self.valid_molecules = valid_molecules
        return features
    
    @staticmethod
    def _fraction_csp3(mol) -> float:
        """Return the fraction of carbon atoms that are sp3-hybridised.

        Uses RDKit's ``Descriptors.FractionCSP3`` when the installed RDKit
        provides it (looked up once, called once); otherwise counts SP3
        carbons by hand. A molecule without carbon yields 0.0.
        """
        rdkit_fraction = getattr(Descriptors, 'FractionCSP3', None)
        if rdkit_fraction is not None:
            value = rdkit_fraction(mol)
            return value if value is not None else 0.0
        
        # Manual calculation: ratio of sp3 carbons to total carbons
        carbons = [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6]
        if not carbons:
            return 0.0
        sp3_carbons = sum(
            1 for atom in carbons
            if atom.GetHybridization() == Chem.HybridizationType.SP3
        )
        return sp3_carbons / len(carbons)
    
    def create_feature_matrix(self, features_dict: Dict[str, np.ndarray]) -> Tuple[np.ndarray, List[str]]:
        """
        Combine all feature types into a single matrix.

        Non-empty groups are stacked column-wise in the order constitutional,
        electronic, aromatic.

        Returns:
            ``(matrix, column_names)``. If every group is empty the matrix has
            shape (0, 0) and the name list is empty.
        """
        feature_arrays = []
        feature_names = []
        
        # Constitutional features
        if features_dict['constitutional'].size > 0:
            feature_arrays.append(features_dict['constitutional'])
            feature_names.extend(['num_atoms', 'num_bonds', 'num_heavy_atoms', 
                                'mol_weight', 'num_heteroatoms', 'num_rotatable_bonds'])
        
        # Electronic features
        if features_dict['electronic'].size > 0:
            feature_arrays.append(features_dict['electronic'])
            feature_names.extend(['num_valence_electrons', 'carbon_count', 
                                'nitrogen_count', 'oxygen_count'])
        
        # Aromatic features
        if features_dict['aromatic'].size > 0:
            feature_arrays.append(features_dict['aromatic'])
            feature_names.extend(['num_aromatic_rings', 'aromatic_atoms', 
                                'aromatic_bonds', 'fraction_csp3'])
        
        if feature_arrays:
            combined_features = np.hstack(feature_arrays)
        else:
            combined_features = np.array([]).reshape(0, 0)
        
        return combined_features, feature_names
    
    def scale_features(self, features: np.ndarray) -> np.ndarray:
        """
        Scale features using standard scaling.

        The first call fits a StandardScaler on ``features`` and keeps it in
        ``self.scalers['standard']``; later calls reuse that fitted scaler and
        only transform.
        """
        if 'standard' not in self.scalers:
            self.scalers['standard'] = StandardScaler()
        
        scaler = self.scalers['standard']
        
        # `mean_` only exists once the scaler has been fitted.
        if not hasattr(scaler, 'mean_'):
            scaled_features = scaler.fit_transform(features)
        else:
            scaled_features = scaler.transform(features)
        
        return scaled_features

# Shared feature-engineer instance for the cells below.
basic_feature_engineer = BasicQuantumFeatureEngineer()
print(
    "\n🎯 Basic Quantum Feature Engineer initialized!",
    "🔬 Ready to extract core molecular features",
    sep="\n",
)

### **1.4 Extract and Scale Molecular Features**

In [None]:
# Featurise every SMILES in the loaded table and time how long it takes.
print("Extracting basic molecular features...")
smiles_list = qm9_data['smiles'].tolist()

start_time = time.time()
basic_features_dict = basic_feature_engineer.extract_basic_features(smiles_list)
feature_extraction_time = time.time() - start_time

print(f"\n⏱️ Feature extraction completed in {feature_extraction_time:.2f} seconds")

# Report the size of each non-empty feature group.
for feature_type, features in basic_features_dict.items():
    if features.size <= 0:
        continue
    n_molecules, n_features = features.shape[0], features.shape[1]
    print(f"   • {feature_type}: {n_features} features, {n_molecules} molecules")

# Stack the groups into one matrix and collect the matching column names.
basic_feature_matrix, basic_feature_names = (
    basic_feature_engineer.create_feature_matrix(basic_features_dict)
)

print(
    "\n📊 Basic Feature Matrix:",
    f"   • Shape: {basic_feature_matrix.shape}",
    f"   • Features: {basic_feature_names}",
    sep="\n",
)

# Standard-scale the combined matrix.
print("\nScaling features...")
scaled_basic_features = basic_feature_engineer.scale_features(basic_feature_matrix)

# Log the run with the assessment tracker when one is active.
if assessment:
    extraction_summary = {
        "feature_types": list(basic_features_dict.keys()),
        "total_features": len(basic_feature_names),
        "molecules_processed": len(basic_feature_engineer.valid_molecules),
        "processing_time": feature_extraction_time,
    }
    assessment.record_activity(
        "quantum_feature_engineering",
        "completed",
        extraction_summary,
    )

print("✅ Basic features extracted and scaled successfully!")

### **1.5 Baseline Quantum Property Prediction**

In [None]:
# Align QM9 data with valid molecules
# Keep only the rows whose SMILES were featurised, so targets line up
# row-for-row with the feature matrix. The valid SMILES go into a set once:
# testing membership against the list for every row is quadratic.
valid_smiles = set(basic_feature_engineer.valid_molecules)
valid_indices = [i for i, smiles in enumerate(qm9_data['smiles']) 
                if smiles in valid_smiles]
aligned_qm9_data = qm9_data.iloc[valid_indices].reset_index(drop=True)

print(f"Aligned dataset: {len(aligned_qm9_data)} molecules")

# Focus on key quantum properties for Module 1
key_properties = ['homo', 'lumo', 'gap', 'mu']
available_properties = [p for p in key_properties if p in aligned_qm9_data.columns]

print(f"\n🎯 Predicting key quantum properties: {available_properties}")

# Train simple baseline models
# baseline_results maps property -> metrics, fitted model and test predictions.
baseline_results = {}

for prop in available_properties:
    print(f"\n📊 Training baseline model for {prop}...")
    
    # Get target values
    y_values = aligned_qm9_data[prop].values
    
    # Split data (fixed seed, so every property uses the same 80/20 split)
    # NOTE(review): the features were scaled on the full matrix before this
    # split, so test rows influenced the scaler - revisit if a strictly
    # held-out evaluation is needed.
    X_train, X_test, y_train, y_test = train_test_split(
        scaled_basic_features, y_values, test_size=0.2, random_state=42
    )
    
    # Train Random Forest
    rf_model = RandomForestRegressor(
        n_estimators=50,  # Reduced for faster training in Module 1
        max_depth=10,
        random_state=42,
        n_jobs=-1
    )
    
    rf_model.fit(X_train, y_train)
    rf_pred = rf_model.predict(X_test)
    
    # Compute metrics
    mae = mean_absolute_error(y_test, rf_pred)
    rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
    r2 = r2_score(y_test, rf_pred)
    
    baseline_results[prop] = {
        'mae': mae,
        'rmse': rmse,
        'r2': r2,
        'model': rf_model,
        'y_test': y_test,
        'y_pred': rf_pred
    }
    
    print(f"   • MAE: {mae:.4f}")
    print(f"   • RMSE: {rmse:.4f}")
    print(f"   • R²: {r2:.4f}")

# Create simple prediction plot
# Fixed 2x2 grid: at most the first four properties are shown.
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=available_properties[:4],
    vertical_spacing=0.1
)

for i, prop in enumerate(available_properties[:4]):
    # Plotly subplot coordinates are 1-based.
    row = i // 2 + 1
    col = i % 2 + 1
    
    results = baseline_results[prop]
    y_test = results['y_test']
    y_pred = results['y_pred']
    
    # Scatter plot
    fig.add_trace(
        go.Scatter(
            x=y_test,
            y=y_pred,
            mode='markers',
            name=prop,
            showlegend=False,
            marker=dict(size=4, opacity=0.6)
        ),
        row=row, col=col
    )
    
    # Perfect prediction line (y = x) spanning both actual and predicted values
    min_val = min(y_test.min(), y_pred.min())
    max_val = max(y_test.max(), y_pred.max())
    
    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode='lines',
            line=dict(dash='dash', color='red'),
            showlegend=False
        ),
        row=row, col=col
    )
    
    fig.update_xaxes(title_text="Actual", row=row, col=col)
    fig.update_yaxes(title_text="Predicted", row=row, col=col)

fig.update_layout(
    title="Module 1 Baseline Model Performance",
    height=600,
    showlegend=False
)

fig.show()

print("\n📊 Module 1 Baseline Results Summary:")
print("=" * 50)
for prop, results in baseline_results.items():
    prop_info = qm9_handler.qm9_properties[prop]
    print(f"{prop_info['name']:25} | MAE: {results['mae']:.4f} | R²: {results['r2']:.3f}")

print("\n✅ Module 1 Complete: Core Quantum ML Foundations")

## **📋 Module 1 Assessment & Completion**

In [None]:
# 📋 MODULE 1 CHECKPOINT ASSESSMENT
# Records completion of the module, shows the self-assessment widget and
# prints where to go next.
print(
    "\n" + "="*80,
    "📋 MODULE 1 CHECKPOINT ASSESSMENT: Core Quantum ML Foundations",
    "="*80,
    sep="\n",
)

# Log completion together with a few figures from the run, if tracking is on.
if assessment:
    completion_details = {
        "module": "Core Quantum ML Foundations",
        "dataset_size": len(aligned_qm9_data),
        "features_extracted": len(basic_feature_names),
        "properties_modeled": len(baseline_results),
        "timestamp": datetime.now().isoformat(),
    }
    assessment.record_activity("module_1_completion", "completed", completion_details)

# Build the Module 1 widget: the concepts to self-rate and the activities to
# tick off.
module1_widget = create_widget(
    assessment=assessment,
    section="Module 1: Core Quantum ML Foundations",
    concepts=[
        "QM9 dataset structure and loading",
        "Quantum chemical property understanding (HOMO, LUMO, gap, dipole)",
        "Basic molecular feature extraction",
        "Feature scaling and preprocessing",
        "Baseline model training and evaluation",
        "Performance metrics interpretation",
    ],
    activities=[
        "Successfully loaded QM9 dataset subset",
        "Analyzed quantum chemical property distributions",
        "Implemented basic feature engineering pipeline",
        "Created and evaluated baseline prediction models",
        "Interpreted model performance for quantum properties",
    ],
)

# Render the interactive assessment.
module1_widget.display()

# Report the overall score when tracking is on.
if assessment:
    progress = assessment.get_progress_summary()
    print(f"\n📊 Module 1 Progress: {progress['overall_score']:.1f}%")

# Closing banner with pointers to the next module.
print(
    "\n🎯 Module 1 Complete! Ready for Module 2: Advanced Quantum ML Architectures",
    "\n" + "="*80,
    "📍 NEXT STEPS:",
    "   📖 Continue to: day_05_module_2_advanced.ipynb",
    "   🎯 Focus: SchNet implementation and advanced architectures",
    "   💡 Build on: Foundation concepts from this module",
    "="*80,
    sep="\n",
)