In [None]:
# Status message only: this cell prints a line and performs no installation or configuration itself.
print('Setup complete.')

# Lab 10: AI-Powered Package Refactoring

## Learning Objectives
- Apply AI assistance to refactor legacy monolithic code
- Design modular package architecture
- Implement systematic code transformation
- Create proper module separation and interfaces

## Lab Overview
Transform a legacy monolithic script into a well-structured package using AI guidance:
1. **Analysis Phase** - Understand existing code structure
2. **Design Phase** - Plan modular architecture with AI
3. **Implementation Phase** - Create individual modules
4. **Integration Phase** - Ensure everything works together

## Exit Ticket
- [ ] Legacy code analysis completed
- [ ] Modular package structure designed
- [ ] Individual modules implemented
- [ ] Integration tests passing

In [None]:
# Setup and imports
!pip install asksageclient pip_system_certs
from google.colab import drive
drive.mount('/content/drive')

import os
import json
import time
import tiktoken
from pathlib import Path
from typing import Dict, List, Any

# Import our AskSage client
from asksageclient import AskSageClient

# Get API credentials from Google Colab secrets
from google.colab import userdata
api_key = userdata.get('ASKSAGE_API_KEY')
email = userdata.get('ASKSAGE_EMAIL')

# Initialize client and tokenizer
client = AskSageClient(api_key=api_key, email=email)
tokenizer = tiktoken.encoding_for_model("gpt-4")
print("AskSage client initialized successfully")
print("Ready to showcase AI capabilities...")

In [None]:
# ================================
# 🔐 Cell 1 — Load secrets (Colab) + pricing + token utils
# ================================
import os, time, csv
from typing import Optional, Dict
import tiktoken

from google.colab import userdata

# Required secrets.
ASKSAGE_API_KEY = userdata.get("ASKSAGE_API_KEY")
ASKSAGE_EMAIL = userdata.get("ASKSAGE_EMAIL")

# The base URL is optional (the printout below falls back to "(default)").
# userdata.get raises SecretNotFoundError for a secret that is not defined,
# so an absent optional secret has to be caught explicitly.
try:
    ASKSAGE_BASE_URL = userdata.get("ASKSAGE_BASE_URL")
except userdata.SecretNotFoundError:
    ASKSAGE_BASE_URL = None

assert ASKSAGE_API_KEY, "ASKSAGE_API_KEY not provided."
assert ASKSAGE_EMAIL, "ASKSAGE_EMAIL not provided."

print("✓ Secrets loaded")
print("  • EMAIL:", ASKSAGE_EMAIL)
print("  • BASE URL:", ASKSAGE_BASE_URL or "(default)")

# Pricing (USD per 1,000,000 tokens)
# Keyed by model name; each entry holds separate input and output rates under
# the "input_per_m" / "output_per_m" keys that cost_usd() reads.
PRICES_PER_M = {
    "gpt-5": {"input_per_m": 1.25, "output_per_m": 10.00},
    "gpt-5-mini": {"input_per_m": 0.25, "output_per_m": 2.00},
}

# Tokenizer
# NOTE(review): o200k_base is presumably chosen to match the models priced above — confirm.
enc = tiktoken.get_encoding("o200k_base")

def count_tokens(text: str) -> int:
    """Return the number of tokens in ``text`` under the module-level encoder ``enc``.

    Args:
        text: Text to measure. ``None`` or an empty string counts as 0 tokens.

    Returns:
        Token count as an int.
    """
    # disallowed_special=() makes strings that look like special tokens
    # (e.g. "<|endoftext|>") encode as ordinary text instead of raising
    # ValueError, so arbitrary prompts and model output can always be counted.
    return len(enc.encode(text or "", disallowed_special=()))

def cost_usd(
    model: str,
    input_tokens: int,
    output_tokens: int,
    prices: Optional[Dict[str, Dict[str, float]]] = None,
) -> float:
    """Compute the USD cost of a call from its token counts.

    Args:
        model: Model name used as the key into the price table.
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.
        prices: Optional price table with the same layout as ``PRICES_PER_M``
            (``{model: {"input_per_m": ..., "output_per_m": ...}}``, USD per
            1,000,000 tokens). Defaults to ``PRICES_PER_M``.

    Returns:
        Input cost plus output cost, in USD.

    Raises:
        ValueError: If ``model`` is not present in the price table.
    """
    # Resolve the table at call time so the default always reflects the current global.
    table = PRICES_PER_M if prices is None else prices
    if model not in table:
        raise ValueError(f"Unknown model: {model}")
    rates = table[model]
    input_cost = (input_tokens / 1_000_000) * rates["input_per_m"]
    output_cost = (output_tokens / 1_000_000) * rates["output_per_m"]
    return input_cost + output_cost

In [None]:
# Setup and imports
!pip install asksageclient pip_system_certs
from google.colab import drive
drive.mount('/content/drive')

import os
import json
import time
import tiktoken
from pathlib import Path
from typing import Dict, List, Any

# Import our AskSage client
from asksageclient import AskSageClient

# Get API credentials from Google Colab secrets
from google.colab import userdata
api_key = userdata.get('ASKSAGE_API_KEY')
email = userdata.get('ASKSAGE_EMAIL')

# Initialize client and tokenizer
client = AskSageClient(api_key=api_key, email=email)
tokenizer = tiktoken.encoding_for_model("gpt-4")
print("AskSage client initialized successfully")
print("Ready to showcase AI capabilities...")

In [None]:
import os
import ast
import json
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass

import openai
from rich.console import Console
from rich.panel import Panel
from rich.syntax import Syntax
from rich.tree import Tree

# Shared rich Console; later cells call console.print(...) for styled output.
console = Console()
print("🔄 Package Refactoring Lab loading...")

## Legacy Code Example

In [None]:
# Create a legacy monolithic script to refactor
# NOTE: the script below is deliberately monolithic (globals, hard-coded
# credentials, mixed concerns) because it is the refactoring target. It must
# still be valid Python so that ast-based analysis can parse it.
legacy_code = '''
import os
import csv
import json
import sqlite3
from datetime import datetime
import smtplib
from email.mime.text import MIMEText
import requests
import matplotlib.pyplot as plt

# Global variables
DB_PATH = "sales.db"
API_KEY = "your-api-key"
EMAIL_CONFIG = {
    "smtp_server": "smtp.gmail.com",
    "port": 587,
    "username": "user@gmail.com",
    "password": "password"
}

def setup_database():
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute("""CREATE TABLE IF NOT EXISTS sales (
        id INTEGER PRIMARY KEY,
        product TEXT,
        quantity INTEGER,
        price REAL,
        date TEXT,
        customer TEXT
    )""")
    conn.commit()
    conn.close()

def import_csv_data(filename):
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    with open(filename, \'r\') as f:
        reader = csv.DictReader(f)
        for row in reader:
            cursor.execute(
                "INSERT INTO sales (product, quantity, price, date, customer) VALUES (?, ?, ?, ?, ?)",
                (row[\'product\'], int(row[\'quantity\']), float(row[\'price\']), row[\'date\'], row[\'customer\'])
            )
    conn.commit()
    conn.close()

def get_sales_data():
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM sales")
    data = cursor.fetchall()
    conn.close()
    return data

def calculate_metrics(data):
    total_revenue = sum(row[2] * row[3] for row in data)  # quantity * price
    total_orders = len(data)
    avg_order_value = total_revenue / total_orders if total_orders > 0 else 0
    return {
        "total_revenue": total_revenue,
        "total_orders": total_orders,
        "avg_order_value": avg_order_value
    }

def generate_report(metrics):
    report = f"""Sales Report - {datetime.now().strftime(\'%Y-%m-%d\')}
    
Total Revenue: ${metrics[\'total_revenue\']:.2f}
Total Orders: {metrics[\'total_orders\']}
Average Order Value: ${metrics[\'avg_order_value\']:.2f}
"""
    return report

def create_chart(data):
    products = {}
    for row in data:
        product = row[1]
        revenue = row[2] * row[3]
        products[product] = products.get(product, 0) + revenue
    
    plt.figure(figsize=(10, 6))
    plt.bar(products.keys(), products.values())
    plt.title(\'Revenue by Product\')
    plt.xlabel(\'Product\')
    plt.ylabel(\'Revenue\')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(\'sales_chart.png\')
    plt.close()

def send_email_report(report, recipient):
    msg = MIMEText(report)
    msg[\'Subject\'] = \'Sales Report\'
    msg[\'From\'] = EMAIL_CONFIG[\'username\']
    msg[\'To\'] = recipient
    
    server = smtplib.SMTP(EMAIL_CONFIG[\'smtp_server\'], EMAIL_CONFIG[\'port\'])
    server.starttls()
    server.login(EMAIL_CONFIG[\'username\'], EMAIL_CONFIG[\'password\'])
    server.send_message(msg)
    server.quit()

def sync_to_api(data):
    url = "https://api.example.com/sales"
    headers = {"Authorization": f"Bearer {API_KEY}"}
    
    for row in data:
        payload = {
            "product": row[1],
            "quantity": row[2],
            "price": row[3],
            "date": row[4],
            "customer": row[5]
        }
        response = requests.post(url, json=payload, headers=headers)
        if response.status_code != 200:
            print(f"Failed to sync record {row[0]}")

def main():
    setup_database()
    import_csv_data(\'sales_data.csv\')
    
    data = get_sales_data()
    metrics = calculate_metrics(data)
    report = generate_report(metrics)
    
    create_chart(data)
    send_email_report(report, \'manager@company.com\')
    sync_to_api(data)
    
    print("Sales processing complete!")

if __name__ == "__main__":
    main()
'''

# Save legacy code to file
# The path is relative to the current working directory; a later cell reads
# this file back with Path(...).read_text() for analysis.
Path("legacy_sales_processor.py").write_text(legacy_code)
console.print("📄 Legacy monolithic script created")

## AI Code Analyzer

In [None]:
class CodeAnalyzer:
    """Analyze legacy code structure and identify refactoring opportunities.

    Attributes:
        client: The OpenAI client when one could be created, otherwise ``None``.
        has_api: ``True`` only when ``client`` was created successfully.
    """

    def __init__(self):
        # Set defaults first so both attributes exist on every code path.
        self.client = None
        self.has_api = False
        self.setup_client()

    def setup_client(self):
        """Create an OpenAI client when ``OPENAI_API_KEY`` is set.

        Without a key, or if client construction fails, the analyzer stays in
        offline mode (``has_api`` is ``False`` and ``client`` is ``None``).
        """
        self.client = None
        self.has_api = False

        if not os.getenv('OPENAI_API_KEY'):
            console.print("💡 No API key, using static analysis")
            return

        try:
            self.client = openai.OpenAI()
            self.has_api = True
            console.print("✅ OpenAI client configured")
        except Exception as e:
            # Best effort: a failed client setup falls back to the mock plan.
            console.print(f"⚠️ Using mock analysis: {e}")

    def analyze_functions(self, code: str) -> Dict:
        """Extract function information from code.

        Args:
            code: Python source text to parse.

        Returns:
            ``{'functions': [...]}`` with one dict per ``def`` / ``async def``
            found anywhere in the source, holding its ``name``, positional
            ``args`` names and ``lineno``. The list is empty when the source
            does not parse (the syntax error is printed, not raised).
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            console.print(f"❌ Syntax error in code: {e}")
            return {'functions': []}

        # async functions are definitions too; include them alongside plain defs.
        function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
        functions = [
            {
                'name': node.name,
                'args': [arg.arg for arg in node.args.args],
                'lineno': node.lineno
            }
            for node in ast.walk(tree)
            if isinstance(node, function_nodes)
        ]
        return {'functions': functions}

    # TODO: Implement AI-powered refactoring suggestions
    def get_refactor_plan(self, code: str) -> Dict:
        """Get AI suggestions for refactoring the code into modules.

        Args:
            code: Source text of the script to refactor.

        Returns:
            A plan dict with a ``'modules'`` list (each entry has ``name``,
            ``functions`` and ``description``) plus ``'config_module'`` and
            ``'main_module'`` names. Until the API call is implemented this is
            always the hard-coded mock plan below.
        """

        # TODO: Create a prompt that analyzes the code and suggests module structure
        # TODO: Ask AI to identify:
        # - Related functions that should be grouped together
        # - Appropriate module names
        # - Dependencies between modules
        # - Configuration management approach

        # HINT: Use a structured prompt asking for JSON output
        # HINT: Include the code in the prompt for analysis

        if self.has_api:
            # TODO: Implement API call here
            pass

        # Mock refactor plan for testing
        return {
            'modules': [
                {
                    'name': 'database',
                    'functions': ['setup_database', 'import_csv_data', 'get_sales_data'],
                    'description': 'Database operations and data access'
                },
                {
                    'name': 'analytics',
                    'functions': ['calculate_metrics', 'generate_report'],
                    'description': 'Sales data analysis and reporting'
                },
                {
                    'name': 'visualization',
                    'functions': ['create_chart'],
                    'description': 'Data visualization and charting'
                },
                {
                    'name': 'communication',
                    'functions': ['send_email_report'],
                    'description': 'Email and notification services'
                },
                {
                    'name': 'integration',
                    'functions': ['sync_to_api'],
                    'description': 'External API integration'
                }
            ],
            'config_module': 'config',
            'main_module': 'main'
        }

# Initialize analyzer
# Construction runs setup_client(), which prints which analysis mode is active.
analyzer = CodeAnalyzer()
print("🔍 Code analyzer ready!")

## Task 1: Analyze Legacy Code

In [None]:
# Analyze the legacy code
# Read the script back from disk so the analysis works from the saved file.
legacy_code = Path("legacy_sales_processor.py").read_text()

# Extract function information
# analyze_functions returns {'functions': [...]}; the list is empty if the script fails to parse.
function_info = analyzer.analyze_functions(legacy_code)
# get_refactor_plan returns a dict whose 'modules' list has name / functions / description per module.
refactor_plan = analyzer.get_refactor_plan(legacy_code)

# Display analysis results
console.print("\n[bold blue]📊 Code Analysis Results[/bold blue]")
console.print(f"Functions found: {len(function_info['functions'])}")

# Show refactoring plan
# One branch per planned module: its description first, then each function assigned to it.
tree = Tree("🏗️ Refactoring Plan")
for module in refactor_plan['modules']:
    module_branch = tree.add(f"📦 {module['name']}.py")
    module_branch.add(f"[italic]{module['description']}[/italic]")
    for func in module['functions']:
        module_branch.add(f"⚡ {func}()")

console.print(tree)
print("\n✅ Analysis complete - ready for refactoring!")

## Module Generator

In [None]:
class ModuleGenerator:
    """Generate individual modules from refactoring plan.

    Attributes:
        analyzer: The ``CodeAnalyzer`` passed to the constructor.
        package_dir: Output directory (``refactored_sales``, relative to the
            current working directory); created on construction if missing.
    """

    def __init__(self, analyzer: CodeAnalyzer):
        self.analyzer = analyzer
        self.package_dir = Path("refactored_sales")
        self.package_dir.mkdir(exist_ok=True)

    def create_config_module(self) -> str:
        """Generate configuration module.

        Returns:
            Source text for ``config.py``: dataclasses for database, email and
            API settings plus one module-level instance of each. Credentials
            are read from environment variables instead of being hard-coded.
        """
        config_code = '''"""Configuration settings for sales processing system"""
import os
from dataclasses import dataclass
from typing import Dict

@dataclass
class DatabaseConfig:
    """Database configuration"""
    path: str = "sales.db"

@dataclass  
class EmailConfig:
    """Email configuration"""
    smtp_server: str = "smtp.gmail.com"
    port: int = 587
    username: str = os.getenv("EMAIL_USERNAME", "")
    password: str = os.getenv("EMAIL_PASSWORD", "")

@dataclass
class APIConfig:
    """API configuration"""
    key: str = os.getenv("API_KEY", "")
    base_url: str = "https://api.example.com"

# Global configuration instances
db_config = DatabaseConfig()
email_config = EmailConfig()
api_config = APIConfig()
'''
        return config_code

    # TODO: Implement generate_module method
    def generate_module(self, module_name: str, functions: List[str], original_code: str) -> str:
        """Generate a specific module with selected functions.

        Args:
            module_name: Name of the module to generate (without ``.py``).
            functions: Names of the functions that belong in this module.
            original_code: Full source text of the legacy script.

        Returns:
            Source text for the module. The current stub returns ``None``,
            which ``create_package_structure`` treats as "nothing generated".
        """

        # TODO: Extract specified functions from original code
        # TODO: Add proper imports and docstrings
        # TODO: Update function signatures to use config objects
        # TODO: Add error handling and logging

        # HINT: Use AST parsing to extract specific functions
        # HINT: Generate imports based on what each function needs

        pass  # Replace with your implementation

    def create_package_structure(self, refactor_plan: Dict, original_code: str):
        """Create the complete refactored package.

        Writes ``__init__.py`` and ``config.py`` into ``package_dir``, then one
        file per entry in ``refactor_plan['modules']`` for which
        ``generate_module`` returns code. Modules with no generated code are
        reported and skipped rather than written as empty files.

        Args:
            refactor_plan: Plan dict with a ``'modules'`` list; each entry
                needs ``'name'`` and ``'functions'``.
            original_code: Full source text of the legacy script.
        """

        # Explicit UTF-8 so generated files do not depend on the platform default encoding.
        # Create __init__.py
        init_file = self.package_dir / "__init__.py"
        init_file.write_text('"""Refactored Sales Processing Package"""\n', encoding="utf-8")

        # Create config module
        config_file = self.package_dir / "config.py"
        config_file.write_text(self.create_config_module(), encoding="utf-8")

        # Generate each module
        for module_info in refactor_plan['modules']:
            module_code = self.generate_module(
                module_info['name'],
                module_info['functions'],
                original_code
            )

            if not module_code:
                # Make the skip visible instead of silently producing fewer files.
                console.print(f"⚠️ Skipped {module_info['name']}.py (no code generated)")
                continue

            module_file = self.package_dir / f"{module_info['name']}.py"
            module_file.write_text(module_code, encoding="utf-8")
            console.print(f"✅ Created {module_file}")

# Initialize generator
# Construction creates the refactored_sales/ output directory if it does not exist yet.
generator = ModuleGenerator(analyzer)
print("🏗️ Module generator ready!")

## Task 2: Implement Module Generation

Complete the `generate_module` method to extract functions from the original code and create proper modules.

In [None]:
# TODO: Test your module generation
# Uncomment the block below once generate_module returns code; while it
# returns nothing, create_package_structure writes only __init__.py and config.py.
# generator.create_package_structure(refactor_plan, legacy_code)
# 
# # List created files
# package_files = list(generator.package_dir.glob("*.py"))
# console.print(f"\n📦 Created {len(package_files)} module files:")
# for file in package_files:
#     console.print(f"  - {file.name}")

pass  # Replace with your test code

## Task 3: Create Integration Tests

In [None]:
# TODO: Create integration tests for the refactored package
# Tests should verify:
# 1. All modules can be imported successfully
# 2. Functions work with sample data
# 3. Module interfaces are compatible
# 4. Configuration system works properly

def test_refactored_package() -> None:
    """Test the refactored package functionality.

    Placeholder: the body currently does nothing and returns None. The TODOs
    below list what the finished test is expected to cover.
    """
    
    # TODO: Import all modules and test basic functionality
    # TODO: Create sample data and test end-to-end workflow
    # TODO: Verify that refactored code produces same results as original
    
    pass  # Replace with your test implementation

# Printed when the cell runs; the test function above is only defined here, not called.
print("🧪 Test framework ready (implement tests above)")

## Extension Ideas

🚀 **Advanced Refactoring Features:**

1. **Dependency Analysis**: Map dependencies between functions
2. **Interface Generation**: Create abstract base classes
3. **Documentation**: Auto-generate module documentation
4. **Type Hints**: Add proper type annotations
5. **Testing**: Generate unit tests for each module
6. **Performance**: Profile and optimize refactored code
7. **Packaging**: Create setup.py and distribution files

## Deliverable Checklist

- [ ] Legacy code analysis with function extraction
- [ ] AI-generated modular architecture plan
- [ ] Individual modules with proper separation of concerns
- [ ] Configuration management system
- [ ] Integration tests verifying functionality
- [ ] Package structure with `__init__.py` files

**Bonus Points:**
- [ ] Type hints and documentation
- [ ] Error handling and logging improvements
- [ ] Performance optimizations
- [ ] CLI interface for the refactored package