# MPM Comparison and Migration

This notebook provides tools to:
1. Compare MPM deployments to find common and unique report actions
2. Migrate (copy) report actions between deployments with proper attribute adjustments

## Key Considerations:
- Deployment-specific attributes: `domain_code`, `deployment_version`, `deployment_code` (schema name)
- Communities lists need adjustment for target deployment
- Schedules differ between domains
- Parent dependencies need validation

In [None]:
# Imports
import json
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

import pandas as pd

# Locate the SQLite metadata database relative to the current working directory
BASE_DIR = Path.cwd()
DB_PATH = BASE_DIR.joinpath("resources", "meta-db", "schema-sentinel.db")
db_file_present = DB_PATH.exists()

# Report the resolved location and whether the file is there before loading
print(f"Database: {DB_PATH}")
print(f"Exists: {db_file_present}")

In [None]:
# Load all MPM tables from SQLite into DataFrames.
# sqlite3.connect() silently creates an empty database file when the path does
# not exist, so fail early with a clear message instead.
if not DB_PATH.exists():
    raise FileNotFoundError(f"SQLite database not found: {DB_PATH}")

conn = sqlite3.connect(str(DB_PATH))
try:
    deployments_df = pd.read_sql_query("SELECT * FROM mpm_deployments", conn)
    communities_df = pd.read_sql_query("SELECT * FROM mpm_communities", conn)
    sensor_actions_df = pd.read_sql_query("SELECT * FROM mpm_sensor_actions", conn)
    report_actions_df = pd.read_sql_query("SELECT * FROM mpm_report_actions", conn)
finally:
    # Release the connection even when one of the queries fails
    conn.close()

print("Data loaded:")
print(f"  Deployments: {len(deployments_df)} rows")
print(f"  Communities: {len(communities_df)} rows")
print(f"  Sensor Actions: {len(sensor_actions_df)} rows")
print(f"  Report Actions: {len(report_actions_df)} rows")

print("\nAvailable domains:")
print(deployments_df[['DOMAIN_CODE', 'DEPLOYMENT_VERSION']].to_string(index=False))

## 1. Compare Report Actions Between Domains

In [None]:
def compare_report_actions(domain1: str, domain2: str, df: pd.DataFrame) -> Dict:
    """
    Compare report actions between two domains.

    Args:
        domain1: DOMAIN_CODE value of the first domain.
        domain2: DOMAIN_CODE value of the second domain.
        df: Report actions table. DOMAIN_CODE and ACTION_CODE are always read;
            REPORT_NAME, COMMUNITIES and SCHEDULE are read for shared actions.

    Returns:
        Dictionary with:
        - common: sorted action_codes in both domains
        - only_in_domain1: sorted action_codes only in domain1
        - only_in_domain2: sorted action_codes only in domain2
        - common_details: DataFrame with side-by-side comparison, one row per
          common action. The columns are always present, even when the two
          domains share no action, so callers can filter on NAMES_MATCH /
          SCHEDULES_MATCH unconditionally.
        - count_common, count_only_domain1, count_only_domain2: sizes of the
          three lists above
    """
    # The slices are only read, never modified, so no defensive copy is needed
    df1 = df[df['DOMAIN_CODE'] == domain1]
    df2 = df[df['DOMAIN_CODE'] == domain2]

    actions1 = set(df1['ACTION_CODE'].unique())
    actions2 = set(df2['ACTION_CODE'].unique())

    common = actions1 & actions2
    only_in_1 = actions1 - actions2
    only_in_2 = actions2 - actions1

    # Fixed column layout. dict.fromkeys removes the duplicate names that arise
    # when domain1 == domain2 while keeping the original order.
    detail_columns = list(dict.fromkeys([
        'ACTION_CODE',
        f'{domain1}_REPORT_NAME',
        f'{domain2}_REPORT_NAME',
        f'{domain1}_COMMUNITIES',
        f'{domain2}_COMMUNITIES',
        f'{domain1}_SCHEDULE',
        f'{domain2}_SCHEDULE',
        'NAMES_MATCH',
        'SCHEDULES_MATCH',
    ]))

    # Create side-by-side comparison for common actions
    detail_rows = []
    for action_code in sorted(common):
        # The first matching row represents the action if a code is repeated
        row1 = df1[df1['ACTION_CODE'] == action_code].iloc[0]
        row2 = df2[df2['ACTION_CODE'] == action_code].iloc[0]

        detail_rows.append({
            'ACTION_CODE': action_code,
            f'{domain1}_REPORT_NAME': row1['REPORT_NAME'],
            f'{domain2}_REPORT_NAME': row2['REPORT_NAME'],
            f'{domain1}_COMMUNITIES': row1['COMMUNITIES'],
            f'{domain2}_COMMUNITIES': row2['COMMUNITIES'],
            f'{domain1}_SCHEDULE': row1['SCHEDULE'],
            f'{domain2}_SCHEDULE': row2['SCHEDULE'],
            'NAMES_MATCH': row1['REPORT_NAME'] == row2['REPORT_NAME'],
            'SCHEDULES_MATCH': row1['SCHEDULE'] == row2['SCHEDULE'],
        })

    common_details = pd.DataFrame(detail_rows, columns=detail_columns)
    if not detail_rows:
        # An empty frame would otherwise carry object-typed flag columns;
        # keep them boolean so `~frame['NAMES_MATCH']` masks behave normally.
        common_details = common_details.astype(
            {'NAMES_MATCH': bool, 'SCHEDULES_MATCH': bool}
        )

    return {
        'common': sorted(common),
        'only_in_domain1': sorted(only_in_1),
        'only_in_domain2': sorted(only_in_2),
        'common_details': common_details,
        'count_common': len(common),
        'count_only_domain1': len(only_in_1),
        'count_only_domain2': len(only_in_2),
    }

# Example: Compare AZ and BS
comparison = compare_report_actions('AZ', 'BS', report_actions_df)

banner = '=' * 80
print(f"\n{banner}")
print("Comparison: AZ vs BS")
print(banner)
print(f"Common report actions: {comparison['count_common']}")
print(f"Only in AZ: {comparison['count_only_domain1']}")
print(f"Only in BS: {comparison['count_only_domain2']}")

# Every section shows at most 10 codes, then a count of whatever was left out
preview_sections = (
    ("\nCommon actions:", 'common'),
    ("\nOnly in AZ (sample):", 'only_in_domain1'),
    ("\nOnly in BS (sample):", 'only_in_domain2'),
)
for heading, result_key in preview_sections:
    codes = comparison[result_key]
    print(heading)
    for code in codes[:10]:
        print(f"  - {code}")
    hidden = len(codes) - 10
    if hidden > 0:
        print(f"  ... and {hidden} more")

In [None]:
# Inspect the shared actions whose name or schedule differs between AZ and BS
print("Common actions - detailed comparison:")
print("\nActions with different names:")
details = comparison['common_details']
diff_names = details[~details['NAMES_MATCH']]
name_columns = ['ACTION_CODE', 'AZ_REPORT_NAME', 'BS_REPORT_NAME']
print(diff_names[name_columns].to_string(index=False))

print("\nActions with different schedules:")
diff_schedules = details[~details['SCHEDULES_MATCH']]
print(f"Count: {len(diff_schedules)}")
if len(diff_schedules):
    # Only the first five mismatches are shown
    schedule_columns = ['ACTION_CODE', 'AZ_SCHEDULE', 'BS_SCHEDULE']
    print(diff_schedules[schedule_columns].iloc[:5].to_string(index=False))

## 2. Comparison Matrix - All Domains

In [None]:
# Build a domain-by-domain matrix: the diagonal holds each domain's row count,
# every other cell the number of action codes the two domains share
domains = deployments_df['DOMAIN_CODE'].unique()

print("Report Actions Comparison Matrix:")
print("="*80)

matrix = []
for row_domain in domains:
    cells = {'Domain': row_domain}
    for col_domain in domains:
        if row_domain != col_domain:
            shared = compare_report_actions(
                row_domain, col_domain, report_actions_df
            )['count_common']
            cells[col_domain] = f"{shared} common"
        else:
            domain_rows = int((report_actions_df['DOMAIN_CODE'] == row_domain).sum())
            cells[col_domain] = f"{domain_rows} (total)"
    matrix.append(cells)

matrix_df = pd.DataFrame(matrix)
print(matrix_df.to_string(index=False))

## 3. Migration Helper - Copy Report Actions

In [None]:
@dataclass
class MigrationConfig:
    """Configuration for migrating a report action between domains.

    The two override fields default to ``None``, meaning "no override given".
    """
    # DOMAIN_CODE of the deployment the action is copied from
    source_domain: str
    # DOMAIN_CODE of the deployment the action is copied to
    target_domain: str
    # ACTION_CODE of the report action to migrate
    action_code: str
    # Optional overrides
    # Replacement SCHEDULE value; when unset the schedule is flagged for review
    new_schedule: Optional[str] = None
    # Replacement communities list; when unset the source list is filtered
    # down to the communities that exist in the target domain
    new_communities: Optional[List[str]] = None
    # When True, parents missing from the target domain are dropped from PARENTS
    adjust_parents: bool = True

def _parse_json_field(raw, default):
    """Decode a JSON-encoded column value; falsy values (None, '') yield *default*."""
    return json.loads(raw) if raw else default


def prepare_action_migration(
    config: "MigrationConfig",
    report_actions_df: pd.DataFrame,
    deployments_df: pd.DataFrame,
    communities_df: pd.DataFrame
) -> Dict:
    """
    Prepare a report action for migration from source to target domain.

    Nothing is written anywhere: the function builds an adjusted copy of the
    source row and reports what it changed and what still needs manual review.

    Args:
        config: Source/target domain, action code and optional overrides.
        report_actions_df: Report actions of all domains.
        deployments_df: Deployments table; supplies the target DEPLOYMENT_VERSION.
        communities_df: Communities table; supplies the target COMMUNITY_IDs.

    Returns:
        Dictionary with:
        - original: Original action data
        - migrated: Adjusted action data for target domain
        - changes: List of changes made
        - warnings: List of warnings/issues to address

    Raises:
        ValueError: If the action does not exist in the source domain, or if
            the target domain has no row in ``deployments_df``.
    """
    # Get source action
    source_rows = report_actions_df[
        (report_actions_df['DOMAIN_CODE'] == config.source_domain) &
        (report_actions_df['ACTION_CODE'] == config.action_code)
    ]

    if len(source_rows) == 0:
        raise ValueError(f"Action {config.action_code} not found in {config.source_domain}")

    source_action = source_rows.iloc[0].to_dict()

    # Get target deployment info. Checked explicitly so an unknown target
    # domain gives a clear error rather than an IndexError from .iloc[0].
    target_rows = deployments_df[
        deployments_df['DOMAIN_CODE'] == config.target_domain
    ]
    if len(target_rows) == 0:
        raise ValueError(f"Target domain {config.target_domain} not found in deployments")
    target_deployment = target_rows.iloc[0]

    # Get target communities
    target_communities = communities_df[
        communities_df['DOMAIN_CODE'] == config.target_domain
    ]['COMMUNITY_ID'].tolist()

    # Create migrated action
    migrated = source_action.copy()
    changes = []
    warnings = []

    # 1. Update deployment-specific attributes
    migrated['DOMAIN_CODE'] = config.target_domain
    changes.append(f"DOMAIN_CODE: {source_action['DOMAIN_CODE']} → {config.target_domain}")

    migrated['DEPLOYMENT_VERSION'] = target_deployment['DEPLOYMENT_VERSION']
    changes.append(f"DEPLOYMENT_VERSION: {source_action['DEPLOYMENT_VERSION']} → {target_deployment['DEPLOYMENT_VERSION']}")

    # 2. Adjust communities
    try:
        communities = _parse_json_field(source_action['COMMUNITIES'], [])

        if config.new_communities is not None:
            # Use specified communities
            new_communities = config.new_communities
            changes.append(f"COMMUNITIES: {len(communities)} → {len(new_communities)} (manually specified)")
        elif communities:
            # Check if source communities exist in target
            valid_communities = [c for c in communities if c in target_communities]
            invalid_communities = [c for c in communities if c not in target_communities]

            if invalid_communities:
                warnings.append(f"Communities not in target: {invalid_communities}")
                warnings.append(f"Valid target communities: {target_communities}")

            new_communities = valid_communities
            changes.append(f"COMMUNITIES: {len(communities)} → {len(valid_communities)} (filtered for target)")
        else:
            new_communities = []

        migrated['COMMUNITIES'] = json.dumps(new_communities)
    except (json.JSONDecodeError, TypeError) as e:
        # On failure the source value is left in place and flagged for review
        warnings.append(f"Failed to parse COMMUNITIES: {e}")

    # 3. Adjust schedule
    if config.new_schedule:
        migrated['SCHEDULE'] = config.new_schedule
        changes.append("SCHEDULE: Updated with provided schedule")
    else:
        warnings.append("SCHEDULE: Review required - may need timezone/timing adjustments")

    # 4. Check parents
    try:
        parents = _parse_json_field(source_action['PARENTS'], [])
        if parents:
            # Check if parents exist in target domain
            target_actions = set(report_actions_df[
                report_actions_df['DOMAIN_CODE'] == config.target_domain
            ]['ACTION_CODE'].tolist())

            missing_parents = [p for p in parents if p not in target_actions]
            if missing_parents:
                warnings.append(f"Parent actions not in target: {missing_parents}")
                if config.adjust_parents:
                    valid_parents = [p for p in parents if p in target_actions]
                    migrated['PARENTS'] = json.dumps(valid_parents)
                    changes.append(f"PARENTS: Removed missing parents - {len(parents)} → {len(valid_parents)}")
    except (json.JSONDecodeError, TypeError) as e:
        warnings.append(f"Failed to parse PARENTS: {e}")

    # 5. Flag query reference schema names (the value itself is not rewritten)
    try:
        query_ref = _parse_json_field(source_action['QUERY_REFERENCE'], {})
        if query_ref and 'database_name' in query_ref:
            # This might need schema name adjustment
            warnings.append(f"QUERY_REFERENCE: Review database_name '{query_ref['database_name']}' - may need schema adjustment")
    except (json.JSONDecodeError, TypeError) as e:
        warnings.append(f"Failed to parse QUERY_REFERENCE: {e}")

    return {
        'original': source_action,
        'migrated': migrated,
        'changes': changes,
        'warnings': warnings,
    }

# Confirmation message emitted once the definitions above have been executed
print("Migration helper functions loaded.")

## 4. Example: Prepare Migration

In [None]:
# Example: Migrate a report action from AZ to WY
# Use the first AZ action code in table order as the example. Indexing into a
# set picks an arbitrary element that can change between interpreter runs, so
# the example would not be reproducible.
az_action_codes = report_actions_df.loc[
    report_actions_df['DOMAIN_CODE'] == 'AZ', 'ACTION_CODE'
].dropna()
az_actions = set(az_action_codes)
example_action = az_action_codes.iloc[0] if len(az_action_codes) else None

if example_action is not None:
    print(f"Example: Migrating action '{example_action}' from AZ to WY\n")

    config = MigrationConfig(
        source_domain='AZ',
        target_domain='WY',
        action_code=example_action,
        adjust_parents=True
    )

    result = prepare_action_migration(
        config,
        report_actions_df,
        deployments_df,
        communities_df
    )

    print("Changes to be made:")
    for change in result['changes']:
        print(f"  ✓ {change}")

    if result['warnings']:
        print("\nWarnings - Manual Review Required:")
        for warning in result['warnings']:
            print(f"  ⚠️  {warning}")

    # Show the same four fields before and after the adjustment
    shown_fields = ('ACTION_CODE', 'REPORT_NAME', 'DOMAIN_CODE', 'COMMUNITIES')
    for label, result_key in (("Original", 'original'), ("Migrated", 'migrated')):
        print(f"\n{label} action details:")
        for field in shown_fields:
            print(f"  {field}: {result[result_key][field]}")
else:
    print("No actions found to use as example")

## 5. Batch Analysis - Actions to Consider Migrating

In [None]:
def analyze_migration_candidates(
    source_domain: str,
    target_domain: str,
    report_actions_df: pd.DataFrame
) -> pd.DataFrame:
    """
    List the source-domain actions that the target domain does not have.

    Returns a copy of the matching rows restricted to ACTION_CODE, REPORT_NAME,
    ABBREVIATION and ACTION_TYPE, with SOURCE and TARGET columns added.
    """
    source_only_codes = compare_report_actions(
        source_domain, target_domain, report_actions_df
    )['only_in_domain1']

    # A row qualifies when it belongs to the source and its code is source-only
    from_source = report_actions_df['DOMAIN_CODE'] == source_domain
    missing_in_target = report_actions_df['ACTION_CODE'].isin(source_only_codes)
    kept_columns = ['ACTION_CODE', 'REPORT_NAME', 'ABBREVIATION', 'ACTION_TYPE']

    candidates = report_actions_df.loc[from_source & missing_in_target, kept_columns].copy()
    candidates['SOURCE'] = source_domain
    candidates['TARGET'] = target_domain
    return candidates

# Example: list the AZ actions that WY does not have yet
candidates = analyze_migration_candidates('AZ', 'WY', report_actions_df)
candidate_count = len(candidates)
candidate_preview = candidates.iloc[:20]  # at most the first 20 rows

print(f"\nActions in AZ that could be migrated to WY: {candidate_count}")
print("\nSample candidates:")
print(candidate_preview.to_string(index=False))

## 6. Export Migration Plan

In [None]:
# Create a migration plan for review
def create_migration_plan(
    source_domain: str,
    target_domain: str,
    action_codes: List[str],
    report_actions_df: pd.DataFrame,
    deployments_df: pd.DataFrame,
    communities_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Create a migration plan for multiple actions.

    Each action code is run through ``prepare_action_migration`` and summarised
    in one row. A failure for one action is recorded as an ERROR row and does
    not stop the remaining actions from being processed.

    Args:
        source_domain: DOMAIN_CODE the actions are copied from.
        target_domain: DOMAIN_CODE the actions are copied to.
        action_codes: ACTION_CODEs to include in the plan.
        report_actions_df: Report actions of all domains.
        deployments_df: Deployments table.
        communities_df: Communities table.

    Returns:
        DataFrame with the columns ACTION_CODE, REPORT_NAME, SOURCE, TARGET,
        CHANGES_COUNT, WARNINGS_COUNT, STATUS (READY, REVIEW_REQUIRED or ERROR)
        and WARNINGS. The columns are present even when ``action_codes`` is
        empty.
    """
    # Explicit layout so an empty plan still exposes every column
    plan_columns = [
        'ACTION_CODE', 'REPORT_NAME', 'SOURCE', 'TARGET',
        'CHANGES_COUNT', 'WARNINGS_COUNT', 'STATUS', 'WARNINGS',
    ]
    plan = []

    for action_code in action_codes:
        try:
            config = MigrationConfig(
                source_domain=source_domain,
                target_domain=target_domain,
                action_code=action_code,
                adjust_parents=True
            )

            result = prepare_action_migration(
                config,
                report_actions_df,
                deployments_df,
                communities_df
            )
            action_warnings = result['warnings']

            plan.append({
                'ACTION_CODE': action_code,
                'REPORT_NAME': result['original']['REPORT_NAME'],
                'SOURCE': source_domain,
                'TARGET': target_domain,
                'CHANGES_COUNT': len(result['changes']),
                'WARNINGS_COUNT': len(action_warnings),
                'STATUS': 'READY' if len(action_warnings) == 0 else 'REVIEW_REQUIRED',
                # join() of an empty list is already the empty string
                'WARNINGS': ' | '.join(action_warnings),
            })
        except Exception as e:
            # Deliberately broad: any failure becomes an ERROR row in the plan
            # so that one bad action cannot abort the whole batch.
            plan.append({
                'ACTION_CODE': action_code,
                'REPORT_NAME': '',
                'SOURCE': source_domain,
                'TARGET': target_domain,
                'CHANGES_COUNT': 0,
                'WARNINGS_COUNT': 1,
                'STATUS': 'ERROR',
                'WARNINGS': str(e),
            })

    return pd.DataFrame(plan, columns=plan_columns)

# Example: plan the migration of the first 5 candidates and export it
sample_actions = candidates['ACTION_CODE'].iloc[:5].tolist()

if sample_actions:
    migration_plan = create_migration_plan(
        source_domain='AZ',
        target_domain='WY',
        action_codes=sample_actions,
        report_actions_df=report_actions_df,
        deployments_df=deployments_df,
        communities_df=communities_df,
    )

    summary_columns = ['ACTION_CODE', 'REPORT_NAME', 'STATUS', 'WARNINGS_COUNT']
    print("\nMigration Plan Summary:")
    print(migration_plan[summary_columns].to_string(index=False))

    # Write the complete plan (all columns) next to the notebook for review
    output_path = BASE_DIR / "migration_plan_AZ_to_WY.csv"
    migration_plan.to_csv(output_path, index=False)
    print(f"\nFull migration plan saved to: {output_path}")