# MPM Test Data Loading with Snowpark

This notebook loads MPM (Master Project Management) YAML files into a Snowpark local testing session, then saves the combined tables to a local SQLite database.

## Goals:
1. Load the four version-005 MPM YAML files (AZ, BS, CO, WY)
2. Use a Snowpark local testing session
3. Create Snowpark DataFrames with schema variations
4. Enable data comparison testing

In [None]:
# Imports
import json
from pathlib import Path
from datetime import datetime
import sys
import importlib

# Make the current working directory importable (prepended once, never duplicated)
project_root = Path.cwd()
_root_entry = str(project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from snowflake.snowpark import Session as SnowparkSession
from snowflake.snowpark.types import StructType, StructField, StringType, ArrayType, IntegerType

# Import the local testing modules, then reload them so source edits made
# since the kernel started (e.g. schema changes) are picked up.
from snowflake_local_testing import mpm_parser, mpm_snowpark, schema

# Reload order is unchanged: schema first, then the parser, then the Snowpark helpers
for _module in (schema, mpm_parser, mpm_snowpark):
    importlib.reload(_module)

# Bind the classes only after the reloads so they come from the refreshed modules
from snowflake_local_testing.mpm_parser import MPMConfig
from snowflake_local_testing.mpm_snowpark import MPMSnowparkSaver

In [None]:
# Debug: Check environment
import sys
from pathlib import Path

project_root = Path.cwd()

# Locations the checks below probe, relative to the working directory
tests_dir = project_root / "tests"
package_dir = tests_dir / "snowflake_local_testing"
parser_file = package_dir / "mpm_parser.py"

print(f"Current directory: {project_root}")
print(f"\nChecking if tests directory exists: {tests_dir.exists()}")
print(f"Checking if tests/snowflake_local_testing exists: {package_dir.exists()}")
print(f"Checking if mpm_parser.py exists: {parser_file.exists()}")

# Only the first five sys.path entries are shown to keep the output short
print("\nPython path:")
for entry in sys.path[:5]:
    print(f"  {entry}")

# Try importing step by step; each failure is reported rather than raised
# so the second import is still attempted after the first one fails.
try:
    import tests
    print(f"\n✓ tests module imported from: {tests.__file__}")
except Exception as exc:
    print(f"\n✗ Failed to import tests: {exc}")

try:
    import tests.snowflake_local_testing
    print("✓ tests.snowflake_local_testing imported")
except Exception as exc:
    print(f"✗ Failed to import tests.snowflake_local_testing: {exc}")

In [None]:
# Configuration
BASE_DIR = Path.cwd()
MPM_DIR = BASE_DIR.joinpath("resources", "master-mpm")

# MPM files to load: one version-005 file per domain, keyed by domain code.
# Each file lives at <MPM_DIR>/<code>/<code>_005-mpm.yaml.
MPM_FILES = {
    code: MPM_DIR / code / f"{code}_005-mpm.yaml"
    for code in ("AZ", "BS", "CO", "WY")
}

# Verify files exist (missing files are only reported here, not raised)
print("Checking MPM files...")
for domain, mpm_file in MPM_FILES.items():
    if mpm_file.exists():
        print(f"✓ {domain}: {mpm_file.name} ({mpm_file.stat().st_size:,} bytes)")
        continue
    print(f"⚠️  {domain}: File not found at {mpm_file}")

In [None]:
# Create local Snowpark session: the "local_testing" option is the only
# setting passed to the builder.
session_builder = SnowparkSession.builder.config("local_testing", True)
session = session_builder.create()

print("✓ Snowpark session created (local testing mode)")
print(f"Session ID: {session.session_id}")

In [None]:
# Parse MPM configurations: one MPMConfig per domain, keyed by domain code
mpm_configs = {}

for domain, mpm_file in MPM_FILES.items():
    print(f"\nParsing {domain}...")
    config = mpm_configs[domain] = MPMConfig(mpm_file)

    # Pull the four sections once each (same order as the summary below)
    deployment_info = config.get_deployment_info()
    community_list = config.get_communities_list()
    sensor_action_list = config.get_sensor_actions()
    report_action_list = config.get_report_actions()

    # Show summary; the deployment name falls back to 'N/A' when the key is absent
    print(f"  Deployment: {deployment_info.get('name', 'N/A')}")
    print(f"  Communities: {len(community_list)}")
    print(f"  Sensor Actions: {len(sensor_action_list)}")
    print(f"  Report Actions: {len(report_action_list)}")

print(f"\n✓ Parsed {len(mpm_configs)} MPM configurations")

In [None]:
# Initialize SQLite database connection
import pandas as pd
import sqlalchemy as db
from sqlalchemy.orm import Session

# SQLite file under the working directory; create its parent folder if missing
DB_PATH = BASE_DIR.joinpath("resources", "meta-db", "schema-sentinel.db")
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

# SQLAlchemy engine pointed at that file
engine = db.create_engine("sqlite:///" + str(DB_PATH))

# Initialize MPM Snowpark Saver on top of the local Snowpark session
saver = MPMSnowparkSaver(session)
print("✓ MPMSnowparkSaver initialized")
print(f"✓ SQLite database ready at {DB_PATH}")

In [None]:
# Load all domains using Snowpark, then convert to pandas.
# One accumulator list per table type; each receives one pandas frame per domain.
all_deployments = []
all_communities = []
all_sensor_actions = []
all_report_actions = []

rule = "=" * 60
table_labels = ("Deployment", "Communities", "Sensor Actions", "Report Actions")
collectors = (all_deployments, all_communities, all_sensor_actions, all_report_actions)

for domain, config in mpm_configs.items():
    print(f"\n{rule}")
    print(f"Loading {domain} domain...")
    print(rule)

    # Create Snowpark DataFrames, one table per section, prefixed with the domain code
    deployment_sf_df = saver.save_deployment(
        config.get_deployment_info(), table_name=f"{domain}_DEPLOYMENT"
    )
    communities_sf_df = saver.save_communities(
        config.get_communities_list(), table_name=f"{domain}_COMMUNITIES"
    )
    sensor_actions_sf_df = saver.save_sensor_actions(
        config.get_sensor_actions(), table_name=f"{domain}_SENSOR_ACTIONS"
    )
    report_actions_sf_df = saver.save_report_actions(
        config.get_report_actions(), table_name=f"{domain}_REPORT_ACTIONS"
    )

    # Convert to pandas in creation order, after all four tables have been saved
    domain_frames = [
        sf_df.to_pandas()
        for sf_df in (
            deployment_sf_df,
            communities_sf_df,
            sensor_actions_sf_df,
            report_actions_sf_df,
        )
    ]

    # Collect each frame into the accumulator for its table type
    for collector, frame in zip(collectors, domain_frames):
        collector.append(frame)

    print(f"✓ {domain} loaded:")
    for label, frame in zip(table_labels, domain_frames):
        print(f"  - {label}: {len(frame)} rows")

# Combine all domains into single DataFrames
print(f"\n{rule}")
print("Combining all domains...")
print(rule)

# ignore_index=True renumbers rows 0..n-1 across the stacked domains
deployments_df = pd.concat(all_deployments, ignore_index=True)
communities_df = pd.concat(all_communities, ignore_index=True)
sensor_actions_df = pd.concat(all_sensor_actions, ignore_index=True)
report_actions_df = pd.concat(all_report_actions, ignore_index=True)

print("\n✓ Combined DataFrames:")
combined_frames = (
    ("Deployments", deployments_df),
    ("Communities", communities_df),
    ("Sensor Actions", sensor_actions_df),
    ("Report Actions", report_actions_df),
)
for label, frame in combined_frames:
    print(f"  - Total {label}: {len(frame)} rows")

In [None]:
# Show summary by domain: row counts per DOMAIN_CODE for each combined frame
frames_by_title = (
    ("Deployments", deployments_df),
    ("Communities", communities_df),
    ("Sensor Actions", sensor_actions_df),
    ("Report Actions", report_actions_df),
)

for title, frame in frames_by_title:
    print(f"\n{title} by domain:")
    print(frame.groupby('DOMAIN_CODE').size())

In [None]:
# Save all DataFrames to SQLite using raw connection
import sqlite3
from contextlib import closing

# Destination table name -> combined DataFrame, in write order
tables_to_save = {
    'mpm_deployments': deployments_df,
    'mpm_communities': communities_df,
    'mpm_sensor_actions': sensor_actions_df,
    'mpm_report_actions': report_actions_df,
}

# closing() releases the connection even when a write raises. A sqlite3
# connection used directly as a context manager only commits or rolls back;
# it does not close, so it would not fix the leak.
with closing(sqlite3.connect(str(DB_PATH))) as conn:
    for table_name, frame in tables_to_save.items():
        # if_exists='replace' recreates the table, so re-running this cell
        # overwrites earlier contents instead of appending duplicates.
        frame.to_sql(table_name, conn, if_exists='replace', index=False)

print(f"✓ Saved all domains to SQLite database: {DB_PATH}")
for table_name, frame in tables_to_save.items():
    print(f"  - {table_name}: {len(frame)} rows")

In [None]:
# Verify data in SQLite - show summary by domain
import sqlite3
from contextlib import closing

separator = "\n" + "="*80

# (heading, table) pairs for the per-domain row-count summaries, in print order
summary_tables = (
    ("Deployments", "mpm_deployments"),
    ("Communities", "mpm_communities"),
    ("Sensor Actions", "mpm_sensor_actions"),
    ("Report Actions", "mpm_report_actions"),
)

# closing() guarantees the connection is released even if one of the queries
# fails (for example, a table is missing because the save cell was not run).
with closing(sqlite3.connect(str(DB_PATH))) as conn:
    print("Tables in database:")
    tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", conn)
    print(tables)

    for label, table_name in summary_tables:
        print(separator)
        print(f"{label} by domain:")
        # table_name comes from the fixed tuple above, never from user input
        print(pd.read_sql_query(
            f"SELECT DOMAIN_CODE, COUNT(*) as count FROM {table_name} GROUP BY DOMAIN_CODE",
            conn,
        ))

    # Community rows for two of the domains; the domain code is bound as a
    # query parameter rather than spliced into the SQL text.
    for domain_code in ("AZ", "BS"):
        print(separator)
        print(f"Sample {domain_code} communities:")
        print(pd.read_sql_query(
            "SELECT * FROM mpm_communities WHERE DOMAIN_CODE=?",
            conn,
            params=(domain_code,),
        ))

print(f"\n✓ All 4 domains loaded successfully into {DB_PATH}")