# Interactive Databricks Table Editor with ipyaggrid

This notebook demonstrates how to use ipyaggrid to create an interactive, editable table for a Databricks table with:
- Selected columns only
- Dropdown options for certain columns
- Calendar picker for date columns
- Handling of widget sync issues

## 1. Install and Import Required Libraries

In [None]:
# Install required packages if not already installed
%pip install ipyaggrid pandas numpy

In [None]:
import pandas as pd
import numpy as np
from ipyaggrid import Grid
import json
import time
from datetime import datetime
import ipywidgets as widgets
from pyspark.sql.functions import col
from IPython.display import display, clear_output, HTML
from typing import List, Dict, Any

## 2. Define Table Configuration

In [None]:
# Target table; the loader and the save handler address it as
# "<DATABASE_NAME>.<TABLE_NAME>".
DATABASE_NAME, TABLE_NAME = "your_database", "your_table"

# Columns fetched from the table and shown in the grid, in display order.
COLUMNS_TO_DISPLAY = [
    "id",
    "name",
    "category",
    "status",
    "created_date",
]

# Editable columns, keyed by column name. "type" selects the cell editor
# ("dropdown" or "date"); dropdown entries also carry their "options".
EDITABLE_COLUMNS = dict(
    category=dict(type="dropdown", options=["A", "B", "C", "D"]),
    status=dict(type="dropdown", options=["Active", "Pending", "Closed", "Canceled"]),
    created_date=dict(type="date"),
)

## 3. Load Data from Databricks

In [None]:
def load_table_data(columns_to_display: List[str]) -> pd.DataFrame:
    """
    Load the configured Databricks table into a pandas DataFrame.

    The table is addressed as ``DATABASE_NAME.TABLE_NAME`` and read through the
    notebook-level ``spark`` session.

    Args:
        columns_to_display: List of column names to fetch

    Returns:
        pandas DataFrame with the requested columns, capped at 200 rows. When
        no ``spark`` session is defined, an empty DataFrame that still carries
        the requested column names.
    """
    try:
        # Bind the notebook-global session to a *different* local name.
        # ``spark = spark`` would make ``spark`` local to this function, so the
        # read raises UnboundLocalError (a NameError subclass) and the fallback
        # below would run even when a session exists.
        session = spark  # noqa: F821 - provided by the notebook environment
    except NameError:
        print("No Spark session found. Make sure you're running in a Databricks environment.")
        # Keep the column names so downstream code can still reference them
        return pd.DataFrame(columns=list(columns_to_display))

    # Load data from the specified table
    table_path = f"{DATABASE_NAME}.{TABLE_NAME}"

    # Select only the columns we want to display
    selected = session.table(table_path).select(columns_to_display)

    # The grid works on a pandas DataFrame; the row cap keeps the conversion
    # small (the table is expected to stay under 200 rows)
    return selected.limit(200).toPandas()

# Fetch the configured columns once; this DataFrame is the grid's input data
df = load_table_data(columns_to_display=COLUMNS_TO_DISPLAY)

## 4. Configure ipyaggrid with Dropdowns and Date Picker

In [None]:
def create_column_defs(columns_to_display: List[str], editable_columns: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Create column definitions for ipyaggrid.

    Every column is sortable, filterable and resizable. A column is editable
    only when it has an entry in ``editable_columns``; that entry's ``"type"``
    picks the cell editor:

    * ``"dropdown"`` - select editor offering the entry's ``"options"``
    * ``"date"`` - date editor plus a formatter that renders ``YYYY-MM-DD``

    Entries with any other type stay editable with the grid's default editor.

    Args:
        columns_to_display: List of column names to display
        editable_columns: Dictionary of editable columns and their properties

    Returns:
        List of column definitions for ipyaggrid, in display order

    Raises:
        KeyError: If an editable entry lacks ``"type"``, or a dropdown entry
            lacks ``"options"``.
    """
    # JavaScript callbacks are written as "function..." source strings.
    # NOTE(review): ipyaggrid is understood to evaluate such strings into
    # functions on the browser side - confirm against the installed version.
    # Missing values render as an empty cell instead of the 1970 epoch, and
    # the date is taken in UTC so ISO date strings do not shift by a day in
    # timezones west of UTC.
    date_formatter_js = (
        "function(params) {"
        " if (params.value === null || params.value === undefined || params.value === '') { return ''; }"
        " var d = new Date(params.value);"
        " if (isNaN(d.getTime())) { return String(params.value); }"
        " return d.toISOString().slice(0, 10);"
        " }"
    )

    column_defs = []

    for col_name in columns_to_display:
        is_editable = col_name in editable_columns
        col_def = {
            "headerName": col_name.replace("_", " ").title(),
            "field": col_name,
            "sortable": True,
            "filter": True,
            "resizable": True,
            "editable": is_editable,
        }

        # Configure specific column types
        if is_editable:
            spec = editable_columns[col_name]
            col_type = spec["type"]

            if col_type == "dropdown":
                col_def["cellEditor"] = "agSelectCellEditor"
                col_def["cellEditorParams"] = {"values": spec["options"]}

            elif col_type == "date":
                col_def["cellEditor"] = "agDateCellEditor"
                col_def["cellEditorParams"] = {"useFormatter": True}
                col_def["valueFormatter"] = date_formatter_js

        column_defs.append(col_def)

    return column_defs

# Build the per-column grid configuration from the notebook settings
column_defs = create_column_defs(
    columns_to_display=COLUMNS_TO_DISPLAY,
    editable_columns=EDITABLE_COLUMNS,
)

## 5. Create Interactive Grid with Sync Issue Handling

In [None]:
# Controls shown with the grid: two action buttons and an output area
# that update_status() writes its messages into.
save_button = widgets.Button(button_style="success", description="Save Changes")
refresh_button = widgets.Button(description="Refresh Grid")
status_output = widgets.Output()

# Function to update status message
def update_status(message, is_error=False):
    """Replace the contents of ``status_output`` with one colored message.

    Args:
        message: Text to show. It is converted with ``str()`` and HTML-escaped,
            so characters such as ``<`` or ``&`` in exception messages are
            displayed literally instead of being interpreted as markup.
        is_error: When True the text is red, otherwise green.
    """
    from html import escape  # stdlib; imported here so the cell is self-contained

    color = "red" if is_error else "green"
    with status_output:
        clear_output()
        display(HTML(f"<div style='color:{color}'>{escape(str(message))}</div>"))

# Most recent grid contents captured by the refresh handler (None until then)
edited_data = None

# Grid behaviour passed through to AG Grid
grid_options = {
    "columnDefs": column_defs,
    "rowSelection": "multiple",
    "enableRangeSelection": True,
    "suppressRowClickSelection": True,
    "defaultColDef": {
        "resizable": True,
        "sortable": True,
        "filter": True
    },
    # Commit an in-progress edit when the cell loses focus, so a half-finished
    # edit is not left out of the grid data
    "stopEditingWhenCellsLoseFocus": True,
    # Force a redraw once an edit ends. The callback is a JavaScript
    # "function..." source string.
    # NOTE(review): ipyaggrid is understood to turn such strings into
    # functions in the browser - confirm against the installed version.
    "onCellEditingStopped": "function(params) { params.api.refreshCells({force: true}); }",
}

# Build the grid widget.
# NOTE(review): `sync_grid_selection` / `sync_grid_sort` are not Grid
# constructor parameters as far as the ipyaggrid documentation shows, so they
# were removed; `sync_on_edit=True` is the documented switch for sending cell
# edits back to Python. Confirm against the installed ipyaggrid version.
g = Grid(
    grid_data=df,
    grid_options=grid_options,
    sync_on_edit=True,   # Push cell edits from the browser back to Python
    theme="ag-theme-balham",
    columns_fit="auto",
    index=False,         # Hide pandas index column
    quick_filter=True,   # Enable quick filtering
    export_csv=True,     # Enable CSV export
    export_excel=True,   # Enable Excel export
    height=600,          # Set grid height
    width="100%",
)

In [None]:
# Capture the grid contents on demand
def on_refresh_clicked(b):
    """Capture the grid's exported rows into the global ``edited_data``.

    The handler asks the grid for a fresh export and then stores whatever
    export the kernel has already received. ``edited_data`` is left untouched
    when no export is available yet.

    NOTE(review): the export requested here presumably arrives as a widget
    message that the kernel can only process after this handler returns, so
    the value stored is the latest export received *before* this click. That
    is also why the former ``time.sleep`` and injected ``<script>`` were
    dropped: waiting inside the handler cannot make the data arrive.

    Args:
        b: The clicked button (unused; ``None`` when called from other code).
    """
    global edited_data
    try:
        # Request an export of the full grid from the browser
        g.get_grid()

        # grid_data_out is a dict of exports; the full grid is under "grid"
        exports = g.grid_data_out
        captured = exports.get("grid") if isinstance(exports, dict) else None

        if captured is None:
            update_status(
                "Grid export requested. Click \"Refresh Grid\" again to capture the data.",
                is_error=True,
            )
            return

        edited_data = captured
        update_status(f"Grid refreshed. {len(edited_data)} rows available for saving.")
    except Exception as e:
        # Top-level UI boundary: report in the status area instead of raising
        update_status(f"Error refreshing grid: {str(e)}", is_error=True)

# Function to save changes to Databricks table
def on_save_clicked(b):
    """Write the captured grid edits back to the table with a MERGE on ``id``.

    Steps: refresh the captured data, validate it, register it as the temp
    view ``temp_edited_data`` and run ``MERGE INTO`` against
    ``DATABASE_NAME.TABLE_NAME``. Only columns that are both editable and
    displayed are updated; rows are matched on ``id`` and unmatched rows are
    ignored. All failures are reported through ``update_status``.

    Args:
        b: The clicked button (unused).
    """
    global edited_data
    try:
        # Refresh first so a separate click on "Refresh Grid" is not required
        on_refresh_clicked(None)

        # The refresh handler may have stored either the exported table itself
        # or the grid's whole export dict (table under the "grid" key)
        captured = edited_data.get("grid") if isinstance(edited_data, dict) else edited_data
        if captured is None:
            update_status("Please refresh the grid first to capture changes.", is_error=True)
            return
        snapshot = pd.DataFrame(captured)

        update_columns = [name for name in EDITABLE_COLUMNS if name in COLUMNS_TO_DISPLAY]
        if not update_columns:
            # An empty SET clause would be invalid SQL
            update_status("No editable columns are displayed; nothing to save.", is_error=True)
            return

        required = ["id", *update_columns]
        missing = [name for name in required if name not in snapshot.columns]
        if missing:
            update_status(
                f"Cannot save: grid data is missing column(s): {', '.join(missing)}",
                is_error=True,
            )
            return

        if snapshot.empty:
            # Spark cannot infer a schema from an empty pandas DataFrame
            update_status("The grid has no rows; nothing to save.", is_error=True)
            return

        # Send only the key and the columns being updated.
        # NOTE(review): edited date cells may come back as strings; confirm the
        # MERGE casts them to the target column type.
        edited_spark_df = spark.createDataFrame(snapshot[required])

        # The MERGE below reads the edits through this temporary view
        edited_spark_df.createOrReplaceTempView("temp_edited_data")

        # Create a merge statement based on the primary key (assuming 'id' is the primary key)
        set_clause = ", ".join(f"target.{name} = source.{name}" for name in update_columns)
        spark.sql(f"""
        MERGE INTO {DATABASE_NAME}.{TABLE_NAME} AS target
        USING temp_edited_data AS source
        ON target.id = source.id
        WHEN MATCHED THEN UPDATE SET
            {set_clause}
        """)

        update_status(f"Successfully saved changes to {DATABASE_NAME}.{TABLE_NAME}!")
    except Exception as e:
        # Top-level UI boundary: report in the status area instead of raising
        update_status(f"Error saving changes: {str(e)}", is_error=True)

# Wire each button to its click callback
save_button.on_click(on_save_clicked)
refresh_button.on_click(on_refresh_clicked)

## 6. Display the Interactive Grid with Controls

In [None]:
# Add JavaScript to help with browser sync issues
# NOTE: as written, syncGridAfterEdit() only logs to the browser console; it
# does not reference the grid or the widget. setInterval schedules it every
# 2000 ms, and nothing here clears an interval registered by an earlier run
# of this cell. Treat it as a placeholder for real synchronization logic.
display(HTML("""
<script>
// Helper function to ensure grid synchronization
function syncGridAfterEdit() {
    console.log("Synchronizing grid after edit");
    // This function helps manage sync issues by forcing grid refresh
}

// Auto-sync every 2 seconds to catch edits that might be missed
// This helps with browser-dependent sync issues
setInterval(syncGridAfterEdit, 2000);
</script>
"""))

# Display instructions for users.
# The editable columns are listed by header name because the grid applies no
# special styling to editable cells.
_editable_labels = ", ".join(
    name.replace("_", " ").title()
    for name in COLUMNS_TO_DISPLAY
    if name in EDITABLE_COLUMNS
) or "none configured"

display(HTML(f"""
<div style="margin-bottom: 10px; padding: 10px; background-color: #f5f5f5; border-left: 5px solid #007bff;">
    <h3 style="margin-top: 0;">Instructions:</h3>
    <ol>
        <li>Edit cells by clicking on editable fields (editable columns: {_editable_labels})</li>
        <li>For dropdown fields, click and select from the available options</li>
        <li>For date fields, use the calendar picker to select dates</li>
        <li><b>Important:</b> After making changes, click the "Refresh Grid" button to ensure all edits are captured</li>
        <li>Review your changes, then click "Save Changes" to update the Databricks table</li>
    </ol>
    <p><strong>Note on Browser Sync:</strong> If edits are not appearing after clicking cells, try clicking outside the grid or press Enter after editing.</p>
</div>
"""))

# Lay the two buttons out side by side
button_container = widgets.HBox(children=[refresh_button, save_button])

# Render top to bottom: buttons, status line, grid
display(button_container, status_output, g)

## 7. Additional Helper Functions for Debugging Sync Issues

In [None]:
def compare_original_and_edited():
    """Print the differences between the table and the captured grid data.

    The captured data is refreshed, the table is reloaded with
    ``load_table_data``, both are joined on ``id``, and every editable column
    is compared row by row. Two missing values (None/NaN) count as equal.
    Rows whose ``id`` appears on only one side are not reported.

    NOTE(review): values are compared with ``!=`` as-is, so a date that comes
    back from the grid as a string presumably differs from the table's date
    object even when it denotes the same day - confirm before relying on the
    output for date columns.
    """

    def _is_missing(value):
        """Return True when ``value`` is a scalar null (None, NaN, NaT)."""
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Non-scalar cell contents: treat as present
            return False

    if edited_data is None:
        print("No edited data available. Please click 'Refresh Grid' first.")
        return

    # Force a refresh to get latest data
    on_refresh_clicked(None)

    # The refresh handler may have stored either the exported table itself or
    # the grid's whole export dict (table under the "grid" key)
    captured = edited_data.get("grid") if isinstance(edited_data, dict) else edited_data
    if captured is None:
        print("No edited data available. Please click 'Refresh Grid' first.")
        return
    current_df = pd.DataFrame(captured)

    # Compare with original data
    original_df = load_table_data(COLUMNS_TO_DISPLAY)

    if "id" not in original_df.columns or "id" not in current_df.columns:
        # Without the key column the merge below would raise KeyError
        print("Cannot compare: the 'id' column is missing from the original or edited data.")
        return

    # Merge on id to compare; shared columns get the _orig / _edit suffixes
    merged = original_df.merge(current_df, on='id', suffixes=('_orig', '_edit'))

    # Only editable columns present on both sides can be compared
    comparable = [
        name for name in EDITABLE_COLUMNS
        if f"{name}_orig" in merged.columns and f"{name}_edit" in merged.columns
    ]

    change_rows = []
    for _, row in merged.iterrows():
        row_changes = {}
        for name in comparable:
            before, after = row[f"{name}_orig"], row[f"{name}_edit"]
            if _is_missing(before) and _is_missing(after):
                # NaN != NaN is True, which would report a change that is not one
                continue
            if before != after:
                row_changes[name] = {'from': before, 'to': after}

        if row_changes:
            change_rows.append({'id': row['id'], 'changes': row_changes})

    if change_rows:
        print(f"Found {len(change_rows)} rows with changes:")
        for change in change_rows:
            print(f"\nID: {change['id']}")
            for name, values in change['changes'].items():
                print(f"  {name}: {values['from']} -> {values['to']}")
    else:
        print("No differences found between original and edited data.")
        print("This could indicate a sync issue, or that no edits were made.")

In [None]:
def on_debug_clicked(b):
    """Click callback: run the original-vs-edited comparison; ``b`` is unused."""
    compare_original_and_edited()


# Run this cell to get a button that prints the sync diagnostics on click
debug_button = widgets.Button(description="Debug Sync Issues")
debug_button.on_click(on_debug_clicked)
display(debug_button)

## 8. Best Practices and Notes

### Widget Sync Issues Workarounds
- The template uses several strategies to avoid widget sync issues:
  - Explicit refresh button to manually trigger data synchronization
  - One-way sync options in Grid configuration
  - JavaScript interval hook that runs every 2 seconds (as written it only logs to the browser console; add real sync logic there if needed)
  - Extra delays (time.sleep) before accessing grid data
  - Force cell refresh after editing stops

### Databricks Best Practices
- For small tables (<200 rows), this approach works well
- For larger tables, consider using Databricks SQL or Delta Lake operations directly
- Use the Delta Lake merge operation for updating tables to ensure atomicity
- Consider implementing row-level security if multiple users access the same notebook

### Browser Dependency Notes
- Chrome generally has the best compatibility with ipyaggrid
- Firefox may require additional sync workarounds
- Safari may have issues with certain date picker operations
- If edits seem to disappear, try clicking outside the grid or pressing Enter

### Alternative Approaches
- For very simple use cases, Pandas editable DataFrames might be sufficient
- For complex applications, consider a custom web app using Dash or Streamlit
- Databricks SQL endpoints provide another way to edit data through SQL commands