In [2]:
# import ipywidgets as widgets
from IPython.display import display, clear_output
from ai_cookbook.pipeline.data_source import DataSource
from ai_cookbook.pipeline.processing_step import ProcessingStep
from ai_cookbook.pipeline.output import Output
from ai_cookbook.pipeline.pipeline import Pipeline
from pydantic import ValidationError, TypeAdapter
import yaml



In [3]:
import plotly.graph_objects as go

labels = ["A1", "A2", "B1", "B2", "C1", "C2"]
source = [0, 1, 0, 2, 3, 3]
target = [2, 3, 3, 4, 4, 5]
values = [8, 4, 2, 8, 4, 2]

fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels,
      color = "blue"
    ),
    link = dict(
      source = source,
      target = target,
      value = values
  ))])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()

In [1]:
import solara

# Declare reactive variables at the top level. Components using these variables
# will be re-executed when their values change.
sentence = solara.reactive("Solara makes our team more productive.")
word_limit = solara.reactive(10)


@solara.component
def Page():
    # Calculate word_count within the component to ensure re-execution when reactive variables change.
    word_count = len(sentence.value.split())

    solara.SliderInt("Word limit", value=word_limit, min=2, max=20)
    solara.InputText(label="Your sentence", value=sentence, continuous_update=True)

    # Display messages based on the current word count and word limit.
    if word_count >= int(word_limit.value):
        solara.Error(f"With {word_count} words, you passed the word limit of {word_limit.value}.")
    elif word_count >= int(0.8 * word_limit.value):
        solara.Warning(f"With {word_count} words, you are close to the word limit of {word_limit.value}.")
    else:
        solara.Success("Great short writing!")


# The following line is required only when running the code in a Jupyter notebook:
Page()

<IPython.core.display.Javascript object>

In [2]:
pipeline = Pipeline.from_yaml("../src/ai_cookbook/pipeline_config.yaml")

In [3]:
# Widget for Data Source Name
ds_name = widgets.Text(description='Name')

# Widget for Data Source Type
ds_type = widgets.Dropdown(
    options=['volume', 'delta table'],
    description='Type'
)

# Widget for Path
ds_path = widgets.Text(description='Path')

# Widgets for Format, Catalog, Schema, Table
ds_format = widgets.Text(description='Format')
ds_catalog = widgets.Text(description='Catalog')
ds_schema = widgets.Text(description='Schema')
ds_table = widgets.Text(description='Table')

# Button to Add Data Source
add_ds_button = widgets.Button(description='Add Data Source')

# Output area for Data Source
ds_output = widgets.Output()

In [4]:
def on_add_ds_clicked(b):
    with ds_output:
        clear_output()
        try:
            ds_config = DataSource(
                name=ds_name.value,
                type=ds_type.value,
                path=ds_path.value,
                format=ds_format.value,
                catalog=ds_catalog.value,
                schema=ds_schema.value,
                table=ds_table.value
            )
            pipeline.data_sources.append(ds_config)
            print(f"Added Data Source: {ds_config.name}")
        except ValidationError as e:
            print("Validation Error:")
            print(e)


In [5]:
add_ds_button.on_click(on_add_ds_clicked)

In [6]:
ds_widget_box = widgets.VBox([
    ds_name,
    ds_type,
    ds_path,
    ds_format,
    ds_catalog,
    ds_schema,
    ds_table,
    add_ds_button,
    ds_output
])

display(ds_widget_box)


VBox(children=(Text(value='', description='Name'), Dropdown(description='Type', options=('volume', 'delta tabl…

In [11]:
# Add this import with your others
from IPython.display import display, Markdown

def visualize_pipeline(pipeline):
    mermaid_diagram = """
    ```mermaid
    graph LR
        %% Style definitions
        classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
        classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
        classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;
    """
    
    # Add data sources
    for ds in pipeline.data_sources:
        mermaid_diagram += f'\n    DS_{ds.name}["{ds.name}"]:::datasource'
    
    # Add processing steps and their connections
    for step in pipeline.processing_steps:
        mermaid_diagram += f'\n    PS_{step.name}["{step.name}"]:::processing'
        # Connect data sources to this step
        for input_source in step.inputs:
            mermaid_diagram += f'\n    DS_{input_source} --> PS_{step.name}'
    
    # Add outputs and their connections
    for output in pipeline.outputs:
        mermaid_diagram += f'\n    O_{output.name}["{output.name}"]:::output'
        # Connect steps to this output
        for input_step in output.inputs:
            mermaid_diagram += f'\n    PS_{input_step} --> O_{output.name}'
    
    return mermaid_diagram + "\n    ```"

# Add visualization button and its handler
visualize_button = widgets.Button(description='Visualize Pipeline')
viz_output = widgets.Output()

def on_visualize_clicked(b):
    with viz_output:
        clear_output()
        try:
            diagram = visualize_pipeline(pipeline)
            display(Markdown(diagram))
        except Exception as e:
            print("Error visualizing pipeline:")
            print(e)

visualize_button.on_click(on_visualize_clicked)

# Display the visualization button and output area
display(visualize_button, viz_output)

Button(description='Visualize Pipeline', style=ButtonStyle())

Output()

```mermaid
graph LR
    %% Style definitions
    classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
    classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
    classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;

DS_financial_reports["financial_reports"]:::datasource
PS_parsing["parsing"]:::processing
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_parsing
PS_add_metadata["add_metadata"]:::processing
DS_name='parsing' function='ai_cookbook.functions.parsing.extract_text_from_pdf' inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.extracted_texts' parameters={} --> PS_add_metadata
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_add_metadata
O_financial_reports_index["financial_reports_index"]:::output
PS_name='add_metadata' function='ai_cookbook.functions.metadata.add_metadata' inputs=[ProcessingStep(name='parsing', function='ai_cookbook.functions.parsing.extract_text_from_pdf', inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)], output_table='processed_data.extracted_texts', parameters={}), DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.enriched_texts' parameters={} --> O_financial_reports_index
```
```mermaid
graph LR
    %% Style definitions
    classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
    classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
    classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;

DS_financial_reports["financial_reports"]:::datasource
PS_parsing["parsing"]:::processing
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_parsing
PS_add_metadata["add_metadata"]:::processing
DS_name='parsing' function='ai_cookbook.functions.parsing.extract_text_from_pdf' inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.extracted_texts' parameters={} --> PS_add_metadata
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_add_metadata
O_financial_reports_index["financial_reports_index"]:::output
PS_name='add_metadata' function='ai_cookbook.functions.metadata.add_metadata' inputs=[ProcessingStep(name='parsing', function='ai_cookbook.functions.parsing.extract_text_from_pdf', inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)], output_table='processed_data.extracted_texts', parameters={}), DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.enriched_texts' parameters={} --> O_financial_reports_index
```
```mermaid
graph LR
    %% Style definitions
    classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
    classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
    classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;

DS_financial_reports["financial_reports"]:::datasource
PS_parsing["parsing"]:::processing
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_parsing
PS_add_metadata["add_metadata"]:::processing
DS_name='parsing' function='ai_cookbook.functions.parsing.extract_text_from_pdf' inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.extracted_texts' parameters={} --> PS_add_metadata
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_add_metadata
O_financial_reports_index["financial_reports_index"]:::output
PS_name='add_metadata' function='ai_cookbook.functions.metadata.add_metadata' inputs=[ProcessingStep(name='parsing', function='ai_cookbook.functions.parsing.extract_text_from_pdf', inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)], output_table='processed_data.extracted_texts', parameters={}), DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.enriched_texts' parameters={} --> O_financial_reports_index
```
```mermaid
graph LR
    %% Style definitions
    classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
    classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
    classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;

DS_financial_reports["financial_reports"]:::datasource
PS_parsing["parsing"]:::processing
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_parsing
PS_add_metadata["add_metadata"]:::processing
DS_name='parsing' function='ai_cookbook.functions.parsing.extract_text_from_pdf' inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.extracted_texts' parameters={} --> PS_add_metadata
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_add_metadata
O_financial_reports_index["financial_reports_index"]:::output
PS_name='add_metadata' function='ai_cookbook.functions.metadata.add_metadata' inputs=[ProcessingStep(name='parsing', function='ai_cookbook.functions.parsing.extract_text_from_pdf', inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)], output_table='processed_data.extracted_texts', parameters={}), DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.enriched_texts' parameters={} --> O_financial_reports_index
```
```mermaid
graph LR
    %% Style definitions
    classDef datasource fill:#b3e0ff,stroke:#333,stroke-width:2px;
    classDef processing fill:#98fb98,stroke:#333,stroke-width:2px;
    classDef output fill:#ffb3b3,stroke:#333,stroke-width:2px;

DS_financial_reports["financial_reports"]:::datasource
PS_parsing["parsing"]:::processing
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_parsing
PS_add_metadata["add_metadata"]:::processing
DS_name='parsing' function='ai_cookbook.functions.parsing.extract_text_from_pdf' inputs=[DataSource(name='financial_reports', type='volume', path='/Volumes/main/raw_data/financial_reports', format='pdf', catalog='main', schema='agent_app', volume_name='raw_data', table_schema=None, table=None)] output_table='processed_data.extracted_texts' parameters={} --> PS_add_metadata
DS_name='financial_reports' type='volume' path='/Volumes/main/raw_data/financial_reports' format='pdf' catalog='main' schema='agent_app' volume_name='raw_data' table_schema=None table=None --> PS_add_metadata
O_financial_reports_index["financial_reports_index"]:::output
PS_name='add_metadata' function='ai_cookbook.functions.metadata.add_metadata' inputs

In [10]:
assemble_button = widgets.Button(description='Assemble Pipeline')
assemble_output = widgets.Output()

def on_assemble_clicked(b):
    with assemble_output:
        clear_output()
        try:
            pipeline_config = PipelineConfig(
                data_sources=data_sources,
                processing_steps=processing_steps,
                outputs=outputs
            )
            print("Pipeline Configuration Assembled Successfully!")
            # Optionally display or save the configuration
            config_dict = pipeline_config.dict()
            print(yaml.dump(config_dict))
        except ValidationError as e:
            print("Validation Error in Pipeline Configuration:")
            print(e)

assemble_button.on_click(on_assemble_clicked)

display(assemble_button, assemble_output)


Button(description='Assemble Pipeline', style=ButtonStyle())

Output()

In [None]:
run_pipeline_button = widgets.Button(description='Run Pipeline')

def on_run_pipeline_clicked(b):
    with assemble_output:
        clear_output()
        try:
            pipeline_config = PipelineConfig(
                data_sources=data_sources,
                processing_steps=processing_steps,
                outputs=outputs
            )
            pipeline = Pipeline(pipeline_config)
            pipeline.execute()
            print("Pipeline executed successfully.")
        except Exception as e:
            print("Error executing pipeline:")
            print(e)

run_pipeline_button.on_click(on_run_pipeline_clicked)

display(run_pipeline_button)


In [None]:
save_button = widgets.Button(description='Save Configuration')

def on_save_clicked(b):
    with assemble_output:
        clear_output()
        try:
            pipeline_config = PipelineConfig(
                data_sources=data_sources,
                processing_steps=processing_steps,
                outputs=outputs
            )
            config_dict = pipeline_config.dict()
            with open('config/generated_pipeline_config.yaml', 'w') as f:
                yaml.dump(config_dict, f)
            print("Configuration saved to 'config/generated_pipeline_config.yaml'.")
        except ValidationError as e:
            print("Validation Error:")
            print(e)

save_button.on_click(on_save_clicked)

display(save_button)


In [None]:
load_button = widgets.Button(description='Load Configuration')
config_file_input = widgets.Text(description='Config File', value='config/pipeline_config.yaml')

def on_load_clicked(b):
    with assemble_output:
        clear_output()
        try:
            with open(config_file_input.value, 'r') as f:
                config_data = yaml.safe_load(f)
            # Clear existing data
            data_sources.clear()
            processing_steps.clear()
            outputs.clear()
            # Populate data sources
            for ds in config_data.get('data_sources', []):
                ds_config = DataSourceConfig(**ds)
                data_sources.append(ds_config)
                # Update widgets accordingly (if desired)
            # Similarly for processing steps and outputs
            print("Configuration loaded successfully.")
        except Exception as e:
            print("Error loading configuration:")
            print(e)

load_button.on_click(on_load_clicked)

display(config_file_input, load_button)
