In [1]:
from kfp import dsl
from kfp import components as comp
from kfp.compiler import Compiler

## Data preprocessing

In [2]:
def data_load(output_csv: comp.OutputPath('csv'), dataset_name: str = 'titanic'):
    """Load a bnlearn example dataset, encode it numerically and save it as CSV.

    Args:
        output_csv: Path the encoded dataframe is written to.
        dataset_name: Name handed to ``bn.import_example``.
    """
    import bnlearn as bn

    # Load data
    raw_df = bn.import_example(dataset_name)

    # df2onehot returns a pair; only the second (numeric) frame is kept.
    _, df = bn.df2onehot(raw_df)

    # TODO: if some columns hold continuous data, discretize them

    print(df.head())
    print(df.describe())
    # index=False: otherwise the row index is stored as an extra unnamed
    # column, which a plain pd.read_csv(path) loads back as a bogus
    # "Unnamed: 0" variable.
    df.to_csv(output_csv, index=False)

# Wrap data_load as a pipeline component; bnlearn is the extra package it needs.
comp_data_load = comp.create_component_from_func(func=data_load, packages_to_install=['bnlearn'])

## Structure learning

In [3]:
def structure_learning(input_csv: comp.InputPath('csv'),
                       output_df: comp.OutputPath('csv'),
                       output_model: comp.OutputPath('pkl'),
                       edges: list = []):
    """Build or learn the DAG of a Bayesian network and persist it with its data.

    If ``edges`` is non-empty the DAG is built directly from them with
    ``bn.make_DAG``; otherwise the structure is learned from the data and
    pruned with a chi-square independence test. In both cases the dataframe
    is written to ``output_df`` and the pickled model to ``output_model``.

    Args:
        input_csv: Path of the CSV file holding the data.
        output_df: Path the dataframe is written to as CSV.
        output_model: Path the pickled model is written to.
        edges: Optional list of (source, target) pairs defining a fixed DAG.
            The default list is only read, never mutated.
    """
    import bnlearn as bn
    import pandas as pd
    import pickle

    # Read the data up front: both branches need it for the output_df artifact.
    df = pd.read_csv(input_csv)
    # If the producer saved the row index, pandas loads it as a leading
    # "Unnamed: 0" column; it is not a variable of the network, so drop it.
    if len(df.columns) > 0 and df.columns[0] == 'Unnamed: 0':
        df = df.drop(columns='Unnamed: 0')

    if edges:
        # NOTE(review): edges presumably arrive as 2-item lists after JSON
        # decoding; normalise them to tuples before building the DAG.
        model = bn.make_DAG([tuple(edge) for edge in edges])
        bn.plot(model)
    else:
        # TODO: compare different structure learning algorithms
        model = bn.structure_learning.fit(df)
        G = bn.plot(model, interactive=False)

        model = bn.independence_test(model, df, test='chi_square', alpha=0.05, prune=True)
        # Reuse the node positions of the first plot for the pruned model.
        bn.plot(model, interactive=False, pos=G['pos'])

    # index=False keeps the row index from becoming an extra column downstream.
    df.to_csv(output_df, index=False)

    with open(output_model, 'wb') as file:
        pickle.dump(model, file)

# Wrap structure_learning as a pipeline component; it needs bnlearn and pandas.
comp_structure_learning = comp.create_component_from_func(func=structure_learning, packages_to_install=['bnlearn', 'pandas'])

## Parameter learning

In [4]:
def parameter_learning(input_model: comp.InputPath('pkl'), input_df: comp.InputPath('csv'), output_model: comp.OutputPath('pkl')):
    """Run bnlearn parameter learning on a pickled model and a CSV dataset.

    Args:
        input_model: Path of the pickled model to load.
        input_df: Path of the CSV file holding the data.
        output_model: Path the updated model is pickled to.
    """
    import pickle

    import bnlearn as bn
    import pandas as pd

    with open(input_model, 'rb') as model_file:
        dag = pickle.load(model_file)

    fitted_model = bn.parameter_learning.fit(dag, pd.read_csv(input_df))

    with open(output_model, 'wb') as model_file:
        pickle.dump(fitted_model, model_file)


# Wrap parameter_learning as a pipeline component; it needs bnlearn and pandas.
comp_parameter_learning = comp.create_component_from_func(func=parameter_learning, packages_to_install=['bnlearn', 'pandas'])

## Inference

In [5]:
def inference(input_model: comp.InputPath('pkl'), inference:dict={'variables':['Survived', 'Sex'], 'evidence':{'Pclass':1}}):
    """Run a bnlearn inference query against a pickled model.

    The query result is neither returned nor written to a file by this
    function.

    Args:
        input_model: Path of the pickled model to load.
        inference: Query description with the keys 'variables' and 'evidence',
            which are forwarded to ``bn.inference.fit``.
    """
    import pickle

    import bnlearn as bn

    with open(input_model, 'rb') as model_file:
        fitted_model = pickle.load(model_file)

    query_variables = inference['variables']
    query_evidence = inference['evidence']
    bn.inference.fit(fitted_model, variables=query_variables, evidence=query_evidence)

# Wrap inference as a pipeline component; bnlearn is the extra package it needs.
comp_inference = comp.create_component_from_func(func=inference, packages_to_install=['bnlearn'])

## Sampling

In [6]:
def sampling(input_model: comp.InputPath('pkl'), n_sample: int, output_model: comp.OutputPath('csv'), methodtype='bayes'):
    """Draw samples from a pickled model and save them as CSV.

    Args:
        input_model: Path of the pickled model to load.
        n_sample: Number of samples, passed to ``bn.sampling`` as ``n``.
        output_model: Path the sampled dataframe is written to. Despite the
            name, this receives CSV data, not a model.
        methodtype: Forwarded unchanged to ``bn.sampling``.
    """
    import pickle

    import bnlearn as bn
    import pandas as pd

    with open(input_model, 'rb') as model_file:
        fitted_model = pickle.load(model_file)

    samples = bn.sampling(fitted_model, n=n_sample, methodtype=methodtype)
    print(samples.head())
    # NOTE(review): to_csv is called with its defaults, so the row index is
    # written as the first column — confirm that consumers expect it.
    samples.to_csv(output_model)

# Wrap sampling as a pipeline component; it needs bnlearn and pandas.
comp_sampling = comp.create_component_from_func(func=sampling, packages_to_install=['bnlearn', 'pandas'])

## Pipeline

In [7]:
def pl(n_samples: int = 1000):
    """Chain the components into the Bayesian-network pipeline.

    Data loading feeds structure learning, whose model and dataframe feed
    parameter learning; the resulting model is consumed by both the
    inference step and the sampling step.

    Args:
        n_samples: Number of samples requested from the sampling step.
    """
    load_task = comp_data_load()
    structure_task = comp_structure_learning(input_csv=load_task.output)
    parameter_task = comp_parameter_learning(
        input_model=structure_task.outputs['output_model'],
        input_df=structure_task.outputs['output_df'],
    )
    # Both consumers read the same model; their task handles are not needed afterwards.
    comp_inference(input_model=parameter_task.output)
    comp_sampling(input_model=parameter_task.output, n_sample=n_samples)

# Compile the pipeline function into the 'bn_pipeline.yaml' package file.
Compiler().compile(pipeline_func=pl, package_path='bn_pipeline.yaml')