In [1]:
from os import path
import mlrun

# Base project name, before MLRun makes it user-specific
project_name_base = "test-mlrun"

# Load the project if it already exists, otherwise create and save it.
# user_project=True makes the project name unique per user, so the
# resolved name can differ from the base name set above.
project = mlrun.get_or_create_project(
    project_name_base,
    context="./",
    user_project=True,
)

# Show the name the project was actually registered under
project_name = project.metadata.name
print(f'Full project name: {project_name}')

> 2022-03-13 08:37:44,914 [info] created and saved project test-mlrun
Full project name: test-mlrun-jovyan


In [2]:
# mlrun: start-code

In [3]:
import mlrun
def prep_data(context, source_url: mlrun.DataItem, label_column='label'):
    """Encode the label column as integer codes and log the cleaned dataset.

    Args:
        context: Run context; only its ``log_result`` and ``log_dataset``
            methods are used here.
        source_url: Data item holding the raw table; it is read with
            ``as_df()``.
        label_column: Name of the column whose values are replaced by
            their category codes. Defaults to ``'label'``.

    Returns:
        None. The outcome is reported through ``context``: a ``num_rows``
        result and a ``cleaned_data`` dataset written as CSV without the
        index.
    """
    # Materialize the input as a pandas DataFrame
    dataset = source_url.as_df()

    # Swap each label value for the integer code of its category
    categorical_labels = dataset[label_column].astype('category')
    dataset[label_column] = categorical_labels.cat.codes

    # Report how many rows the dataset has
    row_count = dataset.shape[0]
    context.log_result('num_rows', row_count)

    # Record the transformed table as the 'cleaned_data' dataset artifact
    context.log_dataset('cleaned_data', df=dataset, index=False, format='csv')


In [4]:
# mlrun: end-code

In [5]:
# Wrap this notebook's marked code section (the cells between the
# "mlrun: start-code" and "mlrun: end-code" markers) as an MLRun function
# of kind 'job', named 'prep_data', using the 'mlrun/mlrun' image
data_prep_func = mlrun.code_to_function(
    name='prep_data',
    kind='job',
    image='mlrun/mlrun',
)

In [7]:
# Run the `data_prep_func` MLRun function as a job on the cluster.
# local=False submits the run instead of executing it in this notebook's
# process; the run log reports the pod it executes in.
# NOTE(review): the input path is read where the job runs, so it presumably
# must be reachable from the job's pod (e.g. via a shared volume) - confirm.
prep_data_run = data_prep_func.run(name='prep_data',
                                   handler=prep_data,
                                   inputs={'source_url': '/home/jovyan/data/iris.csv'},
                                   local=False)

> 2022-03-13 08:37:57,893 [info] starting run prep_data uid=4fc194e66ab34b75a3ea148aaa1d75cc DB=http://mlrun-api:8080
> 2022-03-13 08:37:58,068 [info] Job is running in the background, pod: prep-data-vgsw9
> 2022-03-13 08:38:01,508 [info] run executed, status=completed
final state: completed


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
test-mlrun-jovyan,...aa1d75cc,0,Mar 13 08:38:01,completed,prep_data,kind=jobowner=jovyanmlrun/client_version=0.10.0host=prep-data-vgsw9,source_url,,num_rows=150,cleaned_data





> 2022-03-13 08:38:04,229 [info] run executed, status=completed


In [8]:
prep_data_run.outputs

{'num_rows': 150,
 'cleaned_data': 'store://artifacts/test-mlrun-jovyan/prep_data_cleaned_data:4fc194e66ab34b75a3ea148aaa1d75cc'}