In [None]:
import os

# Create the output directory up front. `exist_ok=True` makes re-running this cell
# harmless when the directory already exists. `output_path` is later passed to the
# LocalTrainer instances as their `output_path` argument.
output_path = "output"
os.makedirs(output_path, exist_ok=True)

# Example 1: Train on data bundled inside the source directory

In [None]:
from trainer import LocalTrainer, SourceCodeConfig

# Trainer for Example 1: uses the python:3.10-slim image and the output directory
# created in the first cell.
trainer = LocalTrainer(output_path=output_path, image="python:3.10-slim")

In [None]:
# Example 1 reads its training data from a file shipped alongside the source code.
input_data = "titanic_processed.csv"  # data exists within src directory

# The command installs the requirements and then runs train.py on that file.
source_code_config = SourceCodeConfig(
    command=f"pip install -r requirements.txt && python train.py --data_path {input_data}",
    source_code_dir='src',
)
trainer.run(source_code_config=source_code_config)

# Example 2: Train on data supplied through an input data channel

In [None]:
import pandas as pd

def preprocess_titanic_data(
    url: str = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv',
) -> pd.DataFrame:
    """Load the Titanic dataset and return a cleaned feature/target table.

    Cleaning steps:
      * missing ``Age`` values are replaced by the column median;
      * missing ``Embarked`` values are replaced by the most frequent port;
      * ``Sex`` is encoded as male -> 0, female -> 1;
      * ``Embarked`` is encoded as C -> 0, Q -> 1, S -> 2.

    Values of ``Sex`` / ``Embarked`` outside those mappings become NaN.

    Args:
        url: Location of the raw CSV. Anything ``pd.read_csv`` accepts works
            (URL or local path). Defaults to the public Titanic dataset, so
            calling the function with no arguments behaves as before.

    Returns:
        A new DataFrame with the columns ``Pclass, Sex, Age, SibSp, Parch,
        Fare, Embarked`` followed by the target column ``Survived``.
    """
    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    target = 'Survived'

    raw = pd.read_csv(url)

    # Fill the gaps first so the encoded Embarked column has no NaN from missing ports.
    embarked_filled = raw['Embarked'].fillna(raw['Embarked'].mode()[0])

    # A single .assign builds a new frame instead of mutating columns in place.
    cleaned = raw.assign(
        Age=raw['Age'].fillna(raw['Age'].median()),
        Sex=raw['Sex'].map({'male': 0, 'female': 1}),
        Embarked=embarked_filled.map({'C': 0, 'Q': 1, 'S': 2}),
    )

    # Keep only the model inputs, with the target as the last column.
    return cleaned[features + [target]]

In [None]:
import os
# Write the preprocessed Titanic table to data/titanic_processed.csv.
# `data_dir` is the folder the "test" DataChannel further down points at.
data_dir = "data"
file_name = "titanic_processed.csv"
os.makedirs(data_dir, exist_ok=True)

# Fetches the raw CSV from the URL inside preprocess_titanic_data and cleans it.
data = preprocess_titanic_data()
# index=False keeps the DataFrame's row index out of the CSV file.
data.to_csv(os.path.join(data_dir, file_name), index=False)

In [None]:
from trainer import LocalTrainer, SourceCodeConfig, DataChannel

# Trainer for Example 2: same image and output directory as Example 1, re-created
# so this example does not rely on the earlier trainer object.
trainer = LocalTrainer(output_path=output_path, image="python:3.10-slim")

In [None]:
# Hand the local `data_dir` folder to the training job as a channel named "test".
test_data_channel = DataChannel(
    channel_name="test",
    path=data_dir,
)

# The file name comes from `file_name` (the CSV written above) instead of being
# repeated as a literal, so the command cannot drift from the file that exists.
# NOTE(review): `$$INPUT_DATA_TEST` reaches the trainer verbatim, with both dollar
# signs. Presumably the trainer treats `$$` as an escaped `$` and the variable
# resolves to the mount point of the "test" channel — confirm against the
# trainer's documentation; in a plain shell `$$` would expand to the process ID.
source_code_config = SourceCodeConfig(
    source_code_dir='src',
    command=f"pip install -r requirements.txt && python train.py --data_path $$INPUT_DATA_TEST/{file_name}",
)

trainer.run(
    input_data_channels=[test_data_channel],
    source_code_config=source_code_config,
)