In [1]:
from azureml.core import Dataset
from azureml.core import Workspace
import pandas as pd
from io import StringIO

# Connect to the Azure ML workspace described by the local config file and
# grab its default datastore, which is where the CSV is uploaded below.
workspace = Workspace.from_config()

datastore = workspace.get_default_datastore()

# Small inline sample of training data. Note that the header and some rows
# contain a space after the comma.
data = """ age, income, target
25,50000,0
30,60000,1
35, 55000,0
40, 65000, 1
"""
dataset_file = StringIO(data)

# skipinitialspace=True drops the blanks that follow each delimiter; without
# it the columns would be named ' age', ' income', ' target' (leading space)
# and those malformed names would be carried into the registered dataset.
df = pd.read_csv(dataset_file, skipinitialspace=True)
# Also strip any remaining surrounding whitespace from the column labels so
# downstream lookups such as df["target"] work as expected.
df.columns = df.columns.str.strip()
df.to_csv("training_data.csv", index=False)

# Upload the local CSV into the datastore under data/, replacing any prior copy.
datastore.upload_files(["training_data.csv"], target_path="data/", overwrite=True)

# Build a tabular dataset from the uploaded file and register it as a new
# version of 'training_dataset' in the workspace.
dataset = Dataset.Tabular.from_delimited_files(path=(datastore,"data/training_data.csv"))
dataset = dataset.register(workspace=workspace, name='training_dataset', create_new_version=True)
print("dataset registered succesfully.")

Uploading an estimated of 1 files
Uploading training_data.csv
Uploaded training_data.csv, 1 files out of an estimated total of 1
Uploaded 1 files
dataset registered succesfully.


In [None]:
import logging

# Set up logging to a file.
# logging.basicConfig() silently does nothing when the root logger already has
# handlers (which can happen in notebook kernels, after importing libraries
# that configure logging, or when this cell is re-run), so any existing root
# handlers are removed first to guarantee the file handler is installed.
root_logger = logging.getLogger()
for existing_handler in list(root_logger.handlers):
    root_logger.removeHandler(existing_handler)
    existing_handler.close()

logging.basicConfig(
    filename='ml_pipeline.log',
    level=logging.INFO,
    # Timestamp and severity make the pipeline log usable for later debugging.
    format='%(asctime)s %(levelname)s %(message)s',
)

# Example log message
logging.info("Logging setup complete.")

In [None]:
import pandas as pd

# Log the start of data loading
logging.info("Loading dataset...")

# Load the dataset. A failed load (missing file, parse error, ...) is written
# to the log with its traceback and then re-raised, so the failure is recorded
# in the log file as well as shown in the notebook.
try:
    df = pd.read_csv('your-dataset.csv')
except Exception as e:
    logging.exception(f"Error during data loading: {e}")
    raise
logging.info("Dataset loaded successfully.")

# Log the start of preprocessing
logging.info("Starting data preprocessing...")

# Example preprocessing: handling missing values.
# Reassign rather than using inplace=True so the step reads as an explicit
# transformation of the freshly loaded frame.
df = df.fillna(0)
logging.info("Missing values filled with 0.")

# Log the completion of preprocessing
logging.info("Data preprocessing completed.")

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Log the start of model training
logging.info("Starting model training...")

try:
    # Train the decision tree model.
    # NOTE(review): X_train / y_train are not defined in the cells shown here;
    # presumably they come from a train/test split step - confirm.
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
except Exception as e:
    # Record the failure (with traceback) and re-raise. Swallowing it would
    # only let the scoring step below fail with a misleading secondary error
    # on an undefined or unfitted model.
    logging.exception(f"Error during model training: {e}")
    raise
else:
    logging.info("Model trained successfully.")

    # Example logging of training accuracy; only computed when training
    # actually succeeded.
    accuracy = model.score(X_train, y_train)
    logging.info(f"Training accuracy: {accuracy:.2f}")

In [None]:
# Log the start of predictions
logging.info("Starting model predictions...")

try:
    # Make predictions.
    # NOTE(review): model / X_test are expected to exist from earlier cells;
    # X_test is not defined in the cells shown here - confirm.
    predictions = model.predict(X_test)
except Exception as e:
    # Record the failure (with traceback) and re-raise. Continuing would hit a
    # NameError on `predictions` below and hide the real cause.
    logging.exception(f"Error during predictions: {e}")
    raise
else:
    logging.info("Predictions made successfully.")

    # Log the output (in production systems, limit the amount of data logged)
    logging.info(f"Prediction output: {predictions[:5]}")  # Log only first 5 predictions

In [None]:
# Example: logging an exception during data validation
def validate_data(data):
    """Check that ``data`` is a pandas DataFrame and log the outcome.

    A failed check is raised as ``ValueError`` internally, logged at ERROR
    level, and not propagated to the caller.

    Args:
        data: The object to validate.

    Returns:
        bool: True when ``data`` is a ``pd.DataFrame``, False otherwise, so
        callers can branch on the result instead of parsing the log.
    """
    try:
        if not isinstance(data, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame.")
    except ValueError as e:
        logging.error(f"Data validation error: {e}")
        return False
    logging.info("Data validation successful.")
    return True

# Validate the dataset: checks that `df` (loaded in an earlier cell) is a
# pandas DataFrame; validate_data writes the outcome to the log.
validate_data(df)