# MNIST BDP V2

Now that we have the first version done, we are going to enrich our components even further. First, we load the previous JSON.

In [1]:
import sys
sys.path.append("../../pybdp")
from src import pybdp
#import pybdp
from pprint import pprint

# Start from the project saved by the previous (V1) notebook rather than an
# empty one; everything below extends this loaded project.
V1_PROJECT_JSON = "../JSON/Supervised Learning V1.json"
project = pybdp.load_from_json(V1_PROJECT_JSON)
print(project)

< Project
Toolbox:

< Toolbox
Blocks: ['Experiment', 'Load Supervised Features', 'Supervised Learning']
Spaces: ['Model', 'Evaluation Metrics', 'X Train', 'Y Train', 'X Test', 'Y Test'] >

Workbench:

<Workbench
Processors: ['MNIST Experiment', 'Load MNIST', 'Default Supervised Learning']
Wires: ['W1', 'W2', 'W3', 'W4']
Systems: ['MNIST Experiment System'] > >


## Load MNIST

Currently "Load MNIST" is a primitive processor, but we are going to expand it into a composite processor made up of four steps:

1. Loading the dataset
2. Conducting a test-train split
3. Doing image normalization for training data
4. Doing image normalization for testing data

In [2]:
# Add spaces for the full X and Y data (i.e. before any train/test split).
# Each space uses the same string for its id and its name.
for space_id, space_description in (("X", "X Data"), ("Y", "Y Data")):
    project.add_space(id=space_id,
                      name=space_id,
                      description=space_description)

# Add blocks
# Each spec is (id, description, domain, codomain); a block's name is the same
# string as its id. The specs are registered in the order listed.
block_specs = [
    ("Load Supervised Dataset",
     "Loads the data for a supervised learning problem",
     [],
     ["X", "Y"]),
    ("Cross Validation Split",
     "Splits data into training and testing sets",
     ["X", "Y"],
     ["X Train", "Y Train", "X Test", "Y Test"]),
    ("Training Data Preprocessing",
     "Preprocesses training data for model training",
     ["X Train", "Y Train"],
     ["X Train", "Y Train"]),
    # The testing block also takes the training data as input (see its description).
    ("Testing Data Preprocessing",
     "Preprocesses testing data for model evaluation, optionally can use the training data in addition",
     ["X Train", "Y Train", "X Test", "Y Test"],
     ["X Test", "Y Test"]),
]

for block_id, block_description, block_domain, block_codomain in block_specs:
    project.add_block(id=block_id,
                      name=block_id,
                      description=block_description,
                      domain=block_domain,
                      codomain=block_codomain)

# Add processors
# Each spec is (id, description, parent block id); a processor's name is the
# same string as its id. The specs are registered in the order listed.
processor_specs = [
    ("Load MNIST Dataset",
     "Loads the MNIST dataset",
     "Load Supervised Dataset"),
    ("Test-Train Split",
     "Splits data into one training and one test set",
     "Cross Validation Split"),
    ("Image Normalization Preprocessing - Training",
     "Preprocesses image-based training data",
     "Training Data Preprocessing"),
    ("Image Normalization Preprocessing - Testing",
     "Preprocesses testing data for model evaluation",
     "Testing Data Preprocessing"),
]

for processor_id, processor_description, parent_block_id in processor_specs:
    project.add_processor(id=processor_id,
                          name=processor_id,
                          description=processor_description,
                          parent_id=parent_block_id)

In [3]:
      # NOTE(review): this cell is a bare literal, not an API call. The trailing
      # comma after the closing brace makes the expression a one-element tuple,
      # which the notebook merely displays; nothing here is added to `project`.
      # TODO: register this system on the project, and confirm that wires
      # W5-W8 exist -- the loaded workbench only lists W1-W4.
      {
        "ID": "Load MNIST System",
        "Name": "Load MNIST System",
        "Description": "The system representing loading the MNIST dataset, preparing the cross validation split, and pre-processing the data",
        # The four processors created in the previous cell.
        "Processors": [
          "Load MNIST Dataset",
          "Test-Train Split",
          "Image Normalization Preprocessing - Training",
          "Image Normalization Preprocessing - Testing"
        ],
        "Wires": ["W1", "W2", "W3", "W4", "W5", "W6", "W7", "W8"]
      },

({'ID': 'Load MNIST System',
  'Name': 'Load MNIST System',
  'Description': 'The system representing loading the MNIST dataset, preparing the cross validation split, and pre-processing the data',
  'Processors': ['Load MNIST Dataset',
   'Test-Train Split',
   'Image Normalization Preprocessing - Training',
   'Image Normalization Preprocessing - Testing'],
  'Wires': ['W1', 'W2', 'W3', 'W4', 'W5', 'W6', 'W7', 'W8']},)

In [4]:
# NOTE(review): bare literal describing "Load MNIST" as a composite processor
# whose subsystem is "Load MNIST System". It is not passed to any `project`
# call, and because another expression follows it in this cell its value is
# not displayed either. TODO: replace with the call that actually updates the
# "Load MNIST" processor.
{
        "Description": "Load MNIST",
        "ID": "Load MNIST",
        "Name": "Load MNIST",
        "Parent": "Load Supervised Features",
        "Ports": [],
        "Subsystem": {
          "Port Mappings": [],
          "System ID": "Load MNIST System",
          # Four (Index, Processor) pairs: indices 0 and 1 of the training
          # preprocessor, then indices 0 and 1 of the testing preprocessor.
          # Presumably one entry per terminal below, in order -- confirm.
          "Terminal Mappings": [
            {
              "Index": 0,
              "Processor": "Image Normalization Preprocessing - Training"
            },
            {
              "Index": 1,
              "Processor": "Image Normalization Preprocessing - Training"
            },
            {
              "Index": 0,
              "Processor": "Image Normalization Preprocessing - Testing"
            },
            {
              "Index": 1,
              "Processor": "Image Normalization Preprocessing - Testing"
            }
          ]
        },
        # NOTE(review): if terminals are meant to be space ids, the loaded
        # toolbox lists "X Train", "Y Train", "X Test", "Y Test"; the
        # "... Array" names used here do not appear there -- confirm.
        "Terminals": [
          "X Train Array",
          "Y Train Array",
          "X Test Array",
          "Y Test Array"
        ]
      },


# NOTE(review): bare literal describing "Default Supervised Learning" as a
# composite processor. The trailing comma after the closing brace makes it a
# one-element tuple, which is what the cell displays; nothing is applied to
# `project`. The processors "Fit Supervised Model - Default",
# "Evaluate Supervised Model - Default" and "No Post-processing", and the
# system "Default Supervised Learning System", are not created in the part of
# the notebook visible here -- TODO confirm they are defined before this is
# turned into a real call.
{
        "Description": "Conducts supervised learning using the defaults of the model",
        "ID": "Default Supervised Learning",
        "Name": "Default Supervised Learning",
        "Parent": "Supervised Learning",
        # NOTE(review): if ports are meant to be space ids, the loaded toolbox
        # lists "X Train", "Y Train", "X Test", "Y Test" rather than the
        # "... Array" names used here -- confirm.
        "Ports": [
          "Model",
          "X Train Array",
          "Y Train Array",
          "X Test Array",
          "Y Test Array"
        ],
        "Subsystem": {
          # Five (Index, Processor) pairs: indices 0-2 of the fit processor,
          # then indices 1-2 of the evaluate processor. Presumably one entry
          # per port above, in order -- confirm.
          "Port Mappings": [
            {
              "Index": 0,
              "Processor": "Fit Supervised Model - Default"
            },
            {
              "Index": 1,
              "Processor": "Fit Supervised Model - Default"
            },
            {
              "Index": 2,
              "Processor": "Fit Supervised Model - Default"
            },
            {
              "Index": 1,
              "Processor": "Evaluate Supervised Model - Default"
            },
            {
              "Index": 2,
              "Processor": "Evaluate Supervised Model - Default"
            }
          ],
          "System ID": "Default Supervised Learning System",
          # The single "Evaluation Metrics" terminal is taken from index 0 of
          # the post-processing processor.
          "Terminal Mappings": [
            {
              "Index": 0,
              "Processor": "No Post-processing"
            }
          ]
        },
        "Terminals": ["Evaluation Metrics"]
      },

({'Description': 'Conducts supervised learning using the defaults of the model',
  'ID': 'Default Supervised Learning',
  'Name': 'Default Supervised Learning',
  'Parent': 'Supervised Learning',
  'Ports': ['Model',
   'X Train Array',
   'Y Train Array',
   'X Test Array',
   'Y Test Array'],
  'Subsystem': {'Port Mappings': [{'Index': 0,
     'Processor': 'Fit Supervised Model - Default'},
    {'Index': 1, 'Processor': 'Fit Supervised Model - Default'},
    {'Index': 2, 'Processor': 'Fit Supervised Model - Default'},
    {'Index': 1, 'Processor': 'Evaluate Supervised Model - Default'},
    {'Index': 2, 'Processor': 'Evaluate Supervised Model - Default'}],
   'System ID': 'Default Supervised Learning System',
   'Terminal Mappings': [{'Index': 0, 'Processor': 'No Post-processing'}]},
  'Terminals': ['Evaluation Metrics']},)