In [5]:
import os  # Provides a way of using operating system-dependent functionality like reading or writing to the file system.
import os.path as osp  # Provides functions for manipulating file paths in a way that is compatible with the operating system.
from collections import OrderedDict  # Provides a dictionary subclass that maintains the order of keys as they are added.

import yaml  # Provides functionality to parse and emit YAML, a human-readable data serialization standard.

Function: ordered_yaml

Purpose:
The ordered_yaml function is designed to support the use of OrderedDict when loading and dumping YAML data. This ensures that the order of keys in the YAML file is preserved when it is read into a Python dictionary and when it is written back to a YAML file.

Functionality:
Imports: Attempts to import the C-based Loader and Dumper for performance. If unavailable, it falls back to the pure Python versions.

Custom Representer and Constructor:
dict_representer: A custom representer function that tells the YAML dumper how to serialize an OrderedDict by iterating over its items.

dict_constructor: A custom constructor function that tells the YAML loader how to deserialize a mapping node into an OrderedDict.
Integration with YAML:
Registers the custom representer and constructor with the Dumper and Loader, respectively, to handle OrderedDict objects.
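Returns: The function returns the customized Loader and Dumper that can be used to load and dump YAML data while preserving key order. Note that the representer and constructor are registered on the Loader and Dumper classes themselves, so any other code that uses those same classes is affected as well.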

In [14]:
def ordered_yaml():
    """Build a YAML Loader/Dumper pair that works with ``OrderedDict``.

    The C-based ``CLoader``/``CDumper`` are used when they can be imported;
    otherwise the pure-Python ``Loader``/``Dumper`` are used. A constructor
    for the default mapping tag and a representer for ``OrderedDict`` are
    registered on those classes before they are handed back.

    Returns:
        tuple: ``(Loader, Dumper)`` classes to pass to ``yaml.load`` and
        ``yaml.dump``.
    """
    try:
        # Prefer the C implementations for speed when they are available.
        from yaml import CDumper as Dumper, CLoader as Loader
    except ImportError:
        # C bindings are missing: use the pure-Python classes instead.
        from yaml import Dumper, Loader

    def _load_mapping(loader, node):
        # Turn a YAML mapping node into an OrderedDict built from its pairs.
        pairs = loader.construct_pairs(node)
        return OrderedDict(pairs)

    def _dump_ordered(dumper, data):
        # Serialize an OrderedDict as an ordinary YAML mapping from its items.
        return dumper.represent_dict(data.items())

    # Tag that YAML assigns to plain mappings by default.
    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG

    # Hook both directions: mapping -> OrderedDict and OrderedDict -> mapping.
    Loader.add_constructor(mapping_tag, _load_mapping)
    Dumper.add_representer(OrderedDict, _dump_ordered)

    return Loader, Dumper

Example Usage
Here's how you might use the ordered_yaml function in practice:

import yaml

# Get the customized Loader and Dumper classes (OrderedDict support is
# registered on them inside ordered_yaml()).
Loader, Dumper = ordered_yaml()

# Load a YAML file; mappings come back as OrderedDict, so key order is kept.
# NOTE(review): this is PyYAML's full Loader/CLoader, not SafeLoader --
# only load files you trust.
with open('config.yaml', 'r') as f:
    config = yaml.load(f, Loader=Loader)

# Dump the OrderedDict back to a YAML file using the matching Dumper
with open('output.yaml', 'w') as f:
    yaml.dump(config, f, Dumper=Dumper)

# Function: parse
# Purpose: Parse an option file and set up the environment for training or testing

In [13]:
# Function: parse
# Purpose: Parse an option file and set up the environment for training or testing.

def parse(opt_path, is_train=True):
    """Parse a YAML option file and attach mode and output paths to it.

    Side effects: prints the GPU list, and sets the ``CUDA_VISIBLE_DEVICES``
    environment variable when the option ``set_CUDA_VISIBLE_DEVICES`` is
    truthy.

    Args:
        opt_path (str): Option file path.
        is_train (bool): Indicate whether in training or not. Default: True.

    Returns:
        dict: Parsed options, with ``is_train`` and a ``path`` sub-dict added
        (and debug frequencies overridden when the name contains 'debug').

    Raises:
        KeyError: If the option file has no ``gpu_ids`` or ``name`` entry.
    """
    # Open the option file and load its contents using the ordered YAML loader
    with open(opt_path, mode='r') as f:
        Loader, _ = ordered_yaml()  # Get the customized YAML Loader
        # NOTE(security): ordered_yaml() returns PyYAML's full Loader/CLoader
        # (not SafeLoader); only parse option files from a trusted source.
        opt = yaml.load(f, Loader=Loader)

    # Prepare the GPU list as a comma-separated string
    gpu_list = ','.join(str(x) for x in opt['gpu_ids'])
    # Set the CUDA_VISIBLE_DEVICES environment variable if specified
    if opt.get('set_CUDA_VISIBLE_DEVICES', None):
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
        print('export CUDA_VISIBLE_DEVICES=' + gpu_list, flush=True)
    else:
        print('gpu_list: ', gpu_list, flush=True)

    # Set the training mode flag in the options
    opt['is_train'] = is_train

    # Set up paths for the experiment
    opt['path'] = {}
    # Determine the root path of the project: two path components above the
    # anchor. `__file__` is not defined inside a notebook kernel or REPL, so
    # fall back to the current working directory there instead of raising
    # NameError (same cwd-based root the notebook's standalone example uses).
    try:
        root_anchor = __file__
    except NameError:
        root_anchor = os.getcwd()
    opt['path']['root'] = osp.abspath(osp.join(root_anchor, osp.pardir, osp.pardir))

    if is_train:
        # Pick the experiments folder; `debug_path` redirects to a debug folder
        if opt.get('debug_path', None):
            experiments_path = 'experiments_debug'
        else:
            experiments_path = 'experiments'
        # Set up paths for models, logs, and visualizations
        experiments_root = osp.join(opt['path']['root'], experiments_path, opt['name'])
        opt['path']['experiments_root'] = experiments_root
        opt['path']['models'] = osp.join(experiments_root, 'models')
        opt['path']['log'] = experiments_root
        opt['path']['visualization'] = osp.join(experiments_root, 'visualization')

        # An experiment whose name contains 'debug' validates, prints and
        # checkpoints on every step
        if 'debug' in opt['name']:
            opt['debug'] = True
            opt['val_freq'] = 1
            opt['print_freq'] = 1
            opt['save_checkpoint_freq'] = 1
    else:  # If not in training mode, set up paths for results
        results_root = osp.join(opt['path']['root'], 'results', opt['name'])
        opt['path']['results_root'] = results_root
        opt['path']['log'] = results_root
        opt['path']['visualization'] = osp.join(results_root, 'visualization')

    return opt  # Return the parsed options

# Example usage of the parse function
# Configuration Parsing 
Purpose
This code is designed to parse a YAML configuration file for our experiment, set up the necessary environment, and organize paths for training or testing. It ensures that the configuration options are loaded into a structured format, allowing for easy access and manipulation of experiment settings.

In [18]:
import os
import yaml
from collections import OrderedDict

# Define the ordered_yaml function (as previously discussed)
def ordered_yaml():
    """Return YAML ``(Loader, Dumper)`` classes with ``OrderedDict`` support.

    Uses the C-based classes when importable, else the pure-Python ones, and
    registers an ``OrderedDict`` constructor/representer on them.

    Returns:
        tuple: ``(Loader, Dumper)``.
    """
    try:
        from yaml import CDumper as Dumper, CLoader as Loader
    except ImportError:
        from yaml import Dumper, Loader

    def _construct_ordered(loader, node):
        # Mapping node -> OrderedDict of its key/value pairs.
        return OrderedDict(loader.construct_pairs(node))

    def _represent_ordered(dumper, data):
        # OrderedDict -> ordinary YAML mapping built from its items.
        return dumper.represent_dict(data.items())

    default_mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    Loader.add_constructor(default_mapping_tag, _construct_ordered)
    Dumper.add_representer(OrderedDict, _represent_ordered)

    return Loader, Dumper

# Define the parse function (as previously discussed)
def parse(opt_path, is_train=True):
    """Parse a YAML option file and attach mode and output paths to it.

    This variant derives the project root from the current working directory
    (two levels above it) rather than from ``__file__``.

    Side effects: prints the GPU list, and sets the ``CUDA_VISIBLE_DEVICES``
    environment variable when the option ``set_CUDA_VISIBLE_DEVICES`` is
    truthy.

    Args:
        opt_path (str): Option file path.
        is_train (bool): Indicate whether in training or not. Default: True.

    Returns:
        dict: Parsed options, with ``is_train`` and a ``path`` sub-dict added.

    Raises:
        KeyError: If the option file has no ``gpu_ids`` or ``name`` entry.
    """
    with open(opt_path, mode='r') as f:
        Loader, _ = ordered_yaml()
        # NOTE(security): ordered_yaml() returns PyYAML's full Loader/CLoader
        # (not SafeLoader); only parse option files from a trusted source.
        opt = yaml.load(f, Loader=Loader)

    # Comma-separated GPU ids, e.g. "0,1"
    gpu_list = ','.join(str(x) for x in opt['gpu_ids'])
    if opt.get('set_CUDA_VISIBLE_DEVICES', None):
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list
        print('export CUDA_VISIBLE_DEVICES=' + gpu_list, flush=True)
    else:
        print('gpu_list: ', gpu_list, flush=True)

    opt['is_train'] = is_train

    opt['path'] = {}
    # The project root is two directory levels above the current working
    # directory. Only `os.path` is used here so this cell does not depend on
    # the `osp` alias imported in a different cell.
    opt['path']['root'] = os.path.abspath(
        os.path.join(os.getcwd(), os.path.pardir, os.path.pardir))
    if is_train:
        # `debug_path` redirects training outputs to a separate folder
        if opt.get('debug_path', None):
            experiments_path = 'experiments_debug'
        else:
            experiments_path = 'experiments'
        experiments_root = os.path.join(opt['path']['root'], experiments_path, opt['name'])
        opt['path']['experiments_root'] = experiments_root
        opt['path']['models'] = os.path.join(experiments_root, 'models')
        opt['path']['log'] = experiments_root
        opt['path']['visualization'] = os.path.join(experiments_root, 'visualization')

        # An experiment whose name contains 'debug' validates, prints and
        # checkpoints on every step
        if 'debug' in opt['name']:
            opt['debug'] = True
            opt['val_freq'] = 1
            opt['print_freq'] = 1
            opt['save_checkpoint_freq'] = 1
    else:
        # Test mode: everything goes under results/<name>
        results_root = os.path.join(opt['path']['root'], 'results', opt['name'])
        opt['path']['results_root'] = results_root
        opt['path']['log'] = results_root
        opt['path']['visualization'] = os.path.join(results_root, 'visualization')

    return opt

# Parse config.yaml in training mode. Besides returning the options, parse()
# prints the GPU list and may set CUDA_VISIBLE_DEVICES.
options = parse('config.yaml', is_train=True)

# Print the parsed options (including the added 'is_train' and 'path' entries)
print(options)

export CUDA_VISIBLE_DEVICES=0,1
OrderedDict({'name': 'my_experiment', 'gpu_ids': [0, 1], 'set_CUDA_VISIBLE_DEVICES': True, 'debug_path': False, 'is_train': True, 'path': {'root': '/Users/oliviashen/uva/ds6050_deep_learning/final_project_styleswap', 'experiments_root': '/Users/oliviashen/uva/ds6050_deep_learning/final_project_styleswap/experiments/my_experiment', 'models': '/Users/oliviashen/uva/ds6050_deep_learning/final_project_styleswap/experiments/my_experiment/models', 'log': '/Users/oliviashen/uva/ds6050_deep_learning/final_project_styleswap/experiments/my_experiment', 'visualization': '/Users/oliviashen/uva/ds6050_deep_learning/final_project_styleswap/experiments/my_experiment/visualization'}})


# EXAMPLE USAGE
# Parse config.yaml in training mode; expects the file in the current
# working directory.
options = parse('config.yaml', is_train=True)

# Print the parsed options
print(options)

Function: dict2str
Purpose:
The dict2str function is designed to convert a dictionary into a formatted string. This is particularly useful for printing configuration options or other structured data in a readable format, especially when dealing with nested dictionaries.
Functionality:
Input: The function takes two arguments:
opt: The dictionary to be converted into a string.
indent_level: An optional integer giving the indentation depth of the output. Each level indents by two spaces, and nested dictionaries are printed one level deeper than their parent. The default is 1.
Output: It returns a string that represents the dictionary in a structured and indented format.


In [8]:
# Function: dict2str
# Purpose: Convert a dictionary to a formatted string for easy printing of options.

def dict2str(opt, indent_level=1):
    """Convert a dictionary to a formatted string for printing options.

    Each entry is written on its own line, indented by two spaces per
    ``indent_level``. A nested dictionary is written as ``key:[``, followed by
    its entries one level deeper, followed by a closing ``]`` line.

    Args:
        opt (dict): The dictionary containing options to be converted.
        indent_level (int): The level of indentation for nested dictionaries. Default is 1.

    Returns:
        str: A formatted string representation of the dictionary (empty
        string for an empty dictionary).
    """
    pad = ' ' * (indent_level * 2)
    parts = []  # Collected pieces, joined once at the end
    for k, v in opt.items():
        # Keys are not guaranteed to be strings (YAML allows int/bool keys),
        # so convert explicitly instead of concatenating the raw key.
        key = str(k)
        if isinstance(v, dict):
            # Nested dictionary: recurse one indentation level deeper
            parts.append(pad + key + ':[\n')
            parts.append(dict2str(v, indent_level + 1))
            parts.append(pad + ']\n')
        else:
            # Leaf value: plain "key: value" line
            parts.append(pad + key + ': ' + str(v) + '\n')
    return ''.join(parts)


# Sample configuration with one nested section
config = dict(
    learning_rate=0.001,
    batch_size=32,
    model=dict(type='ResNet', layers=50),
    optimizer='Adam',
    epochs=100,
)

# Render the dictionary as an indented, human-readable string
formatted_config = dict2str(config)

# Show the result
print("Configuration Options:\n", formatted_config)


# Configuration Options:
   learning_rate: 0.001
  batch_size: 32
  model:[
    type: ResNet
    layers: 50
  ]
  optimizer: Adam
  epochs: 100

In [12]:
# # Define a sample configuration dictionary
# config = {
#     'learning_rate': 0.001,
#     'batch_size': 32,
#     'model': {
#         'type': 'ResNet',
#         'layers': 50
#     },
#     'optimizer': 'Adam',
#     'epochs': 100
# }

# # Use the dict2str function to convert the dictionary to a formatted string
# formatted_config = dict2str(config)

# # Print the formatted configuration
# print("Configuration Options:\n", formatted_config)

Configuration Options:
   learning_rate: 0.001
  batch_size: 32
  model:[
    type: ResNet
    layers: 50
  ]
  optimizer: Adam
  epochs: 100



# OLIVIA: Function: dict_to_nonedict — I think we can delete this function.

When to Keep It
Frequent Use: If you often need to convert dictionaries to a form that safely handles missing keys, keeping this function is beneficial.
Code Readability: It encapsulates the logic for converting dictionaries, making your code cleaner and more readable.
Reusability: If you foresee needing this functionality in multiple parts of your project, it's efficient to have a dedicated function.

When to Remove It
Unused Functionality: If you find that this function is not being used anywhere in your codebase, it might be unnecessary to keep it.
Simplification: If you're trying to simplify your code and reduce the number of functions, removing unused or rarely used functions can help.
Alternative Solutions: If you've implemented a different approach to handle missing keys or if your project requirements have changed, this function might no longer be relevant.

# OLIVIA: Function: dict_to_nonedict
# Purpose: Convert a standard dictionary into a NoneDict, which returns None for missing keys.


In [10]:
# Function: dict_to_nonedict
# Purpose: Convert a standard dictionary into a NoneDict, which returns None for missing keys.

class NoneDict(dict):
    """None dict. It will return None if the key is not in the dict."""

    def __missing__(self, key):
        # Called by dict item lookup when `key` is absent; nothing is stored.
        return None


def dict_to_nonedict(opt):
    """Convert to NoneDict, which returns None for missing keys.

    Dictionaries are converted recursively, including dictionaries found
    inside lists. Any other value is returned unchanged.

    Args:
        opt (dict): Option dict.

    Returns:
        (dict): NoneDict for options.
    """
    if isinstance(opt, dict):
        # Build from (key, value) pairs rather than `NoneDict(**mapping)`:
        # keyword expansion only accepts string keys and would raise
        # TypeError for int/bool keys.
        return NoneDict(
            (key, dict_to_nonedict(sub_opt)) for key, sub_opt in opt.items()
        )
    if isinstance(opt, list):
        # Lists may hold further dicts; convert each element
        return [dict_to_nonedict(sub_opt) for sub_opt in opt]
    # Neither a dict nor a list: return the item as is
    return opt

# EXAMPLE USAGE
# Sample configuration with one nested section
config = dict(
    learning_rate=0.001,
    batch_size=32,
    model=dict(type='ResNet', layers=50),
)

# Wrap it so that looking up an absent key gives None
none_dict_config = dict_to_nonedict(config)

# One key that exists and one that does not
print(none_dict_config['learning_rate'])  # Output: 0.001
print(none_dict_config['optimizer'])      # Output: None (instead of KeyError)