## Generate New Files

* Given a template and a definition of the variables, generate a set of new files

In [13]:
import csv
from pathlib import Path
import re

class TemplateProcessor:
    """Parse the variable definitions embedded in a template file.

    Variables are written in the template between the markers ``<\\var>``
    (opening) and ``<var>`` (closing), using the general pattern::

        Name[type, default, (distribution, par1, par2)]

    Everything inside the square brackets is optional. When no distribution
    is given the variable is assumed to be of distribution ``'table'``; when
    no type is given, the first type allowed by the distribution that can
    represent the default and the distribution parameters is used.

    After construction, ``self.variables`` maps each variable name to a dict
    with the keys ``'active'``, ``'distribution'``, ``'parameters'``,
    ``'default'`` and ``'type'`` (plus ``'values'`` once a variables table
    has been loaded for that variable).
    """

    # Explicit whitelist of type names that may appear in a template.
    # NOTE: this replaces a former ``eval(variable_type)`` call, which would
    # have evaluated arbitrary expressions read from the template file.
    _TYPE_CASTERS = {'int': int, 'float': float, 'str': str}

    def __init__(self, template_path, verbose=False, output_file_path=None, variables_table_path=None):
        """Read and parse the template.

        Args:
            template_path: Path of the template file to parse.
            verbose: When True, print the parsing progress.
            output_file_path: Optional path recorded for the generated output.
            variables_table_path: Optional CSV file with values for the
                variables that use the 'table' distribution.

        Raises:
            FileNotFoundError: If the template file does not exist.
            ValueError: If a variable definition in the template is malformed.
        """
        self.template_path = Path(template_path)
        self._verbose = verbose
        if not self.template_path.exists():
            raise FileNotFoundError(f"Template file '{self.template_path}' not found.")

        #Number and types of parameters per distribution
        self._valid_distributions = {
            'uniform':     {'parameters':2, 'types':['int','float']},
            'normal':      {'parameters':2, 'types':['float']},
            'lognormal':   {'parameters':2, 'types':['float']},
            'triangular':  {'parameters':3, 'types':['float']},
            'constant':    {'parameters':1, 'types':['int','float','str']},
            'categorical': {'parameters':2, 'types':['int','float','str']},
            'table':       {'parameters':0, 'types':['str']}
        }

        self.variables_raw = self._extract_variables()
        self.variables = self._parse_variables()

        if variables_table_path is not None:
            self.set_variables_table(variables_table_path)

        self.set_output_file(output_file_path)
        if output_file_path is not None:
            pass #try to generate files

    def _extract_variables(self):
        """Return the raw text of every variable definition in the template.

        Raises:
            ValueError: If a definition is empty, or if a line does not have
                the same number of opening and closing markers.
        """
        variables_raw = []
        with open(self.template_path, 'r') as file:
            for line_num, line in enumerate(file, start=1):
                # Every chunk after an opening marker starts with a definition.
                for part in line.split('<\\var>')[1:]:
                    var = part.split('<var>')[0].strip()
                    if var == '':
                        raise ValueError(f"Empty variable name at line {line_num}.")
                    variables_raw.append(var)
                if line.count('<\\var>') != line.count('<var>'):
                    raise ValueError(f"Unclosed <\\var> <var> at line {line_num}.")
        return variables_raw

    def _extract_contents(self, text, open=r'\(', close=r'\)'):
        """Return every (non-greedy) substring of ``text`` found between the delimiters.

        ``open`` and ``close`` are regular-expression fragments, so special
        characters must already be escaped by the caller.
        """
        pattern = f'{open}(.*?){close}'
        return re.findall(pattern, text)

    def _transform_variable(self, variable, variable_type):
        """Convert ``variable`` (a value or a list of values) to ``variable_type``.

        Args:
            variable: A single value or a list of values to convert.
            variable_type: Name of the target type ('int', 'float' or 'str').

        Returns:
            The converted value (or list of converted values), or None when
            the type name is unknown or any conversion fails.
        """
        if isinstance(variable, list):
            converted = []
            for item in variable:
                value = self._transform_variable(variable=item, variable_type=variable_type)
                if value is None:
                    return None
                converted.append(value)
            return converted

        caster = self._TYPE_CASTERS.get(variable_type)
        if caster is None:
            return None
        try:
            return caster(variable)
        except (ValueError, TypeError):
            return None

    def _custom_split(self, text, sep=',', open='{', close='}'):
        """Split ``text`` on ``sep``, ignoring separators nested inside ``open``/``close``.

        Each returned token is stripped of surrounding whitespace; the
        delimiters themselves are kept in the tokens.
        """
        result = []
        current_token = ''
        depth = 0

        for char in text:
            if char == sep and depth == 0:
                result.append(current_token.strip())
                current_token = ''
                continue
            if char == open:
                depth += 1
            elif char == close:
                depth -= 1
            current_token += char

        result.append(current_token.strip())
        return result

    def _split_braced_list(self, text):
        """Split a ``{a, b, c}`` list into its stripped items."""
        return [item.strip() for item in text.lstrip('{').rstrip('}').split(',')]

    def _parse_distribution(self, text, var_type):
        """Parse the distribution definition ``name, par1, par2, ...``.

        Args:
            text: Text found between the parentheses, or None when the
                variable defines no distribution.
            var_type: Type name declared for the variable, or None when the
                type still has to be inferred.

        Returns:
            A ``(distribution, parameters)`` tuple. For 'categorical' the
            parameters are ``[values, probabilities]`` (two lists), with the
            probabilities always converted to float. Other parameters are
            converted to ``var_type`` when it is given and are otherwise left
            as strings.

        Raises:
            ValueError: On an unknown distribution, a wrong number of
                parameters, a type not allowed for the distribution, or a
                parameter that cannot be converted.
        """
        if text is None:
            if self._verbose:
                print("  No distribution provided. Will assume 'table'.")
            return 'table', list()

        parameters = self._custom_split(text)
        distribution = parameters[0]
        if distribution not in self._valid_distributions:
            raise ValueError(f"Invalid distribution: '{distribution}'.")
        spec = self._valid_distributions[distribution]
        parameters = parameters[1:]
        if len(parameters) != spec['parameters']:
            raise ValueError(f"Invalid number of parameters for distribution '{distribution}'. Expected {spec['parameters']}, found {len(parameters)}.")

        if var_type is not None and var_type not in spec['types']:
            raise ValueError(f"Invalid type ({var_type}) for distribution '{distribution}'. Valid option(s): {', '.join(spec['types'])}.")

        if distribution == 'categorical':
            # The value list is always split, even when the type is still
            # unknown, so the length check below compares item counts.
            values = self._split_braced_list(parameters[0])
            if var_type is not None:
                values = self._convert_parameters(values, var_type, distribution)

            probabilities = self._split_braced_list(parameters[1])
            if len(values) != len(probabilities):
                raise ValueError(f"Inconsistent number of values in distribution '{distribution}'. Values found: {len(values)}, associated probabilities: {len(probabilities)}.")
            for i, raw in enumerate(probabilities):
                new_value = self._transform_variable(variable=raw, variable_type='float')
                if new_value is None:
                    raise ValueError(f"Probabilities for distribution '{distribution}' must be of type float. Cannot transform '{raw}'.")
                probabilities[i] = new_value
            return distribution, [values, probabilities]

        if var_type is not None:
            parameters = self._convert_parameters(parameters, var_type, distribution)

        return distribution, parameters

    def _convert_parameters(self, raw_values, var_type, distribution):
        """Convert every item of ``raw_values`` to ``var_type`` or raise ValueError."""
        converted = []
        for raw in raw_values:
            new_value = self._transform_variable(variable=raw, variable_type=var_type)
            if new_value is None:
                raise ValueError(f"Parameters for distribution '{distribution}' must be of type {var_type}. Cannot transform '{raw}'.")
            converted.append(new_value)
        return converted

    def _check_variable_type(self, default, distribution, parameters):
        """Infer the variable type when the template does not declare one.

        The types allowed for ``distribution`` are tried in order; the first
        one able to represent both the default (when given) and the
        distribution parameters is selected.

        Returns:
            ``(default, distribution, parameters, var_type)`` with the default
            and parameters converted to the selected type. When no type fits,
            the inputs are returned unchanged with ``var_type`` set to None.
        """
        for test_type in self._valid_distributions[distribution]['types']:
            # Always convert from the ORIGINAL default, so a failed attempt
            # with one type does not discard it for the next candidate.
            new_default = default
            if default is not None:
                new_default = self._transform_variable(variable=default, variable_type=test_type)
                if new_default is None:
                    continue

            if distribution == 'categorical':
                # Only the values carry the variable type; the probabilities
                # were already converted to float by _parse_distribution.
                values = self._transform_variable(variable=list(parameters[0]), variable_type=test_type)
                if values is None:
                    continue
                new_parameters = [values, parameters[1]]
            else:
                new_parameters = self._transform_variable(variable=list(parameters), variable_type=test_type)
                if new_parameters is None:
                    continue

            if self._verbose:
                print(f"  No type provided. Will assume '{test_type}'.")
            return new_default, distribution, new_parameters, test_type

        return default, distribution, parameters, None

    def _parse_variable_options(self, text):
        """Parse the text found between ``[`` and ``]`` of a variable definition.

        Accepted forms are ``default``, ``type, default`` and either of them
        (or nothing) followed by ``(distribution, par1, ...)``.

        Returns:
            A dict with the keys 'active', 'distribution', 'parameters',
            'default' and 'type'.

        Raises:
            ValueError: If the options are malformed or no type fits the
                provided data.
        """
        default = None
        var_type = None

        distribution_text = self._extract_contents(text=text, open=r'\(', close=r'\)')
        if len(distribution_text) > 1:
            raise ValueError(f"Bad distribution options format in: '{text}'. Only one distribution with ( and ) can be defined.")

        options = self._custom_split(text=text, open='(', close=')')
        if len(distribution_text) == 1:
            if '(' not in options[-1] or ')' not in options[-1]:
                raise ValueError(f"Distribution options with ( and ) must be the last information in: '{options}'.")
            options = options[:-1]
            distribution_text = distribution_text[0].strip()
        else:
            distribution_text = None

        if len(options) > 2:
            raise ValueError(f"Bad options format in: '{options}'. Too many options.")
        elif len(options) > 0:
            # A single empty token means the brackets held no default at all.
            if len(options) > 1 or options[0] != '':
                default = options[-1].strip()
                if len(options) == 2:
                    var_type = options[0].strip()
                    if var_type not in self._TYPE_CASTERS:
                        raise ValueError(f"Invalid type '{var_type}' in: '{text}'. Valid option(s): {', '.join(self._TYPE_CASTERS)}.")
                    default = self._transform_variable(variable=default, variable_type=var_type)
                    if default is None:
                        raise ValueError(f"Default value ({options[-1]}) must be of the defined type ({var_type}).")

        distribution, parameters = self._parse_distribution(distribution_text, var_type)

        if var_type is None:
            default, distribution, parameters, var_type = self._check_variable_type(default, distribution, parameters)
        if var_type is None:
            raise ValueError(f"Couldn't find the variable type based on provided data: '{text}'. Possible type(s) for {distribution} are: {', '.join(self._valid_distributions[distribution]['types'])}.")

        return {'active': True, 'distribution': distribution, 'parameters': parameters, 'default': default, 'type': var_type}

    def _parse_variable(self, text):
        """Split a raw definition into its name and parsed options.

        Returns:
            A ``(key, options_dict)`` tuple.

        Raises:
            ValueError: If the name is empty or more than one ``[...]`` list
                is present.
        """
        # General pattern: Variable[type, default, (distribution, par1, par2)],
        key = text.split('[')[0].strip()
        if len(key) == 0:
            raise ValueError(f"Undefined variable name in: '{text}'.")

        options = self._extract_contents(text=text, open=r'\[', close=r'\]')
        if len(options) > 1:
            raise ValueError(f"Bad options format in: '{text}'. Only one list with [ and ] can be defined.")

        if len(options) == 1:
            options_dict = self._parse_variable_options(options[0])
        else:
            options_dict = self._parse_variable_options('')

        return key, options_dict

    def _parse_variables(self):
        """Parse every raw definition into a ``{name: options}`` dict.

        A name defined more than once keeps its last definition.
        """
        variables = {}
        for var in self.variables_raw:
            if self._verbose:
                print(f'Command found: {var}')
            key, options = self._parse_variable(var)
            variables[key] = options
            if self._verbose:
                print(f'  key: {key}')
                print(f'  options: {options}')
        return variables

    def set_output_file(self, output_file_path):
        """Record the output path; None (the constructor default) clears it."""
        # Path(None) raises TypeError, so None has to be handled explicitly.
        self.output_file_path = None if output_file_path is None else Path(output_file_path)

    def set_variables_table(self, variables_table_path):
        """Load values for 'table' variables from a CSV file.

        The first CSV row holds the variable names; each following row holds
        one value per variable. Columns whose name is not a template variable,
        or whose variable already has a distribution other than 'table', are
        ignored with a printed message. Values are stored as strings under the
        'values' key of the variable.

        Problems with the file are reported with a printed message instead of
        an exception, and leave the variables untouched.
        """
        if not Path(variables_table_path).exists():
            print(f"CSV file '{variables_table_path}' not found.")
            return

        try:
            with open(variables_table_path, 'r', newline='') as csvfile:
                reader = csv.reader(csvfile)
                header = next(reader, None)
                # Read all rows once: the reader is a one-shot iterator, so
                # it cannot be re-iterated for every column. Blank lines are
                # skipped.
                rows = [row for row in reader if row]
        except (OSError, ValueError, csv.Error):
            print(f'Error reading variables table file: {variables_table_path}')
            return

        if header is None or any(len(row) < len(header) for row in rows):
            # Empty file, or a row with fewer cells than the header.
            print(f'Error reading variables table file: {variables_table_path}')
            return

        for column, key in enumerate(header):
            key = key.strip()
            if key not in self.variables:
                print(f"Variable '{key}' not found in template. Will ignore data.")
                continue
            if self.variables[key]['distribution'] != 'table':
                print(f"Variable '{key}' already has a distribution. Will ignore data.")
                continue
            self.variables[key]['values'] = [row[column] for row in rows]

# Parse the sample template with verbose logging and record where the output should go.
template = TemplateProcessor(
    template_path=r'..\template\no_errors.dat',
    verbose=True,
    output_file_path=r'..\template\out\no_errors.dat',
)


Command found: No_Default
  No distribution provided. Will assume 'table'.
  No type provided. Will assume 'str'.
  key: No_Default
  options: {'active': True, 'distribution': 'table', 'parameters': [], 'default': None, 'type': 'str'}
Command found: With_Default_12[12]
  No distribution provided. Will assume 'table'.
  No type provided. Will assume 'str'.
  key: With_Default_12
  options: {'active': True, 'distribution': 'table', 'parameters': [], 'default': '12', 'type': 'str'}
Command found: With_Type_int[int,42]
  No distribution provided. Will assume 'table'.
  key: With_Type_int
  options: {'active': True, 'distribution': 'table', 'parameters': [], 'default': 42, 'type': 'int'}
Command found: normal_variable[float,0.5,(normal,0, 2.5)]
  key: normal_variable
  options: {'active': True, 'distribution': 'normal', 'parameters': [0.0, 2.5], 'default': 0.5, 'type': 'float'}
Command found: normal_variable_no_type[0.5,(normal,0, 2.5)]
  No type provided. Will assume 'float'.
  key: normal

IndexError: list index out of range