In [2]:
import re
import json
import random
from tqdm import trange
from fractions import Fraction
from collections import Counter, defaultdict
from typing import Tuple, Dict, Sequence, List, Set

In [13]:
def read_data(filename: str):
    """
    Load one data file from the 'formatData/' directory.

    filename: path of the file relative to 'formatData/'. Two kinds of input
        are supported: any name ending in '.json' or '.js' (parsed as JSON),
        and the exact name 'config/pair-params' (parsed as 'key/value'
        entries separated by blank lines).
    returns: the parsed JSON value, or for 'config/pair-params' a mapping
        from each entry's key to its value.
    raises: ValueError if filename is neither of the supported kinds;
        IndexError if a non-blank pair-params entry contains no '/'.
    """
    path = 'formatData/' + filename

    if filename.endswith(('.json', '.js')):
        with open(path, 'r', encoding='utf-8') as file:
            return json.load(file)

    if filename == 'config/pair-params':
        with open(path, 'r', encoding='utf-8') as file:
            entries = file.read().split('\n\n')

        # A defaultdict without a factory behaves like a plain dict; the type
        # is kept so the returned object is unchanged for existing callers.
        output = defaultdict()
        for entry in entries:
            # Spaces, hyphens and parentheses are dropped from keys and values.
            for unwanted in (' ', '-', '(', ')'):
                entry = entry.replace(unwanted, '')
            # Strip stray newlines so a trailing newline at the end of the
            # file does not end up inside the last value.
            entry = entry.strip()
            if not entry:
                # Blank entry, e.g. produced by a file ending in a blank line.
                continue
            pair = entry.split('/')
            output[pair[0]] = pair[1]
        return output

    raise ValueError('The input is not valid.')

In [20]:
def generate(item: dict, classname: str) -> dict:
    """
    Build one randomized, delimiter-joined description for a component record.

    A pattern (an ordered list of feature names) is chosen at random from
    'config/config_cap&res_patterns.json' for classname. Each feature of the
    pattern contributes one text segment; the segments are then joined with
    randomly chosen delimiters.

    item: a description record. It must contain every field the chosen
        pattern copies verbatim ('SizeCode', 'TemperatureCharacteristicsCode',
        'MfrPartNumber') and, when the pattern contains 'others', the noise
        fields of the class. Values are assumed to be strings (TODO confirm
        against the standard data files).
    classname: Capacitors or Resistors
    returns: a dict with 'description' (the joined text), 'labels' (one label
        per segment, in order), 'class', and one key per labelled feature
        other than 'others' holding that segment's text without delimiter.
    raises: ValueError if the pattern contains 'others' and classname is
        neither 'Capacitors' nor 'Resistors'.

    Reads the module-level config_class_name, config_dynamic_units and
    config_field_units tables when the pattern uses the matching features.
    """
    with open('config/config_cap&res_patterns.json', 'r', encoding='utf-8') as f:
        patterns = json.load(f)

    copied_features = ('SizeCode', 'TemperatureCharacteristicsCode', 'MfrPartNumber')
    quantity_features = ('Capacitance', 'Resistance', 'RatedDCVoltageURdc',
                         'WorkingVoltage', 'RatedPowerDissipationP')
    tolerance_features = ('Tolerance', 'PositiveTolerance', 'NegativeTolerance')
    # Fields of item that may be mixed in as unlabelled-content noise.
    noise_features = {
        'Capacitors': ['IHSObjectID', 'DielectricMaterial', 'PackingMethod', 'PackageStyle', 'Series'],
        'Resistors': ['IHSObjectID', 'PackingMethod', 'PackageStyle', 'Series', 'Technology'],
    }
    delimiters = ['#', ',', '/', ';', ':', '-', '_']

    des = []
    output = {'description': '', 'labels': [], 'class': classname}
    pattern = random.choice(patterns[classname])

    for feature in pattern:
        feature = feature.replace(' (User input)', '')

        if feature == 'input class':
            # The class name is only included in about half of the samples.
            if random.uniform(0, 1) > 0.5:
                value = random.choice(config_class_name[classname])
                output[feature] = value
                output['labels'].append(feature)
                des.append(value)

        elif feature in copied_features:
            value = item[feature]
            output[feature] = value
            output['labels'].append(feature)
            des.append(value)

        elif feature in quantity_features:
            # All three candidates are drawn before one is picked, so every
            # call consumes the same number of random draws.
            candidates = [
                '1/' + str(random.randint(2, 9)),
                random.randint(0, 100),
                round(random.uniform(0, 1), 2),
            ]
            numeric = str(random.choice(candidates))
            units = list(config_dynamic_units[config_field_units[feature]])
            value = numeric + random.choice(units)
            output[feature] = value
            output['labels'].append(feature)
            des.append(value)

        elif feature in tolerance_features:
            # Either an integer percentage or a bare fraction in [0, 1].
            if random.uniform(0, 1) > 0.5:
                value = str(random.randint(0, 100)) + '%'
            else:
                value = str(round(random.uniform(0, 1), 2))
            output[feature] = value
            output['labels'].append(feature)
            des.append(value)

        elif feature == 'others':
            if classname not in noise_features:
                raise ValueError(
                    "No noise features are defined for class {!r}.".format(classname)
                )
            noise_value = item[random.choice(noise_features[classname])]
            # Skip noise that would duplicate a segment already present.
            if noise_value not in des:
                output['labels'].append('others')
                des.append(noise_value)

    # insert delimiter: one delimiter is drawn for the whole description, but
    # each non-empty segment only uses it when its own draw exceeds 0.7;
    # otherwise the segment gets an independently drawn delimiter.
    shared_delimiter = random.choice(delimiters)
    for i, segment in enumerate(des):
        if segment != '':
            if random.uniform(0, 1) > 0.7:
                des[i] = segment + shared_delimiter
            else:
                des[i] = segment + random.choice(delimiters)

    # Remove the single delimiter left dangling at the end. Each delimiter is
    # escaped so the pattern stays correct if a regex metacharacter is added.
    trailing_delimiter = '(' + '|'.join(re.escape(d) for d in delimiters) + ')$'
    output['description'] = re.sub(trailing_delimiter, '', ''.join(des))

    return output

In [14]:
# Lookup tables that generate() reads as module-level globals.
# read_data() resolves both paths relative to 'formatData/'.

# Indexed by class name; generate() samples one entry for 'input class'.
config_class_name = read_data('config/config-class-name.json')

# Indexed by the unit keys below; generate() samples one unit spelling from it.
config_dynamic_units = read_data('config/config-dynamic-units.json')

# Quantity feature -> key into config_dynamic_units
# (presumably the base unit symbol of that quantity).
config_field_units = dict([
    ('Capacitance', 'F'),
    ('Resistance', 'Ω'),
    ('RatedDCVoltageURdc', 'V'),
    ('WorkingVoltage', 'V'),
    ('RatedPowerDissipationP', 'W'),
])

In [4]:
# # read standard data
# with open('preprocess/standard_cap.json','r', encoding='utf-8') as f:
#     standard_cap = json.loads(f.read())
    
# with open('preprocess/standard_res.json','r', encoding='utf-8') as f:
#     standard_res = json.loads(f.read())

In [21]:
# # generate data
# cap_data, res_data = [], []
# for item in standard_cap[:10]:
#     cap_data.append(generate(item, 'Capacitors'))
    
# for item in standard_res[:10]:
#     res_data.append(generate(item, 'Resistors'))