# import new/updated characteristics from google sheet file exported as csv


In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))

In [2]:
import csv
csv_file_path='./20200608 SEARCH-by strat. representation-Bin. obs_v1 - MiCorr.csv'
with open(csv_file_path,"r") as f:
    reader =csv.DictReader(f)
    print(reader.fieldnames)

def to_uid(name):
    camel_case_name=''.join(name.title().split(' '))

    return camel_case_name[0].lower() + camel_case_name[1:]


['Characteristic', 'Sub-characteristic', 'Observation mode', 'Values', 'Sub-values', 'Image', 'Variables', 'unit', 'uid', 'Definition from observation under binocular', 'Definition from observation on cross-section', 'Notice for modification of the current MiCorr interface', 'Comments', 'CP Binocular', 'CP CS', 'CM Binocular', 'CM CS', 'D Binocular', 'D CS', 'M Binocular', 'M CS', 'NMM Binocular', 'NMM CS', 'POM Binocular', 'POM CS', 'S Binocular', 'S CS', 'SV Binocular', 'SV CS']


# import image urls from html export


In [46]:
name_img_url = {}

In [7]:
from bs4 import BeautifulSoup
f=open('20200608 SEARCH-by strat. representation-Bin. obs_v1/MiCorr.html')
content=f.read()
soup=BeautifulSoup(content,'html.parser')
imgs=soup.find_all('img')
name_img_url={}
for im in imgs:
    parent_td=im.parent.parent if im.parent.parent.name=='td' else None
    if parent_td:
        name_img_url[parent_td.previous_sibling.previous_sibling.text]=im['src']

In [37]:

def new_characteristic(family_uid, uid, name, order, description=None, image_url=None, optgroup=None, natures=[]):
    cql_update_row = f"""{{family_uid:'{family_uid}', uid:'{uid}', name:'{name}', order:{order}, natures:{natures}"""
    arg_list=[cql_update_row]

    if image_url is None:
        # search characteristic name in name to image_url map
        image_url = name_img_url.get(name)
    if description:
        arg_list.append(f"description:'{description}'")
    if image_url:
        arg_list.append(f"image_url:'{image_url}'")
    if optgroup:
        arg_list.append(f"optgroup:'{optgroup}'")

    return ', '.join(arg_list) + '},'

def new_family(fg_uid, uid, name, order, observation, description='',visible=False, variable=False, unit=None, fieldset=None,natures=[]):
    # adapt csv column value to cql format
    observation=observation.lower()
    if observation=='cross-section':
        observation='cross_section'
    cql_update_row = f"""{{fg:'{fg_uid}', order:{order}, observation:{observation}, uid:'{uid}', name:'{name}', visible: {visible}, variable: {variable}, description:'{description}, natures:{natures}'"""
    arg_list=[cql_update_row]
    if unit:
        arg_list.append(f"unit:'{unit}'")
    if fieldset:
        arg_list.append(f"fieldset:'{fieldset}'")
    return ', '.join(arg_list) + '},'

def new_subcharacteristic(parent_uid, uid, name, order, description=None, image_url=None, optgroup=None,natures=[]):
    cql_update_row = f"""{{parent_uid:'{parent_uid}', uid:'{uid}', name:'{name}', order:{order}, natures:{natures}"""
    arg_list=[cql_update_row]

    if image_url is None:
        # search characteristic name in name to image_url map
        image_url = name_img_url.get(name)
    if description:
        arg_list.append(f"description:'{description}'")
    if image_url:
        arg_list.append(f"image_url:'{image_url}'")
    if optgroup:
        arg_list.append(f"optgroup:'{optgroup}'")

    return ', '.join(arg_list) + '},'


In [10]:
new_subcharacteristic('x','y','z',1)

"{parent_uid:'x', uid:'y', name:'z', order:1},"

## Microstructure Binocular special case

In [49]:

# we treat all values and sub-values as characteristics (with optgroup + property for Cristalline microstructure)
# and optgroup - (closure of previous group) for isolated aggregate


with open(csv_file_path,"r") as f:
    reader =csv.DictReader(f)
    current_value=''
    characteristic_order = 1
    family_order = 1
    sub_value_order = 1
    optgroup_level = 0
    description_field_in = {'Binocular':'Definition from observation under binocular',
                           'Cross-section': 'Definition from observation on cross-section'}

    for r in reader:
        if 'Microstructure' not in r['Characteristic']:
            continue
        observation = r['Observation mode']
        if observation != 'Binocular':
            continue

        if r['Values'] == '' and r['Sub-values'] == '':
            family_uid='microstructureFamily'
            cql_family_update_row=new_family(fg_uid='fgMicrostructure', uid=family_uid, name='Microstructure',
                                     order=family_order,observation='binocular', description=r[description_field_in[observation]])
            print(cql_family_update_row)
            print(f'\n// {family_uid} characteristics:')
            family_order+=1

        if r['Values'] != '':
            # print('\nNew characteristic:\n')
            current_value = r['Values']
            current_value_uid = to_uid(r['Characteristic'] + ' ' +current_value)+'Characteristic'
            # trick to detect end of optgroup in our particular case (1 optgroup only yet)
            if optgroup_level:
                optgroup=None
                optgroup_level-=1

            # print(new_characteristic(family_uid=family_uid, uid=current_value_uid,
            #                         name=current_value,order=characteristic_order,
            #                         description=r[description_field_in[observation]],optgroup=optgroup))
            # family_order+=1
            # characteristic_order = 1
            characteristic_order += 1

        # print(observation, r['Characteristic'], current_value,'None' if r['Sub-values'] == '' else r['Sub-values'])
        current_subvalue = r['Sub-values']
        if current_value!='' and current_subvalue!='':
            if sub_value_order==1:
                optgroup_level+=1
                optgroup=current_value
                print(f'// {current_value_uid} optgroup options:')
            current_subvalue_uid = to_uid(current_value + ' ' +current_subvalue)+'Characteristic'
            if observation=='Binocular':
                print(new_characteristic(family_uid=family_uid, uid=current_subvalue_uid,name=current_subvalue,
                                     order=characteristic_order))
            else:
                print(new_characteristic(family_uid=family_uid, uid=current_subvalue_uid,name=current_subvalue,
                                     order=characteristic_order))

            sub_value_order+=1
            characteristic_order+=1

{fg:'fgMicrostructure', order:1, observation:binocular, uid:'microstructureFamily', name:'Microstructure', visible: False, variable: False, description:'Assessed on the observed area. The blank option corresponds to a stratum which microstructure is not known.'},

// microstructureFamily characteristics:
// microstructureCristallineMicrostructureCharacteristic optgroup options:
{family_uid:'microstructureFamily', uid:'cristallineMicrostructureBlankCharacteristic', name:'Blank', order:4},
{family_uid:'microstructureFamily', uid:'cristallineMicrostructureDendriticCharacteristic', name:'Dendritic', order:5, image_url:'https://lh6.googleusercontent.com/FyP0N-Th3gRuKFukmtMQPekDzkTkk4rqsVoqWgZLep7-kvuIVy0tXhyBwC9BHm1TlRKAZdSNhPCAFfyNE-DmsKJbKz7YSqrn0SwjfX0WZbPeS_gJhDmiIizLf6osIuEfGqBLvBUXgA=w126-h119'},
{family_uid:'microstructureFamily', uid:'cristallineMicrostructureDrusyAggregateCharacteristic', name:'Drusy aggregate', order:6, image_url:'https://lh3.googleusercontent.com/i904_m1OrYim9EZy

In [54]:

FAMILY = 1
CHARACTERISTIC = 2
SUBCHARACTERISTIC = 4
NATURES = ['CP','CM', 'D','M','NMM','POM','S','SV']
NATURE_HEADER_SUFFIX = {'Binocular':' Binocular', 'Cross-section':' CS'}

def import_family_group(family_group, filter_observation=None, fieldsets=[], filter_print=FAMILY|CHARACTERISTIC|SUBCHARACTERISTIC, shared_sub_characteristics=False):
    with open(csv_file_path,"r") as f:
        reader =csv.DictReader(f)
        current_value=''
        characteristic_order = 1
        family_order = 1
        sub_value_order = 1
        optgroup = None
        optgroup_level = 0
        description_field_in = {'Binocular':'Definition from observation under binocular',
                               'Cross-section': 'Definition from observation on cross-section'}
        fieldset = None # group Values below under fieldset

        for r in reader:
            if family_group not in r['Characteristic']:
                continue
            observation = r['Observation mode']
            if observation != filter_observation:
                continue
            # strata Nature filtering
            r_natures = [N for N in NATURES if r[N+NATURE_HEADER_SUFFIX[observation]]]
            
            # print(f'family_group={r["Characteristic"]}')
            # print(f'observation={observation}')
            if r['Values'] == '' and r['Sub-values'] == '' and r['Sub-characteristic']!='':
                family_name=r['Sub-characteristic']
                if family_name in fieldsets:
                    fieldset = family_name
                    print(f'// new fieldset={fieldset}')
                else:
                    fieldset = None
                    if family_name != r['Characteristic']: 
                        family_uid=to_uid(r['Characteristic'] + ' ' +family_name)+ ('CS' if observation=='Cross-section' else '') +'Family'
                    else:
                        family_uid=to_uid(r['Characteristic'])+ ('CS' if observation=='Cross-section' else '') +'Family'
                    cql_family_update_row=new_family(fg_uid='fg'+family_group, uid=family_uid, name=family_name,
                                         order=family_order,observation=observation, description=r[description_field_in[observation]],
                                                    natures=r_natures)
                    if filter_print & FAMILY:
                        print(f'// new family_name={family_name}')
                        print(cql_family_update_row)
                    ## print(f'\n// {family_uid} characteristics:')
                sub_value_order=1
                family_order+=1
            elif r['Values'] == '' and r['Sub-values'] != '':
                current_subvalue = r['Sub-values']
                if not shared_sub_characteristics:
                    current_subvalue_uid = to_uid(current_value + ' ' +current_subvalue)+'Characteristic'
                else:
                    current_subvalue_uid = to_uid(current_subvalue)+'Characteristic'                

                cql_subcharacteristic_update_row=new_subcharacteristic(parent_uid=current_value_uid, uid=current_subvalue_uid,
                                                                       name=current_subvalue, order=sub_value_order,natures=r_natures)
                if filter_print & SUBCHARACTERISTIC:
                    # print(f'\n// {current_subvalue} sub-characteristic:')
                    print(cql_subcharacteristic_update_row)
                sub_value_order+=1

            if r['Values'] != '':
                current_value = r['Values']
                sub_value_order=1
                if r['Variables'] !='':
                    #print(f'\nNew {current_value} variable Family:\n')
                    current_value_uid = to_uid(r['Characteristic'] + ' ' +current_value)+ ('CS' if observation=='Cross-section' else '') +'VarFamily'
                    cql_family_update_row=new_family(fg_uid='fg'+family_group, uid=current_value_uid, name=current_value,
                        order=family_order,observation=observation.lower(), description=r[description_field_in[observation]],
                                                     variable=True, unit=r['unit'], fieldset=fieldset, natures=r_natures)
                    if filter_print & FAMILY:
                        print(cql_family_update_row)
                    family_order+=1
                else:
                    # print(f'\nNew {current_value} characteristic:\n')
                    current_value_uid = to_uid(r['Characteristic'] + ' ' +current_value)+ ('CS' if observation=='Cross-section' else '') +'Characteristic'

                    # trick to detect end of optgroup in our particular case (1 optgroup only yet)
                    if optgroup_level:
                        optgroup= None
                        optgroup_level-=1

                    cql_characteristic_update_row = new_characteristic(family_uid=family_uid, uid=current_value_uid,
                                         name=current_value,order=characteristic_order,
                                         description=r[description_field_in[observation]],optgroup=optgroup,natures=r_natures)
                    if filter_print & CHARACTERISTIC:
                        print(cql_characteristic_update_row)

                    # family_order+=1
                    # characteristic_order = 1
                    characteristic_order += 1



In [73]:
## Interface cross-section special case

In [16]:
# we treat Roughness as a fieldset (as it includes multiple families instead of characteristics)
# to do add group/fieldset column to spreadsheet

import_family_group('Interface', filter_observation='Cross-section', fieldsets=['Roughness'], filter_print=CHARACTERISTIC) #FAMILY|CHARACTERISTIC

// new family_name=Profile
{family_uid:'interfaceProfileCSFamily', uid:'interfaceBlankCSCharacteristic', name:'Blank', order:1},
{family_uid:'interfaceProfileCSFamily', uid:'interfaceStraightCSCharacteristic', name:'Straight', order:2, description:'No waves along the observed length.'},
{family_uid:'interfaceProfileCSFamily', uid:'interfaceWavyCSCharacteristic', name:'Wavy', order:3, description:'Shows variations or undulations with a low height to length ratio. Angles are greater than 90°.'},
{family_uid:'interfaceProfileCSFamily', uid:'interfaceBumpyCSCharacteristic', name:'Bumpy', order:4, description:'Presents variations that are close in height and length. Angles can be equal to or less than 90°.'},
{family_uid:'interfaceProfileCSFamily', uid:'interfaceIrregularCSCharacteristic', name:'Irregular', order:5, description:'Presents variations whose height and length are unrelated. Angles may be variable.'},
// new family_name=Transition
{family_uid:'interfaceTransitionCSFamily', uid:'

In [55]:
print('\n //FAMILY \n')
import_family_group('Microstructure', filter_observation='Cross-section', fieldsets=[], filter_print=FAMILY, shared_sub_characteristics=True) #FAMILY|CHARACTERISTI
print('\n //CHARACTERISTICS \n')
import_family_group('Microstructure', filter_observation='Cross-section', fieldsets=[], filter_print=CHARACTERISTIC, shared_sub_characteristics=True) #FAMILY|CHARACTERISTI
print('\n //SUBCHARACTERISTICS \n')
import_family_group('Microstructure', filter_observation='Cross-section', fieldsets=[], filter_print=SUBCHARACTERISTIC, shared_sub_characteristics=True) #FAMILY|CHARACTERISTI



 //FAMILY 

// new family_name=Microstructure
{fg:'fgMicrostructure', order:1, observation:cross_section, uid:'microstructureCSFamily', name:'Microstructure', visible: False, variable: False, description:'Assessed on the observed area. The blank option corresponds to a stratum which microstructure is not known., natures:['M']'},

 //CHARACTERISTICS 

{family_uid:'microstructureCSFamily', uid:'microstructureBlankCSCharacteristic', name:'Blank', order:1, natures:['M']},
{family_uid:'microstructureCSFamily', uid:'microstructureDeformedDendritesCSCharacteristic', name:'Deformed dendrites', order:2, natures:['M'], description:'Stratum containing crystals in the form of deformed trees with branches.'},
{family_uid:'microstructureCSFamily', uid:'microstructureDendritesCSCharacteristic', name:'Dendrites', order:3, natures:['M'], description:'Stratum containing crystals in the form of trees with branches.'},
{family_uid:'microstructureCSFamily', uid:'microstructureElongatedGrainsCSCharacterist

In [44]:
import_family_group('Microstructure', filter_observation='Cross-section', fieldsets=[], filter_print=FAMILY|CHARACTERISTIC, shared_sub_characteristics=True) #FAMILY|CHARACTERISTI



// new family_name=Microstructure
{fg:'fgMicrostructure', order:1, observation:cross_section, uid:'microstructureMicrostructureCS', name:'Microstructure', visible: False, variable: False, description:'Assessed on the observed area. The blank option corresponds to a stratum which microstructure is not known., natures:['M']'},
{family_uid:'microstructureMicrostructureCS', uid:'microstructureBlankCS', name:'Blank', order:1, natures:['M']},
{family_uid:'microstructureMicrostructureCS', uid:'microstructureDeformedDendritesCS', name:'Deformed dendrites', order:2, natures:['M'], description:'Stratum containing crystals in the form of deformed trees with branches.'},
{family_uid:'microstructureMicrostructureCS', uid:'microstructureDendritesCS', name:'Dendrites', order:3, natures:['M'], description:'Stratum containing crystals in the form of trees with branches.'},
{family_uid:'microstructureMicrostructureCS', uid:'microstructureElongatedGrainsCS', name:'Elongated grains', order:4, natures:['M'

## Interface cross-section special case

In [None]:
# we treat Roughness as a fieldset (as it includes multiple families instead of characteristics)
# to do add group/fieldset column to spreadsheet

import_family_group('Interface', filter_observation='Cross-section', fieldsets=['Roughness'], filter_print=CHARACTERISTIC) #FAMILY|CHARACTERISTIC

# we treat Roughness as a fieldset (as it includes multiple families instead of characteristics)
# to do add group/fieldset column to spreadsheet

import_family_group('Interface', filter_observation='Cross-section', fieldsets=['Roughness'], filter_print=CHARACTERISTIC) #FAMILY|CHARACTERISTIC

In [None]:
# we treat Roughness as a fieldset (as it includes multiple families instead of characteristics)
# to do add group/fieldset column to spreadsheet

import_family_group('Interface', filter_observation='Cross-section', fieldsets=['Roughness'], filter_print=CHARACTERISTIC) #FAMILY|CHARACTERISTIC

In [None]:
# we treat Roughness as a fieldset (as it includes multiple families instead of characteristics)
# to do add group/fieldset column to spreadsheet

import_family_group('Microstructure', filter_observation='Cross-section', fieldsets=[], filter_print=CHARACTERISTIC) #FAMILY|CHARACTERISTIC