In [9]:
import os
import pandas as pd
import openpyxl, uuid, datetime, getpass, json, csv, re;

# default datatype if unable to tell
# Assigned to rez[k]['datatype'] by the datatype-detection cell when neither the
# filename nor (for xlsx files) the sheet names give any hint of 'UST' vs 'LUST'.
g_default_datatype = 'UST';

# if unable to tell, use the first sheet
# NOTE(review): no cell visible in this notebook reads this value. Sheet indices
# used elsewhere here are 0-based (a single-sheet workbook gets sheet_no 0), so
# confirm whether 1 is intended as a 1-based "first sheet" before relying on it.
g_default_sheet_no = 1;


In [10]:
# Read files in the input directory for Excel or csv items without accompanying .json document

def find_unprocessed_inputs(input_dir='input'):
    """Map every candidate data file in ``input_dir`` to whether it still needs processing.

    A candidate is a ``.xlsx`` or ``.csv`` file whose name does not start with
    ``~`` (Excel lock/temp files).  A candidate is marked ``False`` (already
    processed) when a ``.json`` file with the same stem sits next to it.

    Only the top level of ``input_dir`` is inspected: the later cells open each
    file as ``'input' + os.sep + <filename>``, so a file found in a
    sub-directory could never be opened anyway.

    Parameters
    ----------
    input_dir : str
        Directory to scan.  A missing directory yields an empty result.

    Returns
    -------
    dict
        ``{filename: True}`` for files to process, ``{filename: False}`` for
        files that already have a companion ``.json``.
    """
    # os.walk yields nothing for a missing directory; the default keeps that
    # quiet "nothing found" outcome instead of raising.
    _, _, filenames = next(os.walk(input_dir), (input_dir, [], []))

    pending = {}
    for name in filenames:
        if name.endswith(('.xlsx', '.csv')) and not name.startswith('~'):
            pending[name] = True

    for name in filenames:
        if name.endswith('.json'):
            stem = os.path.splitext(name)[0]
            # Both spellings share the same companion file name, so an existing
            # json covers the .xlsx *and* the .csv; processing either one again
            # would overwrite it.
            for ext in ('.xlsx', '.csv'):
                if stem + ext in pending:
                    pending[stem + ext] = False

    return pending


wlk = find_unprocessed_inputs('input')

# rez carries one (initially empty) info dict per file; later cells fill it in.
rez = {name: {} for name, todo in wlk.items() if todo is True}

print("Found " + str(len(rez)) + " files to process.")
for k in rez:
    print("  " + k)
    

Found 2 files to process.
  MO_LUST_for_geoprocessing-2023-06-22.csv
  MO_UST_for_geoprocessing-2023-06-21.csv


In [11]:
# Determine whether UST or LUST

# Filename rules, tried in order: (regex, datatype, certainty in percent).
# Raw strings are used so the patterns contain no invalid string escapes; they
# match exactly the same text as before ('[.|_]' is '.', '|' or '_').
# LUST always precedes the matching UST rule because 'lust' contains 'ust'.
g_filename_rules = [
    (r"_lust[.|_]", 'LUST', 100),
    (r"_ust[.|_]",  'UST',  100),
    (r"_lust",      'LUST', 80),
    (r"_ust",       'UST',  80),
    (r"^lust_",     'LUST', 75),
    (r"^ust_",      'UST',  75),
    (r"lust",       'LUST', 40),
]

# Sheet-name fragments, tried in order for every sheet.
g_sheet_rules = [
    ('_lust', 'LUST'),
    (' lust', 'LUST'),
    ('lust_', 'LUST'),
    ('_ust',  'UST'),
    (' ust',  'UST'),
    ('ust_',  'UST'),
]


def datatype_from_filename(filename):
    """Guess the datatype from a file name.

    Returns a ``(datatype, certainty)`` tuple; ``(None, 0)`` when no rule in
    ``g_filename_rules`` matches the lower-cased name.
    """
    lowname = filename.lower()
    for pattern, datatype, certainty in g_filename_rules:
        if re.search(pattern, lowname) is not None:
            return datatype, certainty
    return None, 0


def datatype_from_sheets(sheet_names, datatype=None):
    """Guess the datatype from workbook sheet names.

    Sheets are examined in order.  A sheet decides the result when its
    lower-cased name contains one of the ``g_sheet_rules`` fragments, or when it
    contains the bare word for the filename-derived ``datatype`` ('lust'/'ust').
    Substring membership is used so a fragment at the very start of the name
    (e.g. 'LUST_sites') counts as well.

    Returns 'LUST', 'UST', or ``None`` when no sheet gives a clue.
    """
    for sht in sheet_names:
        lowname = sht.lower()
        for fragment, sheet_datatype in g_sheet_rules:
            if fragment in lowname:
                return sheet_datatype
        # No separator-delimited hit: accept a bare mention that agrees with
        # what the filename already suggested.
        if datatype in ('LUST', 'UST') and datatype.lower() in lowname:
            return datatype
    return None


for k in rez:
    print("Examining " + k)
    info = rez[k]
    info['key'] = k
    info['file'] = 'input' + os.sep + k
    info['date_examined'] = datetime.datetime.today().strftime('%Y%m%d')
    info['username'] = getpass.getuser()

    if k.endswith(".xlsx"):
        info['format'] = 'xlsx'
    elif k.endswith(".csv"):
        info['format'] = 'csv'

    datatype, certainty = datatype_from_filename(k)
    if datatype is None:
        print("  unable to tell datatype from filename.")
    else:
        info['datatype'] = datatype

    # Only workbooks can offer a second opinion via their sheet names.
    if certainty < 100 and info.get('format') == 'xlsx':
        # The context manager closes the workbook handle deterministically.
        with pd.ExcelFile(info['file']) as xlsx:
            print("  checking sheet names for clues")
            sheet_datatype = datatype_from_sheets(xlsx.sheet_names, datatype)

        if sheet_datatype is not None:
            info['datatype'] = sheet_datatype
            certainty = 100

    if certainty == 0:
        info['datatype'] = g_default_datatype

    if 'error' in info:
        print("  " + info['error'])
    else:
        print("  determined datatype as " + info['datatype'] + " with " + str(certainty) + "% certainty.")


Examining MO_LUST_for_geoprocessing-2023-06-22.csv
  determined datatype as LUST with 100% certainty.
Examining MO_UST_for_geoprocessing-2023-06-21.csv
  determined datatype as UST with 100% certainty.


In [12]:
# Determine the Excel sheet to process

def pick_sheet(sheet_names, datatype):
    """Return ``(index, name)`` of the first sheet whose name marks it as ``datatype``.

    A sheet qualifies when its lower-cased name contains ' lust' / '_lust'
    (datatype 'LUST') or '_ust' / ' ust' (datatype 'UST').  The index is
    0-based.  Returns ``None`` when no sheet qualifies.
    """
    fragments = {
        'LUST': (' lust', '_lust'),
        'UST':  ('_ust', ' ust'),
    }.get(datatype, ())

    for idx, sht in enumerate(sheet_names):
        lowname = sht.lower()
        if any(fragment in lowname for fragment in fragments):
            return idx, sht
    return None


for k in rez:

    print("Examining " + k)
    info = rez[k]

    if info['format'] == 'xlsx':

        # Copy the names out so the workbook can be closed right away.
        with pd.ExcelFile('input' + os.sep + k) as xlsx:
            sheet_names = list(xlsx.sheet_names)

        sht_count = len(sheet_names)
        print("File has " + str(sht_count) + " sheets")

        if sht_count == 1:
            choice = (0, sheet_names[0])
        else:
            choice = pick_sheet(sheet_names, info['datatype'])

        if choice is None:
            # Previously this case raised KeyError on the never-assigned
            # 'sheet_no' key instead of recording the error.
            info['error'] = 'Unable to determine sheet to process'
        else:
            info['sheet_no'], info['sheet_name'] = choice

    else:
        info['sheet_no'] = 0
        info['sheet_name'] = 'csv'

    if 'error' in info:
        print("  " + info['error'])
    else:
        print("  will process " + info['sheet_name'] + ", index " + str(info['sheet_no']))
          

Examining MO_LUST_for_geoprocessing-2023-06-22.csv
  will process csv, index 0
Examining MO_UST_for_geoprocessing-2023-06-21.csv
  will process csv, index 0


In [14]:
# Determine fields to geocode

# Keys of rez[k]['mapping'], in the order they are written to the companion json.
g_mapping_fields = [
    'sourceidentifier', 'sourceidentifier2', 'name',
    'address1', 'address2', 'address3',
    'city', 'state', 'zip5', 'zip4', 'county', 'tribe', 'phone',
    'originallat', 'originallng', 'originalcoordsrc', 'eparegion',
]

# Lower-cased source column name -> mapping field, for UST files (xlsx and csv).
g_ust_columns = {
    'facilityid':               'sourceidentifier',
    'tankid':                   'sourceidentifier2',
    'facilityname':             'name',
    'facilityaddress':          'address1',
    'facilityaddress1':         'address1',
    'facilityaddress2':         'address2',
    'facilityaddress3':         'address3',
    'facilitycity':             'city',
    'facilitycounty':           'county',
    'facilitystate':            'state',
    'facilityzipcode':          'zip5',
    'zipcode':                  'zip5',
    'zip4':                     'zip4',
    'facilityphone':            'phone',
    'facilityphonenumber':      'phone',
    'facilitytribename':        'tribe',
    'facilitytribe':            'tribe',
    'facilitylatitude':         'originallat',
    'facilitylongitude':        'originallng',
    'facilitycoordinatesource': 'originalcoordsrc',
    'facilityeparegion':        'eparegion',
}

# Lower-cased source column name -> mapping field, shared by both LUST variants.
g_lust_columns_common = {
    'sitename':         'name',
    'siteaddress':      'address1',
    'siteaddress1':     'address1',
    'siteaddress2':     'address2',
    'siteaddress3':     'address3',
    'sitecity':         'city',
    'sitecounty':       'county',
    'county':           'county',
    'sitestate':        'state',
    'sitezipcode':      'zip5',
    'zipcode':          'zip5',
    'zip4':             'zip4',
    'sitephone':        'phone',
    'tribename':        'tribe',
    'latitude':         'originallat',
    'longitude':        'originallng',
    'coordinatesource': 'originalcoordsrc',
    'eparegion':        'eparegion',
}

# NOTE(review): the xlsx and csv LUST rules have always differed in two places
# (which column feeds sourceidentifier, and whether a bare 'state' header is
# accepted).  Both variants are kept exactly as they were; confirm which one is
# intended before unifying them.
g_lust_columns_xlsx = dict(g_lust_columns_common,
                           facilityid='sourceidentifier',
                           lustid='sourceidentifier2')
g_lust_columns_csv = dict(g_lust_columns_common,
                          lustid='sourceidentifier',
                          state='state')


def column_lookup(datatype, file_format):
    """Return the column-name -> mapping-field table for a datatype/format pair.

    Unknown datatypes get an empty table, i.e. no mapping fields are filled.
    """
    if datatype == 'UST':
        return g_ust_columns
    if datatype == 'LUST':
        return g_lust_columns_xlsx if file_format == 'xlsx' else g_lust_columns_csv
    return {}


def scan_headers(info, headers, lookup):
    """Record in ``info`` which source columns feed which output fields.

    ``headers`` is the ordered list of source column names; column numbers
    stored in ``info`` are 1-based.  'globalid', 'organization_id' and the
    surrogate-key columns are recognised for every datatype (a later duplicate
    overwrites an earlier one).  Every other header is resolved through
    ``lookup``, and for those only the first (left-most) matching column is kept.
    """
    for indx, col in enumerate(headers, start=1):
        # Excel headers may be numbers or dates; str() keeps them from crashing.
        lowname = str(col).lower()

        if lowname == 'globalid':
            info['globalid'].update(exists=True, colnum=indx, colname='globalid')

        elif lowname == 'organization_id':
            info['organization_id'].update(exists=True, colnum=indx, colname=col)

        elif lowname in ('lust_location_id', 'ust_facilities_id'):
            info['surrogate_key'].update(exists=True, colnum=indx, colname=col)

        else:
            field = lookup.get(lowname)
            # make sure we only take the first matching instance from left to right
            if field is not None and info['mapping'][field]['colname'] is None:
                info['mapping'][field]['colnum'] = indx
                info['mapping'][field]['colname'] = col


for k, v in rez.items():

    if 'error' in v:
        continue

    v['globalid'] = {'exists': False}
    v['organization_id'] = {'exists': False}
    v['surrogate_key'] = {'exists': False}
    v['mapping'] = {field: {"colnum": None, "colname": None} for field in g_mapping_fields}

    if v['format'] == 'xlsx':
        print("Examining " + v['datatype'] + ' file ' + k + ', sheet ' + str(v['sheet_no']))
        df = pd.read_excel('input' + os.sep + k, sheet_name=v['sheet_no'])
        headers = list(df.columns)
        record_count = len(df.index)
        del df

    else:
        print("Examining " + v['datatype'] + ' file ' + k)
        # newline='' lets the csv module handle line endings itself, so quoted
        # fields containing line breaks are parsed (and counted) as one record.
        with open('input' + os.sep + k, newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            # An empty file has no header row; treat it as zero columns/records.
            headers = next(csv_reader, [])
            record_count = sum(1 for row in csv_reader)

    print("Sheet has " + str(record_count) + " records.")
    v['record_count'] = record_count

    scan_headers(v, headers, column_lookup(v['datatype'], v['format']))

    print("  surrogate_key: " + str(v['surrogate_key']))
    print("  organization_id: " + str(v['organization_id']))

    for k1, v1 in sorted(v['mapping'].items()):
        if v1['colname'] is not None:
            print("  " + k1 + ": " + str(v1))

    print(" ")
        

Examining LUST file MO_LUST_for_geoprocessing-2023-06-22.csv
Sheet has 48 records.
  surrogate_key: {'exists': True, 'colnum': 1, 'colname': 'lust_location_id'}
  organization_id:{'exists': True, 'colnum': 3, 'colname': 'organization_id'}
  address1: {'colnum': 6, 'colname': 'SiteAddress'}
  address2: {'colnum': 7, 'colname': 'SiteAddress2'}
  city: {'colnum': 8, 'colname': 'SiteCity'}
  county: {'colnum': 10, 'colname': 'County'}
  name: {'colnum': 5, 'colname': 'SiteName'}
  originallat: {'colnum': 12, 'colname': 'Latitude'}
  originallng: {'colnum': 13, 'colname': 'Longitude'}
  sourceidentifier: {'colnum': 4, 'colname': 'LUSTID'}
  state: {'colnum': 11, 'colname': 'State'}
  zip5: {'colnum': 9, 'colname': 'Zipcode'}
 
Examining UST file MO_UST_for_geoprocessing-2023-06-21.csv
Sheet has 451 records.
  surrogate_key: {'exists': True, 'colnum': 1, 'colname': 'ust_facilities_id'}
  organization_id:{'exists': True, 'colnum': 3, 'colname': 'organization_id'}
  address1: {'colnum': 6, 'co

In [15]:
# Write out the companion json file

for k, v in rez.items():
    # Files flagged with an error in an earlier cell get no companion document.
    if 'error' in v:
        continue

    # The companion sits beside the source file: same stem, .json extension.
    companion_path = 'input' + os.sep + os.path.splitext(k)[0] + '.json'

    # Remove any previous companion so the document is always created afresh.
    if os.path.exists(companion_path):
        os.remove(companion_path)

    print("writing out companion file for " + k)
    with open(companion_path, 'w') as companion:
        json.dump(v, companion, indent=3)
        

writing out companion file for MO_LUST_for_geoprocessing-2023-06-22.csv
writing out companion file for MO_UST_for_geoprocessing-2023-06-21.csv
