In [None]:
import os
import sys

import pandas as pd

# Append the parent of the current working directory to sys.path so that the
# project-local `utils` package can be imported below. This must run before
# the `utils` import.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

import utils.utils as u

-------------------------------------------------------------------------------------------------------------------

# Assignment of blank and unknown owners

-------------------------------------------------------------------------------------------------------------------

#### Load dataset

In [None]:
# Read the step-1 pipeline output; all later cells work on this frame.
dataset_DF = pd.read_json(
    '../data_catastici/data_post-processing/pipeline_steps/catastici_1741_step1.json'
)

#### Select TODO entries and format

In [None]:
# Keep an independent copy of the rows still flagged 'TODO', so that the
# normalisation below does not touch dataset_DF itself.
is_todo = dataset_DF['owner_code'] == 'TODO'
dataset_unresolved_DF = dataset_DF[is_todo].copy()

# Normalise the owner text with the project helper (applied element-wise).
dataset_unresolved_DF.loc[:, 'owner_name'] = dataset_unresolved_DF['owner_name'].map(u.text_to_minimal)

print("There are", len(dataset_unresolved_DF), "unresolved parcel entries.")

In [None]:
# Quick visual check of the first five unresolved rows.
dataset_unresolved_DF.head(5)

#### Load necessary dictionaries

In [None]:
# Entries of column 0 of the unknown-owners dictionary file, ordered from the
# longest entry to the shortest.
unknowns = sorted(
    pd.read_json('../dictionaries/MSC_dictionary/unknown_owners.json')[0].to_list(),
    key=len,
    reverse=True,
)

## Start parcel assignment

In [None]:
# Accumulator of pending changes; each item is a dict with the keys
# 'uidx' and 'owner_text_updated'.
modifications_to_commit = list()

In [None]:
# Classify every unresolved entry by its normalised owner text:
#   - empty text, or the literal string 'nan'          -> 'blank'
#   - text exactly equal to an entry of `unknowns`     -> 'unknown'
#   - anything else                                    -> no modification recorded
#
# The accumulator is (re)created here so that re-running this cell does not
# append the same modifications a second time.
modifications_to_commit = []

# Only exact matches count, so a set lookup replaces a scan of the whole list
# for every row.
unknown_texts = set(unknowns)

for index, row in dataset_unresolved_DF.iterrows():
    uidx = row['uidx']
    owner_text_minimal = row['owner_name']

    if len(owner_text_minimal) == 0 or owner_text_minimal == 'nan':
        modifications_to_commit.append({ 'uidx': uidx, 'owner_text_updated': 'blank' })

    elif owner_text_minimal in unknown_texts:
        modifications_to_commit.append({ 'uidx': uidx, 'owner_text_updated': 'unknown' })
        print('----------------------------------------------------------')
        print("UIDX", uidx)
        print("UNKNOWN TEXT  -", owner_text_minimal)

print()
print()
print("There are", len(modifications_to_commit), "total modifications.")

### Commit modifications to the dataset

In [None]:
# Apply the pending modifications to dataset_DF.
#
# Rows are selected by the value of their 'uidx' column rather than by index
# label: `.loc[label, col] = value` with a label that is not in the index
# silently appends a new row, and with a label that belongs to a different row
# it edits the wrong entry. Matching on the column gives the same result when
# the index happens to equal 'uidx' and stays correct when it does not.
blank_uidxs = [m['uidx'] for m in modifications_to_commit if m['owner_text_updated'] == 'blank']
unknown_uidxs = [m['uidx'] for m in modifications_to_commit if m['owner_text_updated'] != 'blank']

for owner_code, uidxs in (('BLK', blank_uidxs), ('UNK', unknown_uidxs)):
    if not uidxs:
        # Nothing to commit for this code; leave the frame untouched.
        continue
    target_rows = dataset_DF['uidx'].isin(uidxs)
    dataset_DF.loc[target_rows, 'owner_code'] = owner_code
    dataset_DF.loc[target_rows, 'owner_count'] = 0

### ⚠️⚠️⚠️ Write the updated dataset to file

In [None]:
# Write the updated dataset as a JSON list of row records (step-2 output).
# `DataFrame.to_json`'s `index` parameter is a boolean flag and does not apply
# to orient='records', so the former `index=['uidx']` argument had no effect
# and is dropped; 'uidx' is still written as a regular column.
dataset_DF.to_json(
    '../data_catastici/data_post-processing/pipeline_steps/catastici_1741_step2.json',
    orient='records',
)

----------------------------------------------------------

In [None]:
import time

# Print the local wall-clock time at which this cell ran
# (strftime formats the current local time when no time tuple is given).
print("Current time:", time.strftime("%H:%M:%S"))