# ISO TC/211 Harmonised Model Dependency Controls

Connect to the EA app and model repository

In [1]:
from Parameters import *
from EAConnect import *
from HM_Controls import *
import sys
import pandas as pd

# Open EA Repository and find Model
eaApp = openEAapp()
eaRepo = openEArepo(eaApp,repo_path)
try:
    cmMod = eaRepo.Models.GetByName(modelName)
    printTS('Model "' + modelName + '" found with PackageGUID ' + cmMod.PackageGUID )
    printTS('Number of main packages: ' + str(cmMod.Packages.Count))
except Exception as e:
    printTS('Model  "' + modelName + '" not found!')
    # closeEA(eaRepo)
    # sys.exit()

timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

2025-04-11 09:04:06   Hi EA - are you there? 
2025-04-11 09:04:06   I am here
2025-04-11 09:04:06   Hi EA - Please open this repository: C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\ISOTC211_HM EditorialVersion.qea
2025-04-11 09:04:07   OK! Repository C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\ISOTC211_HM EditorialVersion.qea is ready!
2025-04-11 09:04:07   Model "Conceptual Models" found with PackageGUID {7B6B28E9-C583-4363-9E9C-F37A37AE06C9}
2025-04-11 09:04:07   Number of main packages: 5


Get the TC 211 main package

In [2]:
tcmName = 'ISO TC211'
try:
    tcMod = cmMod.Packages.GetByName(tcmName)
    printTS('Model "' + tcmName + '" found with PackageGUID ' + tcMod.PackageGUID )
except Exception as e:
    printTS('Model  "' + tcmName + '" not found!')

printTS('Number of main packages: ' + str(tcMod.Packages.Count))

2025-04-11 09:04:09   Model "ISO TC211" found with PackageGUID {CAB2E56D-50FA-4904-A16C-B34D7AE325B6}
2025-04-11 09:04:09   Number of main packages: 59


 # Create DataFrame for elements
 - Loop through the complete model:
    - For each element: add content to rows for ElementID, GUID, main standard, standard and element name

In [3]:
# This script lists all elements (classifiers) in the whole HM

def recElements(pck,df):
    for eaEl in pck.Elements:
        # print('Element: ' + el.Name)
        if eaEl.Type.upper() in ["CLASS","INTERFACE", "DATATYPE","ENUMERATION"]:
           df.loc[len(df)] = [eaEl.ElementID, eaEl.ElementGUID, stPck.PackageGUID, stPck.Name,edPck.PackageGUID, edPck.Name,eaEl.Name,eaEl.Type] 

    for p in pck.Packages:
        recElements(p,df)
    return df

dfEl = pd.DataFrame(columns=['ElementID','GUID','msGUID','MainStandard','edGUID', 'Edition', 'ElementName','Type'])
# 'ElementID' = Internal ElementID for the classifier
# 'GUID' = GUID for the classifiser
# 'msGUID' = GUID for the main standard (e.g. GUID for the package ISO 19115 Metadata)
# 'MainStandard' = Name of the main standard
# 'edGUID' = GUID for the edition package (e.g. the GUID for the package ISO 19115-1 Edition 1 (Amendment 1))
# 'Edition' = Name of the edition
# 'ElementName' = Name of the classifier
# 'Type' = Classifier type (Class, Interface etc)

for stPck in tcMod.Packages:
    printTS('Standard: ' + stPck.Name)
    if stPck.Elements.Count > 0:
        edPck = stPck
        printTS('Edition: ' + edPck.Name)
        dfEl = recElements(edPck,dfEl)        
    for edPck in stPck.Packages:
        printTS('Edition: ' + edPck.Name)
        dfEl = recElements(edPck,dfEl)


2025-04-11 09:04:13   Standard: Common types
2025-04-11 09:04:13   Standard: Package Context Diagrams
2025-04-11 09:04:13   Standard: ISO 6709 Standard representation of geographic point location by coordinates
2025-04-11 09:04:13   Edition: ISO 6709 Edition 2
2025-04-11 09:04:13   Edition: ISO 6709 Edition 1 (Corrigendum 1)
2025-04-11 09:04:13   Standard: ISO 19101 Reference model
2025-04-11 09:04:13   Edition: ISO 19101-1 Edition 1
2025-04-11 09:04:14   Edition: ISO 19101-2 Edition 1
2025-04-11 09:04:14   Edition: ISO/TS 19101-2 Edition 1
2025-04-11 09:04:14   Standard: ISO 19103 Conceptual schema language
2025-04-11 09:04:14   Edition: ISO 19103 Edition 2
2025-04-11 09:04:14   Edition: ISO 19103 Edition 2 Informative content
2025-04-11 09:04:15   Edition: ISO 19103 Edition 1
2025-04-11 09:04:15   Edition: ISO TS 19103 Edition 1
2025-04-11 09:04:16   Standard: ISO 19104 Terminology
2025-04-11 09:04:16   Edition: ISO 19104 Edition 1
2025-04-11 09:04:16   Standard: ISO 19105 Conformanc

 # Create DataFrame for properties
 - Loop through the complete model:
    - For each attribute or association end: add content to rows for elementID, GUID, main standard, standard, attribute/association name and refElementIDIdentifies all properties and which element it is referring to.

Only navigable aggregation and association ends are considered. All other types of connectors are ignored.
- Dependencies and realisations are considered more weak relations
- Generalizations may be relevant on the specialization side

In [4]:
# This script lists all properties (attributes and association ends) in the whole HM
# NOTE: Only navigable aggregation and association ends are considered. All other types of connectors are ignored.
# Dependencies and realisations are considered more weak relations
# Generalizations may be relevant from the specialization side

def recProperties(pck,df):
    for eaEl in pck.Elements:
        # print('Element: ' + el.Name)
        if eaEl.Type.upper() in ["CLASS","INTERFACE", "DATATYPE","ENUMERATION"]:
            for eaProp in eaEl.Attributes:
                df.loc[len(df)] = [eaEl.ElementID, eaEl.ElementGUID, stPck.PackageGUID, stPck.Name,edPck.PackageGUID,edPck.Name, eaProp.Name,'Attribute',eaProp.ClassifierID] 
             #Loop for connector dependencies
            for eaCon in eaEl.Connectors:
                if eaCon.Type in ['Aggregation','Association','Generalization']:
                    if eaCon.SupplierID == eaEl.ElementID:
                        cEnd = eaCon.ClientEnd
                        ClassifierID = eaCon.ClientID 
                    else:
                        cEnd = eaCon.SupplierEnd
                        ClassifierID = eaCon.SupplierID

                    if (not cEnd.Navigable == 'Non-Navigable') or (eaCon.Type == 'Generalization'): # and eaCon.ClientID == eaEl.ElementID):
                        df.loc[len(df)] = [eaEl.ElementID, eaEl.ElementGUID, stPck.PackageGUID, stPck.Name,edPck.PackageGUID,edPck.Name, cEnd.Role,eaCon.Type,ClassifierID] 


    for p in pck.Packages:
        recProperties(p,df)
    return df

dfProp = pd.DataFrame(columns=['ElementID','GUID','msGUID','MainStandard','edGUID', 'Edition', 'PropertyName','Type','refElementID'])
# 'ElementID' = Internal ElementID for the classifier that the property is part of
# 'GUID' = GUID for the classifiser
# 'msGUID'= GUID for the main standard (e.g. GUID for the package ISO 19115 Metadata)
# 'MainStandard' = Name of the main standard
# 'edGUID' = GUID for the edition package (e.g. the GUID for the package ISO 19115-1 Edition 1 (Amendment 1
# 'Edition'= Name of the edition
# 'PropertyName' =  Name of the property
# 'Type' = Type of property (attribute, association, aggregation etc)
# 'refElementID' = The ElementID that the property refers to


for stPck in tcMod.Packages:
    printTS('Standard: ' + stPck.Name)
    if stPck.Elements.Count > 0:
        edPck = stPck
        printTS('Edition: ' + edPck.Name)
        dfProp = recProperties(edPck,dfProp)        
    for edPck in stPck.Packages:
        printTS('Edition: ' + edPck.Name)
        dfProp = recProperties(edPck,dfProp)

2025-04-11 09:05:34   Standard: Common types
2025-04-11 09:05:34   Standard: Package Context Diagrams
2025-04-11 09:05:34   Standard: ISO 6709 Standard representation of geographic point location by coordinates
2025-04-11 09:05:35   Edition: ISO 6709 Edition 2
2025-04-11 09:05:35   Edition: ISO 6709 Edition 1 (Corrigendum 1)
2025-04-11 09:05:35   Standard: ISO 19101 Reference model
2025-04-11 09:05:35   Edition: ISO 19101-1 Edition 1
2025-04-11 09:05:35   Edition: ISO 19101-2 Edition 1
2025-04-11 09:05:35   Edition: ISO/TS 19101-2 Edition 1
2025-04-11 09:05:36   Standard: ISO 19103 Conceptual schema language
2025-04-11 09:05:36   Edition: ISO 19103 Edition 2
2025-04-11 09:05:36   Edition: ISO 19103 Edition 2 Informative content
2025-04-11 09:05:37   Edition: ISO 19103 Edition 1
2025-04-11 09:05:39   Edition: ISO TS 19103 Edition 1
2025-04-11 09:05:40   Standard: ISO 19104 Terminology
2025-04-11 09:05:40   Edition: ISO 19104 Edition 1
2025-04-11 09:05:40   Standard: ISO 19105 Conformanc

In [5]:
#Print statistics for property types
dfpropCounts = dfProp.groupby(['Type'])['ElementID'].count().reset_index()
dfpropCounts

Unnamed: 0,Type,ElementID
0,Aggregation,1084
1,Association,4445
2,Attribute,17035
3,Generalization,7476


# Count references to each element
- per element
- per element and edition
- per element and main standard

In [6]:
# NOTE: The output of this script is dfElStat, which becommes the sheet Elements in Excel

# Count the total number of references to each element. 
# Group by referenced element (refElementID) and count references from (ElementID)
dfElCounts = dfProp.groupby(['refElementID'])['ElementID'].count().reset_index()
dfElCounts.rename(columns={'ElementID': 'TotalReferences'}, inplace=True)
# print(dfElCounts)
# Merge the statistics (TotalReferences) to the Element table 
result_df = pd.merge(dfEl, dfElCounts, left_on='ElementID', right_on='refElementID', how='left')
result_df.drop(columns=['refElementID'], inplace=True)
result_df.fillna({'TotalReferences':0},inplace=True)
result_df['TotalReferences'] = result_df['TotalReferences'].astype(int)
dfElStat = result_df

# Count the total number of references from each edition package to each element
# Group by Edition (edGUID) and the referenced Element (refElement)
dfElCounts = dfProp.groupby(['refElementID','edGUID'])['ElementID'].count().reset_index()
dfElCounts.rename(columns={'ElementID': 'EditionInternalReferences'}, inplace=True)
# Merge the statistics to the Element table by joining on ElementID and Edition (edGUID)
# NOTE: This join gives the number of references to the elements from elements within the same edition package (internal references)
result_df = pd.merge(dfElStat, dfElCounts, left_on=['ElementID','edGUID'], right_on=['refElementID','edGUID'], how='left')
result_df.drop(columns=['refElementID'], inplace=True)
result_df.fillna({'EditionInternalReferences':0},inplace=True)
result_df['EditionInternalReferences'] = result_df['EditionInternalReferences'].astype(int)
dfElStat = result_df

# Count the total number of references from each main standards package to each element
# Group by msGUID and the referenced Element (refElement)
dfElCounts = dfProp.groupby(['refElementID','msGUID'])['ElementID'].count().reset_index()
dfElCounts.rename(columns={'ElementID': 'StandardInternalReferences'}, inplace=True)
# Merge on the edGUID and refElement
# NOTE: This join gives the number of references to the elements from elements within the same main standard package (standard internal references)
result_df = pd.merge(dfElStat, dfElCounts, left_on=['ElementID','msGUID'], right_on=['refElementID','msGUID'], how='left')
result_df.drop(columns=['refElementID'], inplace=True)
result_df.fillna({'StandardInternalReferences':0},inplace=True)
result_df['StandardInternalReferences'] = result_df['StandardInternalReferences'].astype(int)
dfElStat = result_df
# Count external references (The difference between standard internal and the total)
dfElStat['ExternalReferences'] = dfElStat['TotalReferences'] - dfElStat['StandardInternalReferences']
# dfElStat.drop(columns=['StandardInternalReferences','EditionInternalReferences'], inplace=True)


In [None]:
# Statistics per element name per main standard (to summarize all references for different versions of the same element)
# # NOTE: The output of this script is elStatByName, which becommes the sheet Element by standard in Excel

elStatByName = dfElStat.groupby(['msGUID', 'MainStandard','ElementName']).agg(
    Versions=pd.NamedAgg(column='ElementID', aggfunc='count'),
    ExternalReferences=pd.NamedAgg(column='ExternalReferences', aggfunc='sum'),
    TotalReferences=pd.NamedAgg(column='TotalReferences', aggfunc='sum')
).reset_index()


In [25]:
# Extend the properties dataframe with element name, edition etc for the referenced element 

dfRefEls = dfElStat[['ElementID','ElementName','msGUID','MainStandard','edGUID','Edition']].add_prefix('ref')
dfPropWithNames = pd.merge(dfProp, dfRefEls, left_on='refElementID', right_on='refElementID', how='left')
print(str(len(dfPropWithNames)))
# Add the total number of references for referenced elements
dfRefElStat= elStatByName.add_prefix('count')
dfPropWithNames = pd.merge(dfPropWithNames, dfRefElStat, left_on=['refMainStandard','refElementName'], right_on=['countMainStandard','countElementName'], how='left')
dfPropWithNames.drop(columns=['countMainStandard','countElementName','countmsGUID'], inplace=True)
# dfPropWithNames.rename(columns={'countExternalReferences': 'refExternalReferences'}, inplace=True)


30040


In [None]:
# More statistics: Number of external editions and external mian standards that refer to each element

#Create filtered DataFrame with only properties that refer to an element outside of its main standard 
filtered_dfPropWithNames = dfPropWithNames[(dfPropWithNames['MainStandard'] != dfPropWithNames['refMainStandard']) & (dfPropWithNames['refElementID'] != 0)]
print(str(len(filtered_dfPropWithNames)))

# Count the number of external editions that refer to each element                                   
# Group dfPropCounts by refElementID and edGUID
result_df = filtered_dfPropWithNames.groupby(['refElementID','edGUID'])['ElementID'].count().reset_index()
result_df.drop(columns=['ElementID'], inplace=True)
dfPropCounts = result_df
print(dfPropCounts.head())
# Group the result by refElementID, count edGUID
result_df = dfPropCounts.groupby(['refElementID'])['edGUID'].count().reset_index()
dfPropCounts = result_df
dfPropCounts.rename(columns={'edGUID': 'countExternalEditions'}, inplace=True)
# Merge (Left join) to dfPropWithNames
print(dfPropCounts.head())
result_df = pd.merge(dfPropWithNames, dfPropCounts, left_on='refElementID', right_on='refElementID', how='left')
result_df.fillna({'countExternalEditions':0},inplace=True)
result_df['countExternalEditions'] = result_df['countExternalEditions'].astype(int)
dfPropWithNames = result_df

#Count the number of external standards that refer to each element
# Group filtered_dfPropWithNames by refElementID and msGUID
result_df = filtered_dfPropWithNames.groupby(['refElementID','msGUID'])['ElementID'].count().reset_index()
result_df.drop(columns=['ElementID'], inplace=True)
dfPropCounts = result_df
print(dfPropCounts.head())
# Group the result by refElementID, count msGUID
result_df = dfPropCounts.groupby(['refElementID'])['msGUID'].count().reset_index()
dfPropCounts = result_df
dfPropCounts.rename(columns={'msGUID': 'countExternalStandards'}, inplace=True)
# Merge (Left join) to dfPropWithNames
print(dfPropCounts.head())
result_df = pd.merge(dfPropWithNames, dfPropCounts, left_on='refElementID', right_on='refElementID', how='left')
result_df.fillna({'countExternalStandards':0},inplace=True)
result_df['countExternalStandards'] = result_df['countExternalStandards'].astype(int)
dfPropWithNames = result_df

# TODO
# Count the number og individual editions and main standard that refer to an element based on unique names per main standard
# Group dfPropCounts by refmsGUID, refElementName and edGUID
result_df = filtered_dfPropWithNames.groupby(['refmsGUID','refElementName','edGUID'])['ElementID'].count().reset_index()
result_df.drop(columns=['ElementID'], inplace=True)
dfPropCounts = result_df
print(dfPropCounts.head())
# Group the result by refElementName, count edGUID
result_df = dfPropCounts.groupby(['refmsGUID','refElementName'])['edGUID'].count().reset_index()
dfPropCounts = result_df
dfPropCounts.rename(columns={'edGUID': 'countExternalEditionsName'}, inplace=True)
# Merge (Left join) to dfPropWithNames
print(dfPropCounts.head())
result_df = pd.merge(dfPropWithNames, dfPropCounts, left_on=['refmsGUID','refElementName'], right_on=['refmsGUID','refElementName'], how='left')
result_df.fillna({'countExternalEditionsName':0},inplace=True)
result_df['countExternalEditionsName'] = result_df['countExternalEditionsName'].astype(int)
dfPropWithNames = result_df
# Merge to elStatbyName on msGUID and refElementName - for statistics


# Group dfPropCounts by refElementName and msGUID
result_df = filtered_dfPropWithNames.groupby(['refmsGUID','refElementName','msGUID'])['ElementID'].count().reset_index()
result_df.drop(columns=['ElementID'], inplace=True)
dfPropCounts = result_df
print(dfPropCounts.head())
# Group the result by refElementName, count msGUID
result_df = dfPropCounts.groupby(['refmsGUID','refElementName'])['msGUID'].count().reset_index()
dfPropCounts = result_df
dfPropCounts.rename(columns={'msGUID': 'countExternalStandardsName'}, inplace=True)
# Merge (Left join) to dfPropWithNames
print(dfPropCounts.head())
result_df = pd.merge(dfPropWithNames, dfPropCounts, left_on=['refmsGUID','refElementName'], right_on=['refmsGUID','refElementName'], how='left')
result_df.fillna({'countExternalStandardsName':0},inplace=True)
result_df['countExternalStandardsName'] = result_df['countExternalStandardsName'].astype(int)
dfPropWithNames = result_df
# Merge to elStatbyName on msGUID and refElementName - for statistics



#Create filtered DataFrame with only properties that refer to an element outside of its main standard and where that element is being referred to by at least 5 external properties
# filtered_dfPropWithNames = dfPropWithNames[(dfPropWithNames['MainStandard'] != dfPropWithNames['refMainStandard']) & (dfPropWithNames['countExternalReferences'] >= 5)]
# print(str(len(filtered_dfPropWithNames)))


6523
   refElementID                                  edGUID
0            64  {D25A7C1E-8D72-4614-BCF0-2E6C3769853A}
1           119  {80442A97-FB74-42b1-92E0-DDAB27EA18F5}
2           119  {C6593764-1DF3-44f8-9658-D42F172B225B}
3           125  {C6593764-1DF3-44f8-9658-D42F172B225B}
4           126  {C6593764-1DF3-44f8-9658-D42F172B225B}
   refElementID  countExternalEditions
0            64                      1
1           119                      2
2           125                      1
3           126                      1
4           131                      1
   refElementID                                  msGUID
0            64  {0EBB14A6-285B-440c-B6AF-20A47ACB4D76}
1           119  {74404EA6-E630-4be2-B1BB-057C76FD4759}
2           125  {74404EA6-E630-4be2-B1BB-057C76FD4759}
3           126  {74404EA6-E630-4be2-B1BB-057C76FD4759}
4           131  {74404EA6-E630-4be2-B1BB-057C76FD4759}
   refElementID  countExternalStandards
0            64                       1
1        

# Export to Excel

In [20]:
file_path = mainFolder + '\\HM_Dependencies.xlsx'
# Export to Excel 
writer = pd.ExcelWriter(file_path)

# The list of referenced elements and the count of internal and external references (dfElStat)
dfElStatOut = dfElStat.drop(columns=['ElementID','GUID','msGUID','edGUID'])
dfElStatOut.to_excel(writer,'Elements') 
print(f'Exported elements report to file: {file_path}' + '(count ' +  str(len(dfElStatOut)) + ')' )  

# The list of references with total references per main standard
elStatByNameOut = elStatByName.drop(columns=['msGUID'])
elStatByNameOut.to_excel(writer,'Elements by standard') 
print(f'Exported elements report per main standards to file: {file_path}' + '(count ' +  str(len(elStatByNameOut)) + ')' )  

# The list of properties with the elements they are refering and statistics for that element
dfPropWithNamesOut = dfPropWithNames.drop(columns=['GUID','msGUID','edGUID'])
dfPropWithNamesOut.to_excel(writer,'Properties') 
print(f'Exported properties report to file: {file_path}' + '(count ' +  str(len(dfPropWithNamesOut)) + ')' )  

# Properties that refer to an element outside of its main standard
# and where that element is being referred to by at least 5 properties
filtered_dfPropWithNames = filtered_dfPropWithNames.drop(columns=['GUID','msGUID','edGUID'])
filtered_dfPropWithNames.to_excel(writer,'PropertiesExternal') 
print(f'Exported filtered properties report to file: {file_path} ' + '(count ' +  str(len(filtered_dfPropWithNames)) + ')' ) 

writer.close()


  dfElStatOut.to_excel(writer,'Elements')


Exported elements report to file: C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\HM_Dependencies.xlsx(count 7486)


  elStatByNameOut.to_excel(writer,'Elements by standard')


Exported elements report per main standards to file: C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\HM_Dependencies.xlsx(count 5343)


  dfPropWithNamesOut.to_excel(writer,'Properties')


Exported properties report to file: C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\HM_Dependencies.xlsx(count 30040)


  filtered_dfPropWithNames.to_excel(writer,'PropertiesExternal')


Exported filtered properties report to file: C:\Data\GitHub\ISO TC211\HMMG\EditorialVersion\HM_Dependencies.xlsx (count 6523)
