In [10]:
import pandas as pd
import networkx as nx

# Load the ontology table. The preview printed below shows three columns:
# "Class ID", "Preferred Label" and "Parents".
df = pd.read_csv("./data/onto_x.csv")

# Quick look at the first rows.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df.info()

                                Class ID          Preferred Label  \
0          http://entity/CST/HYPOCHLOREM            HYPOCHLOREMIA   
1      http://entity/CST/EXTRAPYR%20SYND  EXTRAPYRAMIDAL SYNDROME   
2  http://entity/CST/VASCULITIS%20KIDNEY        KIDNEY VASCULITIS   
3            http://entity/CST/SKIN/DERM               Dermatoses   
4       http://entity/CST/FIBRO%20KIDNEY          KIDNEY FIBROSIS   

                                             Parents  
0  http://entity/CST/METGEN|http://entity/CST/MAN...  
1                          http://entity/CST/NERMOVE  
2  http://entity/CST/PATHCOLLAGEN|http://entity/C...  
3                                                NaN  
4  http://entity/CST/UG/UT/K/M|http://entity/CST/...  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1707 entries, 0 to 1706
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Class ID         1707 non-null   object
 1   Preferred

In [11]:
# An entity has no parents when its "Parents" cell is missing (NaN) or holds
# nothing but whitespace. Stripping before the emptiness test keeps this
# filter consistent with the graph-building cell, which also strips the cell
# before deciding whether any parent is listed.
parents_text = df['Parents'].fillna('').astype(str).str.strip()
no_parents = df[parents_text.eq('')]
print("Entities without parents:")
print(no_parents[['Class ID', 'Preferred Label']])


Entities without parents:
                              Class ID  \
3          http://entity/CST/SKIN/DERM   
24         http://entity/CST/SKIN/SBGL   
102              http://entity/CST/IOS   
103    http://entity/CST/STENO%20ESOPH   
105        http://entity/CST/SKIN/SUBQ   
108        http://entity/CST/SKIN/HAIR   
190              http://entity/CST/FBC   
236              http://entity/CST/FET   
283              http://entity/CST/FDU   
344    http://entity/CST/ENDO/PIT/POST   
362              http://entity/CST/TTT   
458              http://entity/CST/OOT   
463              http://entity/CST/MTD   
478         http://entity/CST/SKIN/PIG   
572              http://entity/CST/TTO   
580     http://entity/CST/ENDO/PIT/ANT   
614              http://entity/CST/NAI   
750              http://entity/CST/OTT   
759              http://entity/CST/IBM   
825             http://entity/STY/T071   
868              http://entity/CST/TOT   
871        http://entity/CST/SKIN/NAIL   
874     

In [12]:
import networkx as nx
import pandas as pd

# Build the child -> parent graph: each edge points from an entity to one of
# the parents listed in its pipe-separated "Parents" cell.
G = nx.DiGraph()
for class_id, parents_cell in zip(df['Class ID'], df['Parents']):
    child = class_id.strip()
    # Register every entity, even one with no parents: otherwise a class that
    # has no parents and is never named as a parent would be missing from G.
    G.add_node(child)
    # Real missing values (NaN / None / pd.NA) are detected with pd.isna
    # rather than by comparing their string form.
    parents_text = '' if pd.isna(parents_cell) else str(parents_cell).strip()
    if parents_text.lower() == 'nan':
        # A textual "nan" placeholder is still treated as "no parents".
        continue
    # Split on '|' and ignore empty fragments (e.g. a trailing separator).
    for fragment in parents_text.split('|'):
        parent = fragment.strip()
        if parent:
            G.add_edge(child, parent)

# Enumerate every simple cycle of the child -> parent graph; any cycle means
# that an unguarded walk up the hierarchy would never terminate.
cycles = list(nx.simple_cycles(G))
if not cycles:
    print("No cycles detected, DFS safe to use.")
else:
    print("Cycles detected (potential infinite loop):")
    print(cycles)


Cycles detected (potential infinite loop):
[['http://entity/CST/HEM', 'http://entity/CST/HEMHMRG']]


In [13]:
def dfs_check_cycle(node, visited=None, rec_stack=None, graph=None):
    """Return True if a directed cycle is reachable from ``node``.

    Performs a recursive depth-first search along outgoing edges
    (child -> parent).

    Parameters
    ----------
    node : hashable
        Node at which the search starts.
    visited : set, optional
        Nodes already fully explored without finding a cycle. Such nodes are
        skipped, so passing the same set to successive calls avoids
        re-exploring shared ancestors. A fresh set is created when omitted.
    rec_stack : set, optional
        Nodes on the current DFS path. Meeting one of them again means a
        cycle. A fresh set is created when omitted.
    graph : object with a ``successors(node)`` method, optional
        Graph to search. Defaults to the notebook-level graph ``G``.

    Returns
    -------
    bool
        True if a cycle was found, False otherwise. When True is returned,
        ``rec_stack`` is left holding the path that led to the cycle.
    """
    if visited is None:
        visited = set()
    if rec_stack is None:
        rec_stack = set()
    if graph is None:
        graph = G
    if node in rec_stack:
        return True  # back edge to a node on the current path: cycle
    if node in visited:
        # Already fully explored without a cycle. Without this check every
        # path through a shared ancestor is walked again, which is
        # exponential on graphs with many converging paths.
        return False
    rec_stack.add(node)
    # Follow parents (edges going from child to parent)
    for parent in graph.successors(node):
        if dfs_check_cycle(parent, visited, rec_stack, graph):
            return True
    rec_stack.remove(node)
    visited.add(node)
    return False

# Start a DFS from every node; any() stops at the first start node from
# which a cycle is reachable.
has_cycle = any(dfs_check_cycle(start) for start in G.nodes())

print("Cycles detected?" , has_cycle)


Cycles detected? True
