In [5]:
from collections import defaultdict

# Pipe-delimited MRCONSO extract to scan
input_file = 'MRCONSO_ENG.txt'

# Row count per source vocabulary (SAB); unseen keys start at 0
sab_count = defaultdict(int)

with open(input_file, 'r', encoding='utf-8') as file:
    for row in file:
        # The SAB is taken from position 11 of the split row
        sab_count[row.strip().split('|')[11]] += 1



    



Top 8 sources:
MEDCIN: 1030279
MSH: 1010573
SNOMEDCT_US: 988281
NCBI: 959266
LNC: 710351
NCI: 448134
ICD10PCS: 271054
MTH: 255269


In [6]:
# List the sources with the highest row counts, largest first
max_count = 10  # number of sources to show
print(f"\nTop {max_count} sources:")

# Sorting ascending on the negated count gives descending counts while
# keeping tied sources in their original insertion order.
ranked_sources = sorted(sab_count.items(), key=lambda item: -item[1])
for sab, count in ranked_sources[:max_count]:
    print(f"{sab}: {count}")


Top 10 sources:
MEDCIN: 1030279
MSH: 1010573
SNOMEDCT_US: 988281
NCBI: 959266
LNC: 710351
NCI: 448134
ICD10PCS: 271054
MTH: 255269
HGNC: 234434
RXNORM: 198770


From MRCONSO_ENG.txt, I want to retrieve entries from the source vocabularies RXNORM and MTH. For RXNORM, select only lines whose TTY is IN or BN. For MTH, select only lines whose TTY is BD or PN. For each selected line, I want the concept ID (CUI) and the string (STR).

In [7]:
# Source extract and the single destination file for all kept rows
input_file = 'MRCONSO_ENG.txt'
output_file = 'CONCEPT_RXNORM_MTH.txt'

# Term types (TTY) to keep, keyed by source vocabulary (SAB)
conditions = {
    'RXNORM': {'IN', 'BN'},
    'MTH': {'BD', 'PN'},
}

# The output is opened first, then the input; rows are streamed one at a time
with open(output_file, 'w', encoding='utf-8') as outfile, \
     open(input_file, 'r', encoding='utf-8') as infile:
    for record in infile:
        columns = record.strip().split('|')
        # Positions used: 11 = SAB, 12 = TTY, 0 = CUI, 14 = STR
        sab, tty = columns[11], columns[12]
        cui, string = columns[0], columns[14]

        # Skip rows from other sources or with a term type that is not listed
        if tty not in conditions.get(sab, ()):
            continue

        # Keep only the concept ID and its string, pipe-separated
        outfile.write(f"{cui}|{string}\n")

print(f"Filtered entries have been written to {output_file}")


Filtered entries have been written to CONCEPT_RXNORM_MTH.txt


In [8]:
# Source extract
input_file = 'MRCONSO_ENG.txt'

# One destination file per source vocabulary
output_file_rxnorm = 'CONCEPT_RXNORM.txt'
output_file_mth = 'CONCEPT_MTH.txt'

# Term types (TTY) to keep, keyed by source vocabulary (SAB)
# NOTE(review): the In [7] filter and the note above it list both BD and PN
# for MTH; only BD is kept here - confirm this is intentional.
conditions = {
    'RXNORM': {'IN', 'BN'},
    'MTH': {'BD'},
}

with open(output_file_rxnorm, 'w', encoding='utf-8') as outfile_rxnorm, \
     open(output_file_mth, 'w', encoding='utf-8') as outfile_mth, \
     open(input_file, 'r', encoding='utf-8') as infile:

    # Route each kept row to the file that belongs to its source vocabulary
    destinations = {'RXNORM': outfile_rxnorm, 'MTH': outfile_mth}

    for record in infile:
        columns = record.strip().split('|')
        # Positions used: 11 = SAB, 12 = TTY, 0 = CUI, 14 = STR
        sab, tty = columns[11], columns[12]
        cui, string = columns[0], columns[14]

        if sab in conditions and tty in conditions[sab]:
            formatted_line = f"{cui}|{string}\n"
            destination = destinations.get(sab)
            # A source without a destination file is silently ignored
            if destination is not None:
                destination.write(formatted_line)

print(f"Filtered entries for RXNORM have been written to {output_file_rxnorm}")
print(f"Filtered entries for MTH have been written to {output_file_mth}")


Filtered entries for RXNORM have been written to CONCEPT_RXNORM.txt
Filtered entries for MTH have been written to CONCEPT_MTH.txt


LET'S FOCUS ON RXNORM
Relationship types to keep:
induced_by
contraindicated_mechanism_of_action_of
induces
modified_by
has_contraindicated_drug
modifies
effect_may_be_inhibited_by
may_inhibit_effect_of

In [9]:
# Inputs: the CUI|STR list of RXNORM concepts and the MRREL relations file
rxnorm_concepts_file = 'CONCEPT_RXNORM.txt'
mrrel_file = 'MRREL.RRF'
output_file = 'RXNORM_interactions.txt'

# Collect the concept IDs (first pipe-separated field of each line)
rxnorm_concepts = set()
with open(rxnorm_concepts_file, 'r', encoding='utf-8') as file:
    rxnorm_concepts.update(entry.strip().split('|')[0] for entry in file)

# Relationship labels that count as an interaction of interest
desired_relationships = {
    'induced_by',
    'contraindicated_mechanism_of_action_of',
    'induces',
    'modified_by',
    'has_contraindicated_drug',
    'modifies',
    'effect_may_be_inhibited_by',
    'may_inhibit_effect_of',
}

# Copy matching MRREL rows verbatim to the output file
with open(output_file, 'w', encoding='utf-8') as outfile, \
     open(mrrel_file, 'r', encoding='utf-8') as infile:
    for record in infile:
        columns = record.strip().split('|')
        # Positions 0 and 4 hold the two concept IDs, position 7 the label
        cui1, cui2, rel = columns[0], columns[4], columns[7]

        if rel not in desired_relationships:
            continue
        # Both ends of the relation must be known RXNORM concepts
        if cui1 in rxnorm_concepts and cui2 in rxnorm_concepts:
            outfile.write(record)

print(f"Filtered interactions have been written to {output_file}")


Filtered interactions have been written to RXNORM_interactions.txt


In [10]:
# Inputs: the CUI|STR list of RXNORM concepts and the MRREL relations file
rxnorm_concepts_file = 'CONCEPT_RXNORM.txt'
mrrel_file = 'MRREL.RRF'
output_interactions_file = 'translated_interactions.txt'

# Map each concept ID to its name. When a CUI appears on several lines,
# the name from the last such line is the one that remains.
concept_names = {}
with open(rxnorm_concepts_file, 'r', encoding='utf-8') as file:
    for entry in file:
        parts = entry.strip().split('|')
        concept_names[parts[0]] = parts[1]

# Relationship labels that count as an interaction of interest
desired_relationships = {
    'induced_by',
    'contraindicated_mechanism_of_action_of',
    'induces',
    'modified_by',
    'has_contraindicated_drug',
    'modifies',
    'effect_may_be_inhibited_by',
    'may_inhibit_effect_of',
}

# Write one "name, relationship, name" line per matching MRREL row
with open(output_interactions_file, 'w', encoding='utf-8') as outfile, \
     open(mrrel_file, 'r', encoding='utf-8') as infile:
    for record in infile:
        columns = record.strip().split('|')
        # Positions 0 and 4 hold the two concept IDs, position 7 the label
        cui1, cui2, rel = columns[0], columns[4], columns[7]

        if rel not in desired_relationships:
            continue
        # Both ends must have a known name, otherwise the row is dropped
        if cui1 not in concept_names or cui2 not in concept_names:
            continue

        concept1_name = concept_names[cui1]
        concept2_name = concept_names[cui2]
        outfile.write(f"{concept1_name}, {rel}, {concept2_name}\n")

print(f"Translated interactions have been written to {output_interactions_file}")


Translated interactions have been written to translated_interactions.txt


What happens if we keep relations in which only one of the two concepts (rather than both) is in the concept list?

In [14]:
# # Define file paths
# rxnorm_concepts_file = './RXNORM/CONCEPT_RXNORM.txt'
# mrrel_file = 'MRREL.RRF'
# output_interactions_file = 'translated_interactions_atleast1.txt'

# # Load RXNORM Concept IDs and names into a dictionary
# concept_names = {}
# with open(rxnorm_concepts_file, 'r', encoding='utf-8') as file:
#     for line in file:
#         parts = line.strip().split('|')
#         cui = parts[0]
#         name = parts[1]
#         concept_names[cui] = name

# # Define desired relationship types
# desired_relationships = {
#     'induced_by', 
#     'contraindicated_mechanism_of_action_of', 
#     'induces', 
#     'modified_by', 
#     'has_contraindicated_drug', 
#     'modifies', 
#     'effect_may_be_inhibited_by', 
#     'may_inhibit_effect_of'
# }

# # Open the output file for writing translated interactions
# with open(output_interactions_file, 'w', encoding='utf-8') as outfile:
#     # Read MRREL.RRF and filter interactions
#     with open(mrrel_file, 'r', encoding='utf-8') as infile:
#         for line in infile:
#             fields = line.strip().split('|')
#             cui1 = fields[0]
#             cui2 = fields[4]
#             rel = fields[7]

#             # Modify the condition to check if at least one of the CUIs is in concept_names
#             if (cui1 in concept_names or cui2 in concept_names) and rel in desired_relationships:
#                 # Use the names from concept_names if available, otherwise use the CUI
#                 concept1_name = concept_names.get(cui1, cui1)
#                 concept2_name = concept_names.get(cui2, cui2)

#                 # Write the interaction with concept names to the output file
#                 outfile.write(f"{concept1_name}, {rel}, {concept2_name}\n")

# print(f"Translated interactions have been written to {output_interactions_file}")


FOCUS ON MMSL

In [18]:
# Define the input file path
input_file = 'MRCONSO_ENG.txt'

# Output file path for the MMSL concepts.
# The variable keeps its existing `_mth` suffix so the name bound by this
# cell does not change, but the file it points to holds MMSL rows.
output_file_mth = 'CONCEPT_MMSL.txt'

# Term types (TTY) to keep, keyed by source vocabulary (SAB)
conditions = {
    'MMSL': {'BN'}      # Include only these TTYs for MMSL
}

# Open the output file for writing
with open(output_file_mth, 'w', encoding='utf-8') as outfile_mmsl:

    # Read the MRCONSO_ENG.txt file
    with open(input_file, 'r', encoding='utf-8') as infile:
        for line in infile:
            fields = line.strip().split('|')
            sab = fields[11]  # SAB is at index 11
            tty = fields[12]  # TTY is at index 12
            cui = fields[0]   # CUI is at index 0
            string = fields[14]  # STR is at index 14

            # Same membership test as the other filter cells; with the
            # conditions above it matches exactly the MMSL rows with TTY BN.
            if sab in conditions and tty in conditions[sab]:
                formatted_line = f"{cui}|{string}\n"
                outfile_mmsl.write(formatted_line)

# Report the vocabulary that was actually filtered (MMSL, not MTH)
print(f"Filtered entries for MMSL have been written to {output_file_mth}")


Filtered entries for MTH have been written to CONCEPT_MMSL.txt


In [20]:
# Append the MMSL concept list to the RXNORM concept list in a new file
# NOTE(review): In [8] writes CONCEPT_RXNORM.txt to the working directory,
# while this cell reads it from RXNORM/ - confirm the file is placed there
# before running.
rxnorm_file = 'RXNORM/CONCEPT_RXNORM.txt'
mmsl_file = 'CONCEPT_MMSL.txt'

# Combined output
output_file = 'CONCEPTS_RXNORM_MMSL.txt'

with open(output_file, 'w', encoding='utf-8') as outfile:
    # RXNORM lines go first, MMSL lines after; lines are copied unchanged
    for source_path in (rxnorm_file, mmsl_file):
        with open(source_path, 'r', encoding='utf-8') as infile:
            outfile.writelines(infile)

print(f"Concatenated entries have been written to {output_file}")


Concatenated entries have been written to CONCEPTS_RXNORM_MMSL.txt


In [21]:
# Inputs: the combined RXNORM + MMSL concept list and the MRREL relations file
rxnorm_mmsl_file = 'CONCEPTS_RXNORM_MMSL.txt'
mrrel_file = 'MRREL.RRF'
output_file = 'RXNORM_MMSL_interactions.txt'

# Collect the concept IDs (first pipe-separated field of each line)
rxnorm_mmsl_consept = set()
with open(rxnorm_mmsl_file, 'r', encoding='utf-8') as file:
    rxnorm_mmsl_consept.update(entry.strip().split('|')[0] for entry in file)

# Relationship labels that count as an interaction of interest
desired_relationships = {
    'induced_by',
    'contraindicated_mechanism_of_action_of',
    'induces',
    'modified_by',
    'has_contraindicated_drug',
    'modifies',
    'effect_may_be_inhibited_by',
    'may_inhibit_effect_of',
}

# Copy matching MRREL rows verbatim to the output file
with open(output_file, 'w', encoding='utf-8') as outfile, \
     open(mrrel_file, 'r', encoding='utf-8') as infile:
    for record in infile:
        columns = record.strip().split('|')
        # Positions 0 and 4 hold the two concept IDs, position 7 the label
        cui1, cui2, rel = columns[0], columns[4], columns[7]

        if rel not in desired_relationships:
            continue
        # Both ends of the relation must be in the combined concept set
        if cui1 in rxnorm_mmsl_consept and cui2 in rxnorm_mmsl_consept:
            outfile.write(record)

print(f"Filtered interactions have been written to {output_file}")


Filtered interactions have been written to RXNORM_MMSL_interactions.txt
