In [118]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

# --- Connect to SQLite ---
# The path is relative, so it resolves against the notebook's working directory.
DB_PATH = "../../sql/openfda_base_updated.db"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

In [115]:
# How many distinct product names does the drug table contain?
query = """
SELECT COUNT(DISTINCT medicinalproduct) AS unique_medicinalproducts
FROM drug;
"""
result = cursor.execute(query).fetchone()
(unique_products,) = result
print(f"Unique medicinal products: {unique_products}")

Unique medicinal products: 8223


In [132]:
# Rank medicinal products by the number of rows they have in the drug table.
query = """
SELECT medicinalproduct, COUNT(*) AS entry_count
FROM drug
GROUP BY medicinalproduct
ORDER BY entry_count DESC;
"""
results = cursor.execute(query).fetchall()
df = pd.DataFrame(data=results, columns=["medicinalproduct", "entry_count"])
df.head(20)

Unnamed: 0,medicinalproduct,entry_count
0,MOUNJARO,10191
1,DUPIXENT,3700
2,INFLECTRA,2569
3,PREDNISONE,2386
4,METHOTREXATE,2106
5,REPATHA,1803
6,ACETAMINOPHEN,1479
7,RITUXIMAB,1419
8,DEXAMETHASONE,1306
9,ASPIRIN,1295


In [133]:
# Product names as a plain list, in the same order as the rows of `df`.
medicinal_products = df["medicinalproduct"].tolist()
print(medicinal_products[:5], len(medicinal_products), sep="\n")

['MOUNJARO', 'DUPIXENT', 'INFLECTRA', 'PREDNISONE', 'METHOTREXATE']
8223


In [None]:
# Get field names from the drug, drug_openfda, and drug_optional tables.
def _table_columns(table_name):
    """Return the column names of ``table_name`` as reported by pragma_table_info."""
    cursor.execute("SELECT name FROM pragma_table_info(?)", (table_name,))
    return [row[0] for row in cursor.fetchall()]


drug_fields = _table_columns("drug")
drug_openfda_fields = _table_columns("drug_openfda")
drug_optional_fields = _table_columns("drug_optional")

# Columns excluded from the combined field set. Set difference (rather than
# .remove) does not raise if one of them is absent from every table.
EXCLUDED_COLUMNS = {"id", "medicinalproduct", "safetyreportid"}
all_drug_fields = (
    set(drug_fields + drug_openfda_fields + drug_optional_fields) - EXCLUDED_COLUMNS
)

# A set has no .to_list() method; display a sorted list so the cell output is
# stable from run to run.
sorted(all_drug_fields)


## Check drug table

In [48]:
def check_drug_table(table_name, field_name, medical):
    """Count rows per distinct value of one column for a single medicinal product.

    Args:
        table_name: Table to query; it must expose a ``medicinalproduct`` column.
        field_name: Column whose distinct values are counted.
        medical: Medicinal product name to filter on.

    Returns:
        List of ``(value, count)`` tuples ordered by descending count.

    Note:
        ``table_name`` and ``field_name`` are interpolated into the SQL text
        because SQLite cannot bind identifiers, so pass only trusted names.
        The product name is bound as a parameter so that names containing a
        quote character do not break the statement.
    """
    query = f"""
    SELECT {field_name}, COUNT(*)
    FROM {table_name}
    WHERE medicinalproduct = ?
    GROUP BY {field_name}
    ORDER BY COUNT(*) DESC;
    """
    # Uses the notebook-level `cursor`.
    cursor.execute(query, (medical,))
    return cursor.fetchall()

# For the first eight products, report every scanned drug-table column whose
# per-product breakdown has at most three distinct values.
for drug in medicinal_products[:8]:
    print(" ")
    for field in drug_fields[3:]:
        results = check_drug_table(table_name="drug", field_name=field, medical=drug)
        if len(results) <= 3:
            print(f" Drug field candidate {field},checking {drug}")
            print(results)

 
 Drug field candidate drugcharacterization,checking MOUNJARO
[(1, 10153), (2, 26), (3, 12)]
 Drug field candidate drugauthorizationnumb,checking MOUNJARO
[('215866', 8001), (None, 2186), ('99', 4)]
 
 Drug field candidate drugcharacterization,checking DUPIXENT
[(1, 3670), (2, 27), (3, 3)]
 
 Drug field candidate drugcharacterization,checking INFLECTRA
[(1, 2556), (2, 13)]
 Drug field candidate drugauthorizationnumb,checking INFLECTRA
[('125544', 2483), (None, 86)]
 
 Drug field candidate drugcharacterization,checking PREDNISONE
[(1, 1275), (2, 1091), (3, 20)]
 
 Drug field candidate drugcharacterization,checking METHOTREXATE
[(1, 1666), (2, 417), (3, 23)]
 
 Drug field candidate drugcharacterization,checking REPATHA
[(1, 1769), (2, 34)]
 
 Drug field candidate drugcharacterization,checking ACETAMINOPHEN
[(2, 802), (1, 654), (3, 23)]
 Drug field candidate drugenddateformat,checking ACETAMINOPHEN
[(None, 1252), (102, 216), (610, 11)]
 
 Drug field candidate drugcharacterization,checkin

In [55]:
# Breakdown of the recorded dosage forms for a single product (RITUXIMAB).
query = """
SELECT drugdosageform, COUNT(*)
FROM drug
WHERE medicinalproduct = 'RITUXIMAB'
GROUP BY drugdosageform
ORDER BY COUNT(*) DESC;
"""
results = cursor.execute(query).fetchall()
print(results)

[(None, 589), ('Concentrate for solution for infusion', 482), ('Solution for infusion', 98), ('Unknown', 85), ('Infusion, Solution', 85), ('Injection', 33), ('Unknown formulation', 12), ('UNK', 8), ('Infusion', 7), ('Solution for injection', 6), ('INJECTION, SOLUTION', 5), ('Formulation unknown', 5), ('Herbal tea', 2), ('Solution for injection/infusion', 1), ('Intravenous infusion', 1)]


## Check drug_optional table

In [112]:
def check_d_opt(table_name, field_name, medical):
    """Count rows per distinct value of a drug_optional column for one product.

    Args:
        table_name: Table to select from. The join condition is written against
            ``drug_optional.drug_id``, so this is expected to be
            ``"drug_optional"``.
        field_name: Column whose distinct values are counted.
        medical: Medicinal product name matched against ``drug.medicinalproduct``.

    Returns:
        List of ``(value, count)`` tuples ordered by descending count.

    Note:
        Identifiers are interpolated into the SQL text (SQLite cannot bind
        them), so pass only trusted names. The product name is bound as a
        parameter so names containing quotes do not break the statement.
    """
    # GROUP BY uses the `field_name` argument; it previously read a global
    # named `field`, which only worked when the calling loop used that name.
    query = f"""
    SELECT {field_name}, COUNT(*)
    FROM {table_name}
    JOIN drug ON drug_optional.drug_id = drug.id
    WHERE drug.medicinalproduct = ?
    GROUP BY {field_name}
    ORDER BY COUNT(*) DESC;
    """
    # Uses the notebook-level `cursor`.
    cursor.execute(query, (medical,))
    return cursor.fetchall()

# `candidates` collects fields with two or three distinct values for some
# product (repeats are kept); `nogo` collects fields with more than three.
candidates = []
nogo = set()

for drug in medicinal_products[:10]:
    for field in drug_optional_fields[1:]:
        results = check_d_opt(table_name="drug_optional", field_name=field, medical=drug)
        if len(results) > 3:
            nogo.add(field)
        elif 2 <= len(results) <= 3:
            print(f" Drug optional field candidate !!!!{field},checking {drug}")
            candidates.append(field)
            print(results)


 Drug optional field candidate !!!!drugcumulativedosagenumb,checking MOUNJARO
[(None, 10184), (2.5, 4), (17.5, 3)]
 Drug optional field candidate !!!!drugcumulativedosageunit,checking MOUNJARO
[(None, 10184), (3, 7)]
 Drug optional field candidate !!!!drugintervaldosagedefinition,checking MOUNJARO
[(None, 9508), (803, 683)]
 Drug optional field candidate !!!!drugintervaldosageunitnumb,checking MOUNJARO
[(None, 9508), (1.0, 683)]
 Drug optional field candidate !!!!drugseparatedosagenumb,checking MOUNJARO
[(None, 9508), (1.0, 683)]
 Drug optional field candidate !!!!drugtreatmentdurationunit,checking MOUNJARO
[(None, 10149), (804, 41), (802, 1)]
 Drug optional field candidate !!!!drugcumulativedosageunit,checking DUPIXENT
[(None, 3678), (3, 22)]
 Drug optional field candidate !!!!drugseparatedosagenumb,checking DUPIXENT
[(1.0, 3185), (None, 515)]
 Drug optional field candidate !!!!drugstructuredosageunit,checking DUPIXENT
[(3, 3125), (None, 509), (32, 66)]
 Drug optional field candidate 

In [77]:
# De-duplicate the candidate fields. Sorting (instead of list(set(...))) keeps
# the order, and therefore the printed output, identical across kernel restarts.
candidates = sorted(set(candidates))

for candidate in candidates:
    print(candidate)
print(len(candidates))

drugcumulativedosageunit
drugseparatedosagenumb
drugtreatmentdurationunit
drugcumulativedosagenumb
drugintervaldosagedefinition
drugintervaldosageunitnumb
drugtreatmentduration
drugrecurreadministration
drugstructuredosageunit
9


In [68]:
# Candidate fields that never landed in `nogo`.
x = set(candidates).difference(nogo)
print(x)

set()


In [110]:
def check_d_opt(table_name, field_name, medical):
    """Count rows per distinct value of a drug_optional column for one product.

    This cell redefines the helper of the same name that appears earlier in
    the notebook.

    Args:
        table_name: Table to select from. The join condition is written against
            ``drug_optional.drug_id``, so this is expected to be
            ``"drug_optional"``.
        field_name: Column whose distinct values are counted.
        medical: Medicinal product name matched against ``drug.medicinalproduct``.

    Returns:
        List of ``(value, count)`` tuples ordered by descending count.

    Note:
        Identifiers are interpolated into the SQL text (SQLite cannot bind
        them), so pass only trusted names. The product name is bound as a
        parameter so names containing quotes do not break the statement.
    """
    # GROUP BY uses the `field_name` argument; it previously read a global
    # named `field`, which only worked when the calling loop used that name.
    query = f"""
    SELECT {field_name}, COUNT(*)
    FROM {table_name}
    JOIN drug ON drug_optional.drug_id = drug.id
    WHERE drug.medicinalproduct = ?
    GROUP BY {field_name}
    ORDER BY COUNT(*) DESC;
    """
    # Uses the notebook-level `cursor`.
    cursor.execute(query, (medical,))
    return cursor.fetchall()

# Re-scan the candidate fields over the first fifteen products and record in
# `dupa` every field that shows more than five distinct values for some product.
dupa = set()

for drug in medicinal_products[:15]:
    for field in candidates:
        results = check_d_opt(table_name="drug_optional", field_name=field, medical=drug)
        if len(results) > 5:
            dupa.add(field)

In [111]:
# Candidates that were not flagged in `dupa`, with the sizes of each collection.
x = set(candidates).difference(dupa)
print(len(candidates), len(dupa), x, len(x), sep="\n")

0
0
set()
0


In [74]:
# Show the fields currently collected in `dupa`.
print(dupa)

{'drugtreatmentduration', 'drugstructuredosageunit', 'drugcumulativedosagenumb', 'drugintervaldosageunitnumb'}


## Check drug_openfda table

In [95]:
def check_d_open(table_name, field_name, medical):
    """Count rows per distinct value of a drug_openfda column for one product.

    Args:
        table_name: Table to select from. The join condition is written against
            ``drug_openfda.drug_id``, so this is expected to be
            ``"drug_openfda"``.
        field_name: Column whose distinct values are counted.
        medical: Medicinal product name matched against ``drug.medicinalproduct``.

    Returns:
        List of ``(value, count)`` tuples ordered by descending count.

    Note:
        Identifiers are interpolated into the SQL text (SQLite cannot bind
        them), so pass only trusted names. The product name is bound as a
        parameter so names containing quotes do not break the statement.
    """
    # GROUP BY uses the `field_name` argument; it previously read a global
    # named `field`, which only worked when the calling loop used that name.
    query = f"""
    SELECT {field_name}, COUNT(*)
    FROM {table_name}
    JOIN drug ON drug_openfda.drug_id = drug.id
    WHERE drug.medicinalproduct = ?
    GROUP BY {field_name}
    ORDER BY COUNT(*) DESC;
    """
    # Uses the notebook-level `cursor`.
    cursor.execute(query, (medical,))
    return cursor.fetchall()

# `essa` collects fields with fewer than two distinct values for some product;
# `mialmial` collects fields with two or more.
essa = set()
mialmial = set()

for drug in medicinal_products[:15]:
    for field in drug_openfda_fields[2:]:
        results = check_d_open(table_name="drug_openfda", field_name=field, medical=drug)
        if len(results) < 2:
            essa.add(field)
            continue

        print(f"Results for {field}: {len(results)}")
        mialmial.add(field)
        # This warning used to sit in an `elif` after `len(results) >= 2`, so it
        # could never run; it is now checked inside the >= 2 case.
        if len(results) > 3:
            print(f" Uwaga !!!!{field},checking {drug}")
            print(results)

In [85]:
# Show the drug_openfda columns from the third one onward.
print(drug_openfda_fields[2:])

['application_number', 'brand_name', 'generic_name', 'manufacturer_name', 'nui', 'package_ndc', 'pharm_class_cs', 'pharm_class_epc', 'pharm_class_moa', 'pharm_class_pe', 'product_ndc', 'product_type', 'route', 'rxcui', 'spl_id', 'spl_set_id', 'substance_name', 'unii']


In [96]:
# Show the fields currently collected in `mialmial`.
print(mialmial)

set()


In [97]:
# Show the fields currently collected in `essa`.
print(essa)

{'pharm_class_epc', 'spl_set_id', 'pharm_class_pe', 'substance_name', 'product_ndc', 'product_type', 'nui', 'brand_name', 'package_ndc', 'generic_name', 'pharm_class_moa', 'route', 'rxcui', 'spl_id', 'manufacturer_name', 'pharm_class_cs', 'unii', 'application_number'}


## Check drug_activesubstance table

In [135]:
def check_d_acti(table_name, field_name, medical):
    """Count rows per distinct value of a drug_activesubstance column for one product.

    Args:
        table_name: Table to select from. The join condition is written against
            ``drug_activesubstance.drug_id``, so this is expected to be
            ``"drug_activesubstance"``.
        field_name: Column whose distinct values are counted.
        medical: Medicinal product name matched against ``drug.medicinalproduct``.

    Returns:
        List of ``(value, count)`` tuples ordered by descending count.

    Note:
        Identifiers are interpolated into the SQL text (SQLite cannot bind
        them), so pass only trusted names. The product name is bound as a
        parameter so names containing quotes do not break the statement.
    """
    query = f"""
    SELECT {field_name}, COUNT(*)
    FROM {table_name}
    JOIN drug ON drug_activesubstance.drug_id = drug.id
    WHERE drug.medicinalproduct = ?
    GROUP BY {field_name}
    ORDER BY COUNT(*) DESC;
    """
    # Uses the notebook-level `cursor`.
    cursor.execute(query, (medical,))
    return cursor.fetchall()

# `upsi` collects every product that maps to two or more distinct active
# substance names. This issues one query per product over the full list.
upsi = set()
substance_field = "activesubstancename"

for drug in medicinal_products:
    results = check_d_acti(
        table_name="drug_activesubstance", field_name=substance_field, medical=drug
    )
    if len(results) < 2:
        continue

    print(f"Results for {drug}: {len(results)}")
    upsi.add(drug)
    # This warning used to sit in an `elif` after `len(results) >= 2`, so it
    # could never run, and it referenced a `field` variable that this loop
    # never sets. It now reports the column actually queried.
    if len(results) > 3:
        print(f" Uwaga !!!!{substance_field},checking {drug}")
        print(results)

Results for LEVOTHYROXINE: 2
Results for OXYCODONE: 2
Results for CITALOPRAM: 2
Results for NOREPINEPHRINE: 2
Results for ESOMEPRAZOLE MAGNESIUM: 2
Results for PRAMIPEXOLE: 2
Results for OXYBUTYNIN: 2
Results for LEUPROLIDE ACETATE: 2
Results for RUCONEST: 2
Results for OCTREOTIDE: 2
Results for RASAGILINE: 2
Results for ATOMOXETINE: 2
Results for SARCLISA: 2
Results for BETAXOLOL: 2


KeyboardInterrupt: 

In [126]:
# Convert the de-duplicated products to a list and report how many there are.
upsi = [*set(upsi)]
print(len(upsi))

0


In [137]:
# Close the SQLite connection; `cursor` cannot be used after this point.
conn.close()