# OpenFDA Database Validation Notebook

In [1]:
import os
import sqlite3

import pandas as pd

# Location of the SQLite sample database. Set the OPENFDA_DB_PATH environment
# variable to point at a copy stored elsewhere; otherwise the original
# machine-specific path is used.
db_path = os.environ.get(
    "OPENFDA_DB_PATH",
    r"C:\Users\macie\OneDrive\Documents\Edukacja\YEAR 3\SM2\BEP\OpenFDA\notebooks\SQL_DB\OpenFDA_sample.db",
)

# sqlite3.connect() creates a new, empty database when the file does not exist,
# which would make every check below fail with "no such table". Fail early
# with a clear message instead.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

conn = sqlite3.connect(db_path)


## 1. Count total safety reports

In [2]:
# Number of rows in safety_reports, returned as a one-row DataFrame.
pd.read_sql_query(
    sql='SELECT COUNT(*) AS total_reports FROM safety_reports;',
    con=conn,
)

Unnamed: 0,total_reports
0,36000


## 2. Validate foreign key: patients -> safety_reports

In [3]:
# Count patient rows whose safetyreportid has no matching row in safety_reports.
#
# NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL
# safetyreportid in safety_reports makes the predicate NULL for every
# unmatched patient row, so the count would be 0 even when orphans exist.
# Patient rows whose own safetyreportid is NULL are excluded explicitly,
# which matches how NOT IN treated them.
query = '''
SELECT COUNT(*) AS invalid_patients
FROM patients AS p
WHERE p.safetyreportid IS NOT NULL
  AND NOT EXISTS (
      SELECT 1
      FROM safety_reports AS sr
      WHERE sr.safetyreportid = p.safetyreportid
  );
'''
pd.read_sql_query(query, conn)


Unnamed: 0,invalid_patients
0,0


## 3. Check NULL values in patientsex

In [4]:
# Number of patient rows with no recorded patientsex value.
pd.read_sql_query(
    sql='SELECT COUNT(*) AS null_sex FROM patients WHERE patientsex IS NULL;',
    con=conn,
)


Unnamed: 0,null_sex
0,5044


## 4. Distribution of patientsex values

In [5]:
# Row count per distinct patientsex value; NULL forms its own group.
pd.read_sql_query(
    sql='SELECT patientsex, COUNT(*) AS count FROM patients GROUP BY patientsex;',
    con=conn,
)


Unnamed: 0,patientsex,count
0,,5044
1,0.0,113
2,1.0,12818
3,2.0,18025


## 5. Join Check: Reports with associated drugs

In [6]:
# Sample of per-report drug counts. The inner join keeps only reports that
# have at least one matching row in drugs.
#
# ORDER BY makes LIMIT 10 return the same ten reports on every run; without
# it SQLite is free to return any ten groups.
query = '''
SELECT sr.safetyreportid, COUNT(dr.id) AS num_drugs
FROM safety_reports sr
JOIN drugs dr ON sr.safetyreportid = dr.safetyreportid
GROUP BY sr.safetyreportid
ORDER BY sr.safetyreportid
LIMIT 10;
'''
pd.read_sql_query(query, conn)


Unnamed: 0,safetyreportid,num_drugs
0,10209590,7
1,10236864,66
2,10253739,15
3,10392517,19
4,10423027,3
5,10466564,1
6,10552417,2
7,10626813,4
8,10710817,8
9,10756952,2


## 6. Sample Check: Report ID 23541411

In [7]:
# Fetch every column of the safety_reports row(s) for report 23541411.
pd.read_sql_query(
    sql="SELECT * FROM safety_reports WHERE safetyreportid = '23541411';",
    con=conn,
)


Unnamed: 0,safetyreportid,safetyreportversion,reporttype,serious,seriousnessdeath,seriousnesslifethreatening,seriousnesshospitalization,seriousnessdisabling,seriousnesscongenitalanomali,seriousnessother,receivedate,receiptdate,transmissiondate,occurcountry,fulfillexpeditecriteria,authoritynumb
0,23541411,1,1,1,1,1,1,1,,1,20240219,20240219,20240410,CA,2,
