In [1]:
import pandas as pd

In [None]:
def load_data(year, q):
    # Demographic file
    demo_path = 'FAERS/faers_ascii_{}q{}/ascii/DEMO{}Q{}.txt'.format(year, q, year % 100, q)
    df_demo = pd.read_csv(demo_path, sep='$', encoding='latin-1')

    # Drug file
    drug_path = 'FAERS/faers_ascii_{}q{}/ascii/DRUG{}Q{}.txt'.format(year, q, year % 100, q)
    df_drug = pd.read_csv(drug_path, sep='$', encoding='latin-1')

    # Indication file
    indi_path = 'FAERS/faers_ascii_{}q{}/ascii/INDI{}Q{}.txt'.format(year, q, year % 100, q)
    df_indi = pd.read_csv(indi_path, sep='$', encoding='latin-1')

    # Outcome file
    outc_path = 'FAERS/faers_ascii_{}q{}/ascii/OUTC{}Q{}.txt'.format(year, q, year % 100, q)
    df_outc = pd.read_csv(outc_path, sep='$', encoding='latin-1')

    # Reaction file
    reac_path = 'FAERS/faers_ascii_{}q{}/ascii/REAC{}Q{}.txt'.format(year, q, year % 100, q)
    df_reac = pd.read_csv(reac_path, sep='$', encoding='latin-1')

    # Therapy file
    ther_path = 'FAERS/faers_ascii_{}q{}/ascii/THER{}Q{}.txt'.format(year, q, year % 100, q)
    df_ther = pd.read_csv(ther_path, sep='$', encoding='latin-1')
    
    return df_demo, df_drug, df_indi, df_outc, df_reac, df_ther

In [None]:
def filter_data(df_drug, df_demo, df_indi, df_outc, df_reac, df_ther, drugname):

    # Filter drug data with drugname
    df_drug = df_drug[df_drug['prod_ai'].str.contains(drugname, na=False)]

    # Filter other data with 'primaryid' and 'caseid' from filtered drug data
    filter_values = set(zip(df_drug['primaryid'], df_drug['caseid']))

    df_demo = df_demo[df_demo.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]
    df_indi = df_indi[df_indi.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]
    df_outc = df_outc[df_outc.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]
    df_reac = df_reac[df_reac.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]
    # df_rpsr = df_rpsr[df_rpsr.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]
    df_ther = df_ther[df_ther.apply(lambda x: (x['primaryid'], x['caseid']) in filter_values, axis=1)]

    return df_demo, df_drug, df_indi, df_outc, df_reac, df_ther

Non-group AEs

In [4]:
# data_filtered = []

# for year in range(2020, 2025):
#     for quarter in range(1, 5):

#         # Load data
#         df_demo, df_drug, df_indi, df_outc, df_reac, df_ther = load_data(year, quarter)

#         # Lowercase all values
#         df_demo = df_demo.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         df_drug = df_drug.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         df_indi = df_indi.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         df_outc = df_outc.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         df_reac = df_reac.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         # rpsr = df_rpsr.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
#         ther = df_ther.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)

#         # Filter 'aflibercept' data
#         demo, drug, indi, outc, reac, ther = filter_data(df_drug, df_demo, df_indi, df_outc, df_reac, df_ther, 'adalimumab')

#         # Remove columns
#         demo.drop(['auth_num', 'lit_ref', 'to_mfr'], axis=1, inplace=True)
#         drug.drop(['lot_num', 'exp_dt', 'nda_num', 'cum_dose_chr', 'cum_dose_unit', 'dechal', 'rechal'], axis=1, inplace=True)
#         reac.drop(['drug_rec_act'], axis=1, inplace=True)
#         ther.drop(['dur', 'dur_cod'], axis=1, inplace=True)

#         # Rename column 'drug_seq' to 'indi_drug_seq'
#         drug.rename(columns={'drug_seq': 'seq'}, inplace=True)
#         indi.rename(columns={'indi_drug_seq': 'seq'}, inplace=True)
#         ther.rename(columns={'dsg_drug_seq': 'seq'}, inplace=True)

#         # Merge data
#         merged_drug = pd.merge(demo, drug, on=['primaryid', 'caseid'], how='right')
#         merged_indi = pd.merge(merged_drug, indi, on=['primaryid', 'caseid', 'seq'], how='left')
#         merged_outc = pd.merge(merged_indi, outc, on=['primaryid', 'caseid'], how='left')
#         merged_reac = pd.merge(merged_outc, reac, on=['primaryid', 'caseid'], how='inner')
#         merge_ther = pd.merge(merged_reac, ther, on=['primaryid', 'caseid', 'seq'], how='left')

#         # # Remove column 'seq'
#         # merge_ther.drop('seq', axis=1, inplace=True)

#         # Drop duplicates
#         merge_ther.drop_duplicates()

#         # # Drop rows with missing values in 'pt' column
#         # merge_ther.dropna(subset=['pt'], inplace=True)

#         # Append data
#         data_filtered.append(merge_ther)
        
#     # Concatenate all data
#     data_filtered_all = pd.concat(data_filtered, ignore_index=True)

#     # Save data
#     data_filtered_all.to_csv('data/faers_filtered_adalimumab_' + str(year) + '.csv', index=False)

# # Save data
# data_filtered_all.to_csv('data/faers_filtered_adalimumab_2020-2024.csv', index=False)

Group AEs

In [None]:
data_filtered = []

for year in range(2020, 2025):
    for quarter in range(1, 5):

        # Load data
        df_demo, df_drug, df_indi, df_outc, df_reac, df_ther = load_data(year, quarter)

        # Lowercase all values
        df_demo = df_demo.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        df_drug = df_drug.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        df_indi = df_indi.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        df_outc = df_outc.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        df_reac = df_reac.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        # rpsr = df_rpsr.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)
        ther = df_ther.apply(lambda col: col.str.lower() if col.dtype == 'object' else col)

        # Filter 'aflibercept' data
        demo, drug, indi, outc, reac, ther = filter_data(df_drug, df_demo, df_indi, df_outc, df_reac, df_ther, 'adalimumab')

        # Remove columns
        demo.drop(['auth_num', 'lit_ref', 'to_mfr'], axis=1, inplace=True)
        demo.drop(['i_f_code', 'mfr_dt', 'rept_cod', 'mfr_num', 'mfr_sndr', 'e_sub', 'reporter_country', 'occr_country'], axis=1, inplace=True)
        drug.drop(['lot_num', 'exp_dt', 'nda_num', 'cum_dose_chr', 'cum_dose_unit', 'val_vbm', 'dechal', 'rechal'], axis=1, inplace=True)
        reac.drop(['drug_rec_act'], axis=1, inplace=True)
        ther.drop(['dur', 'dur_cod'], axis=1, inplace=True)

        # Rename column 'drug_seq' to 'indi_drug_seq'
        drug.rename(columns={'drug_seq': 'seq'}, inplace=True)
        indi.rename(columns={'indi_drug_seq': 'seq'}, inplace=True)
        ther.rename(columns={'dsg_drug_seq': 'seq'}, inplace=True)

        # Group 'pt' column in 'reac' data
        reac = reac.groupby(['primaryid', 'caseid']).agg({'pt': list}).reset_index()

        # Merge data
        merged_drug = pd.merge(demo, drug, on=['primaryid', 'caseid'], how='right')
        merged_indi = pd.merge(merged_drug, indi, on=['primaryid', 'caseid', 'seq'], how='left')
        merged_outc = pd.merge(merged_indi, outc, on=['primaryid', 'caseid'], how='left')
        merged_reac = pd.merge(merged_outc, reac, on=['primaryid', 'caseid'], how='inner')
        merge_ther = pd.merge(merged_reac, ther, on=['primaryid', 'caseid', 'seq'], how='left')

        # Append data
        data_filtered.append(merge_ther)
        
# Concatenate all data
data_filtered_all = pd.concat(data_filtered, ignore_index=True)

# Save data
data_filtered_all.to_csv('data/faers_filtered_adalimumab_2020-2024_grouped.csv', index=False)

  df_demo = pd.read_csv(demo_path, sep='$', encoding='latin-1')
  df_drug = pd.read_csv(drug_path, sep='$', encoding='latin-1')
  df_ther = pd.read_csv(ther_path, sep='$', encoding='latin-1')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo.drop(['auth_num', 'lit_ref', 'to_mfr'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo.drop(['i_f_code', 'mfr_dt', 'rept_cod', 'mfr_num', 'mfr_sndr', 'e_sub', 'reporter_country', 'occr_country'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-vi