# PREPARE DATA

The *main_figures.ipynb* and *supp_figures.ipynb* notebooks generate all plots in the manuscript, but in order to do so they make use of supplementary files that are generated by running this notebook (this may take some time). 

In [5]:
from functions import *

# PREPARE DATASET
First we need to download the ATLAS dataset (https://amr.theodi.org/programmes/atlas) from the following URL:
https://s3-eu-west-1.amazonaws.com/amr-prototype-data/Open+Atlas_Reuse_Data.xlsx

We need to play a little with the original dataset.

In [None]:
#WE READ THE EXCEL FILE (ROWS 0-4 OF THE SHEET ARE SKIPPED)
DF=pd.read_excel('data/Open+Atlas_Reuse_Data.xlsx', skiprows=[0, 1, 2, 3, 4])
DF=DF.rename(columns={'In / Out Patient': 'InOutPatient'})
#WRITE DATAFRAME'S KEYS (USEFUL FOR LATER ANALYSES)
#FOR EACH KEY COLUMN WE COUNT HOW MANY ROWS HOLD EACH DISTINCT VALUE AND
#WRITE ONE 'value<TAB>count' LINE PER VALUE TO data/key_<column>.txt
keys=['Year', 'Country', 'Species']
mkeys={}
for key in keys:
    dL=DF[key].tolist()
    mkeys[key]={}
    for z in dL:
        z=str(z)#every value is counted under its string form
        mkeys[key][z]=mkeys[key].get(z,0)+1
    with open('data/key_'+key+'.txt','w') as f:
        for z in sorted(mkeys[key], key=lambda x: (str(type(x)), x)):
            f.write(str(z)+'\t'+str(mkeys[key][z])+'\n')

#MIC VALUES ARE WRITTEN IN MANY DIFFERENT WAYS IN THE DATASET, AND WE
#WANT THEM LOG2-TRANSFORMED. FILE data/key_MIC_values.txt LISTS, PER LINE:
#  COLUMN 1: THE MIC VALUE AS IT APPEARS IN THE DATASET
#  COLUMN 2: THE MODIFIED VALUE
#  COLUMN 3: THE LOG2-TRANSFORMED VALUE
#EACH ENTRY OF MICdict IS [original string, log2 value as string, len(original)]
MICdict=[]
with open('data/key_MIC_values.txt', 'r') as f:
    for line in f:
        fields=line.split()
        raw=fields[0]
        log2_value=float(fields[2])
        MICdict.append([raw, str(log2_value), len(raw)])
#LONGEST ORIGINAL STRINGS FIRST
MICdict.sort(key=itemgetter(2), reverse=True)

#WE ASSIGN A RANDOM KEY TO EACH MIC VALUE BEFORE SUBSTITUTING
import string
import random
def randomString(stringLength=10):
    """Return a string of `stringLength` random lowercase ASCII letters.

    Each character is drawn independently with `random.choice`, so the
    result depends on the state of the global `random` generator.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
#ONE RANDOM PLACEHOLDER PER MIC VALUE
eggs=[randomString() for z in MICdict]

#NOW WE SUBSTITUTE THEM IN THE DATASET
#TWO PASSES: FIRST EVERY ORIGINAL MIC STRING -> ITS PLACEHOLDER, THEN EVERY
#PLACEHOLDER -> THE LOG2 VALUE. regex=False MAKES THE MIC STRINGS MATCH
#LITERALLY (OTHERWISE, DEPENDING ON THE PANDAS VERSION, '.' AND OTHER
#CHARACTERS WOULD BE INTERPRETED AS REGULAR-EXPRESSION SYNTAX)
drugs=read_key('Drugs', 'data/')
for dr in drugs:
    col=DF[dr].astype('str')
    for x1,y1 in zip(MICdict,eggs):
        col=col.str.replace(x1[0],y1, regex=False)
    for x1,y1 in zip(eggs,MICdict):
        col=col.str.replace(x1,y1[1], regex=False)
    DF[dr]=col.astype('float64')
DF['Species']=DF['Species'].astype('str')
#FINALLY, WE UNIFORMIZE SOME SPECIES
DF['Species']=DF['Species'].str.replace('Enterobacter aerogenes', 'Klebsiella aerogenes', regex=False)
DF['Species']=DF['Species'].str.replace('Klebsiella (Enterobacter) aerogenes', 'Klebsiella aerogenes', regex=False)
DF['Species']=DF['Species'].str.replace('Klebsiella planticola', 'Raoultella planticola', regex=False)
DF.to_excel('data/atlas.xlsx')
#REWRITE data/key_Species.txt FROM THE CURRENT 'Species' COLUMN:
#ONE 'species<TAB>number of rows' LINE PER SPECIES, IN SORTED ORDER
mkeys={}
dL=DF['Species'].tolist()
for name in dL:
    name=str(name)
    mkeys[name]=mkeys.get(name,0)+1
with open('data/key_Species.txt','w') as f:
    ordered=sorted(mkeys, key=lambda s: (str(type(s)), s))
    f.writelines(str(name)+'\t'+str(mkeys[name])+'\n' for name in ordered)

# DATABASES

The ATLAS program is the amalgamation of two previous antimicrobial surveillance programs: TEST (2004-2017) and INFORM (2012-2017). Here we separate these two in order to assess the consistency of our results (run this cell immediately after running the previous one).

In [None]:
#ONE FILE PER SURVEILLANCE PROGRAM
DFT=DF.loc[DF['Study']=='TEST']
DFT.to_excel('data/test.xlsx')
DFI=DF.loc[DF['Study']=='Inform']
DFI.to_excel('data/inform.xlsx')
#RECORDS FROM 2012 ONWARDS
DF2012=DF.loc[DF['Year']>2011]
DF2012.to_excel('data/atlas2012.xlsx')
#COUNTRIES DATABASES
DFA=DF.loc[DF['Country']=='United States']
DFA.to_excel('data/atlas_usa.xlsx')
#THE LIST OF EUROPEAN COUNTRIES COMES FROM THE ECDC DATA; 'Slovakia' IS
#RENAMED TO 'Slovak Republic' BEFORE MATCHING AGAINST THE ATLAS COUNTRY COLUMN
RD=pd.read_csv('data/europe_resistance_data.csv')
Europe=['Slovak Republic' if c=='Slovakia' else c for c in get_key(RD, 'Country')]
DFE=DF.loc[DF['Country'].isin(Europe)]
DFE.to_excel('data/atlas_europe.xlsx')
#EVERYTHING THAT IS NEITHER UNITED STATES NOR EUROPE
West=['United States']+Europe
DFR=DF.loc[~DF['Country'].isin(West)]
DFR.to_excel('data/atlas_rest.xlsx')

# DATAPOINTS PER YEAR
Datafile to represent how many species and datapoints are recorded each year in ATLAS (Fig S1D).

In [None]:
#FOR EVERY YEAR IN 2004-2017 RECORD THE NUMBER OF DISTINCT SPECIES KEYS
#AND THE NUMBER OF ROWS IN data/atlas.xlsx
DF=pd.read_excel('data/atlas.xlsx')
yL=list(range(2004,2018))
counts=[]
for year in yL:
    rows=DF[DF.Year==year]
    counts.append((len(get_key(rows,'Species')), len(rows)))
spL=[n_species for n_species,_ in counts]
dataL=[n_rows for _,n_rows in counts]
DFnew=pd.DataFrame({'Year':yL, 'Species': spL, 'Datapoints': dataL})
DFnew.to_csv('data/datapoints_per_year.csv', index=False)

# READ DATASET
Useful for later cells.

In [3]:
#LOADS ATLAS WITH complete=True AND REBINDS THE GLOBALS DF, pL, bP AND drugs
DF, pL, bP, drugs=read_dataset('atlas', complete=True)#READS ALL OF ATLAS

In [6]:
#LOADS ATLAS WITH THE DEFAULT OPTIONS AND REBINDS THE GLOBALS DF, pL, bP AND drugs
DF, pL, bP, drugs=read_dataset('atlas')#READS ONLY THE RESTRICTED VERSION OF ATLAS THAT WE USED FOR THE PAPER

In [None]:
#LOADS THE 'test' DATASET AND REBINDS THE GLOBALS DF, pL, bP AND drugs
DF, pL, bP, drugs=read_dataset('test')#READS TEST ONLY

In [None]:
#LOADS THE 'inform' DATASET AND REBINDS THE GLOBALS DF, pL, bP AND drugs
DF, pL, bP, drugs=read_dataset('inform')#READS INFORM ONLY

# AVERAGE MIC PER YEAR
Datafile to represent average MIC for each country (Figs 4A, 4C).

In [None]:
#FOR EVERY (COUNTRY, PATHOGEN, DRUG WITH A BREAKPOINT, YEAR) WE RECORD THE
#MEAN MIC RELATIVE TO THE BREAKPOINT, ITS SEM AND SD, THE NUMBER OF CASES,
#AND THE NUMBER AND FRACTION OF CASES ABOVE THE BREAKPOINT
DF, pL, bP, drugs=read_dataset('atlas')
L=[]
pC=get_key(DF,'Country')
for country in pC:
    DFc=DF[DF.Country==country]
    if DFc.empty:
        continue
    for sp in pL:
        DFsp=DFc[DFc.Species==sp]
        if DFsp.empty:
            continue
        for dr in drugs:
            #only pathogen-drug pairs that have a breakpoint
            if dr not in bP[sp]:
                continue
            breakpoint_value=bP[sp][dr][0]
            DFnew=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]}).dropna()
            if DFnew.empty:
                continue
            for y in get_key(DFnew, 'Year'):
                DFy=DFnew[DFnew.Year==y]
                if DFy.empty:
                    continue
                values=DFy[dr]
                N=len(DFy)
                #rows strictly above the breakpoint
                R=DFy[values>breakpoint_value].count()[dr]
                L.append([sp, dr, country, y,
                          values.mean()-breakpoint_value,
                          values.sem(),
                          values.std(),
                          N, R, R/N])
#SAVE ALL IN DATAFRAME AND WRITE TO FILE
columns=['Species', 'Antibiotic', 'Country', 'Year', 'Average MIC', 'Average MIC SEM', 'Average MIC SD', 'Number of cases', 'Number of resistant cases', 'Fraction of resistant cases']
MICdf=pd.DataFrame.from_records(L, columns=columns)
MICdf.to_csv('data/average_mic_per_year.csv', float_format='%.3f', na_rep='NaN', index=False)

# COMPARE DATASETS
Compare resistance values in ATLAS against ECDC and ResistanceMap (Fig 1A,Supp Figs 2 and 3).

## ATLAS VS ECDC

In [5]:
#READ AVERAGE MIC DATA
DFavg=pd.read_csv('data/average_mic_per_year.csv')
#READ ECDC DATA
RD=pd.read_csv('data/europe_resistance_data.csv')
#we want to compare fraction of resistance in both datasets
#(.copy() so that the column assignments below act on an independent frame)
RD=RD[RD.Indicator=='Nonsusceptible proportion'].copy()
RD['Year']=RD['Year'].astype('int64')
RD['Value']=RD['Value'].astype('float64')
#ECDC resistance data comes aggregated for drug classes and for different
#pathogens. For instance, for Acinetobacter it reports resistance to
#'Carbapenems'. We therefore compare with resistance to all drugs tested
#in ATLAS for that pathogen-drug class combination, as specified in the
#ECDC website.
#Also, Acinetobacter is reported as a genus, but we here only compare with
#A.baumannii, which is by far the most important pathogen
RD['Species']=RD['Species'].str.replace('Acinetobacter spp.', 'Acinetobacter baumannii', regex=False)
#The keys are read AFTER the renaming above; otherwise the species list still
#holds 'Acinetobacter spp.' and A.baumannii is never compared
spR=get_key(RD, 'Species')
cR=get_key(RD, 'Country')
dR=get_key(RD, 'Antibiotic')
#NOTE(review): cR holds the ECDC country names; other cells of this notebook
#rename 'Slovakia' to 'Slovak Republic' before matching ATLAS - confirm
#whether that renaming is wanted here too
#Rant[species][ECDC antibiotic label] = list of ATLAS antibiotics to compare with
Rant={}
Rant['Acinetobacter baumannii']={'Carbapenems':['Imipenem', 'Meropenem']}
Rant['Enterococcus faecalis']={'Highlevel gentamicin':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Enterococcus faecium']={'Highlevel gentamicin':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Escherichia coli']={'Carbapenems':['Imipenem', 'Meropenem']}
#two separate names (a single 'Imipenem, Meropenem' string never matches in isin)
Rant['Klebsiella pneumoniae']={'Carbapenems':['Imipenem', 'Meropenem']}
Rant['Pseudomonas aeruginosa']={'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'PiperacillinTazobactam':['Piperacillin tazobactam']}
Rant['Staphylococcus aureus']={'Meticillin (MRSA)':['Oxacillin']}
Rant['Streptococcus pneumoniae']={'Macrolides':['Erythromycin', 'Clarithromycin', 'Azithromycin'],
    'Penicillins': ['Penicillin', 'Oxacillin']}

#COMPARE RESISTANCE RATES
yL=list(range(2004,2018))
L=[]
for sp, country in itertools.product(spR, cR):
    if sp not in Rant: continue#no drug mapping for this species
    DFsp=DFavg[(DFavg.Species==sp) & (DFavg.Country==country)]
    RDsp=RD[(RD.Species==sp) & (RD.Country==country)]
    if DFsp.empty or RDsp.empty: continue
    for key,value in Rant[sp].items():
        DFc=DFsp[DFsp.Antibiotic.isin(value)]
        RDc=RDsp[RDsp.Antibiotic==key]
        for y in yL:
            DFy=DFc[DFc.Year==y]
            RDy=RDc[RDc.Year==y]
            if DFy.empty or RDy.empty: continue
            x1=RDy.Value.mean()#ECDC value
            if np.isnan(x1): continue
            #ATLAS fraction is averaged over the mapped drugs and scaled by 100
            y1=DFy['Fraction of resistant cases'].mean()*100
            diff=y1-x1
            N=DFy['Number of cases'].sum()
            L.append([sp,key,y,country,x1,y1,diff,N])
NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','Country','ECDCValue','ATLASValue','Diff','N'])
NewDF.to_excel('data/atlas_vs_ecdc.xls', float_format='%.3f', index=False)

## ATLAS VS RESISTANCEMAP

In [7]:
#READ AVERAGE MIC DATA
DFavg=pd.read_csv('data/average_mic_per_year.csv')
#READ RESISTANCEMAP DATA
RD=pd.read_csv('data/resistmap_data.csv')
RD['year']=RD['year'].astype('int64')
RD['value']=RD['value'].astype('float64')
RD['IsolatesTested']=RD['IsolatesTested'].astype('float64')
spR=get_key(RD, 'Organism')
cR=get_key(RD, 'CountryName')
dR=get_key(RD, 'Antimicrobial')
#DATA IN RESISTANCEMAP IS AGGREGATED IN A SIMILAR WAY AS IN ECDC
#Rant[organism][ResistanceMap antimicrobial label] = list of ATLAS antibiotics
Rant={}
Rant['Acinetobacter baumannii']={'Amikacin':['Amikacin'], 
    'Ampicillin-sulbactam':['Ampicillin sulbactam'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'Glycylcyclines':['Tigecycline']}
Rant['Enterobacter aerogenes/cloacae']={'Amoxicillin-clavulanate':['Amoxicillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Enterococcus faecalis']={'Aminoglycosides (high-level)':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Enterococcus faecium']={'Vancomycin':['Vancomycin']}
Rant['Escherichia coli']={'Amoxicillin-clavulanate':['Amoxicillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
#'Carbapenems' lists two separate names (a single 'Imipenem, Meropenem'
#string never matches in isin)
Rant['Klebsiella pneumoniae']={'Amoxicillin-clavulanate':['Amoxicillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Pseudomonas aeruginosa']={'Amikacin':['Amikacin'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Staphylococcus aureus']={'Linezolid':['Linezolid'],
    'Oxacillin (MRSA)':['Oxacillin'],
    'Vancomycin':['Vancomycin']}
Rant['Streptococcus pneumoniae']={'Macrolides':['Erythromycin', 'Clarithromycin', 'Azithromycin']}

#COMPARE RESISTANCE RATES
yL=list(range(2004,2018))
L=[]
for sp, country in itertools.product(spR, cR):
    if sp not in Rant: continue#no drug mapping for this organism
    DFsp=DFavg[(DFavg.Species==sp) & (DFavg.Country==country)]
    RDsp=RD[(RD.Organism==sp) & (RD.CountryName==country)]
    if DFsp.empty or RDsp.empty: continue
    for key,value in Rant[sp].items():
        DFc=DFsp[DFsp.Antibiotic.isin(value)]
        RDc=RDsp[RDsp.Antimicrobial==key]
        for y in yL:
            DFy=DFc[DFc.Year==y]
            RDy=RDc[RDc.year==y]
            if DFy.empty or RDy.empty: continue
            x1=RDy.value.mean()#ResistanceMap value and its CI bounds
            xhigh=RDy.CIHigh.mean()
            xlow=RDy.CILow.mean()
            if np.isnan(x1): continue
            #ATLAS fraction is averaged over the mapped drugs and scaled by 100
            y1=DFy['Fraction of resistant cases'].mean()*100
            diff=y1-x1
            N=DFy['Number of cases'].sum()
            L.append([sp,key,y,country,x1,xhigh,xlow,y1,diff,N])

NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','Country','RMapValue','RMapCIHigh', 'RMapCILow','ATLASValue','Diff','N'])
NewDF.to_excel('data/atlas_vs_resistancemap.xls', float_format='%.3f', index=False)

## ATLAS VS ESPAUR

In [10]:
#READ AVERAGE MIC DATA
DFavg=pd.read_csv('data/average_mic_per_year.csv')
#READ ESPAUR DATA
RD=pd.read_excel('data/espaur.xlsx', engine='openpyxl')
RD['Year']=RD['Year'].astype('int64')
RD['Value']=RD['Value'].astype('float64')
spR=get_key(RD, 'Species')
dR=get_key(RD, 'Antimicrobial')
#Rant[ESPAUR species][ESPAUR antimicrobial label] = list of ATLAS antibiotics
Rant={}
Rant['Acinetobacter']={'Colistin':['Colistin']}
Rant['Enterococcus']={'Vancomycin':['Vancomycin']}
Rant['Escherichia coli']={'Ciprofloxacin': ['Levofloxacin'],
    'Gentamicin': ['Gentamicin'], 
    'Cephalosporins':['Ceftazidime', 'Ceftriaxone'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Co-amoxiclav': ['Amoxicillin clavulanate'],
    'Piperacillin-Tazobactam':['Piperacillin tazobactam']}
Rant['Klebsiella pneumoniae']={'Ciprofloxacin': ['Levofloxacin'],
    'Gentamicin': ['Gentamicin'], 
    'Cephalosporins':['Ceftazidime', 'Ceftriaxone'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Co-amoxiclav': ['Amoxicillin clavulanate'],
    'Piperacillin-Tazobactam':['Piperacillin tazobactam']}
Rant['Klebsiella oxytoca']={'Ciprofloxacin': ['Levofloxacin'],
    'Gentamicin': ['Gentamicin'], 
    'Cephalosporins':['Ceftazidime', 'Ceftriaxone'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Co-amoxiclav': ['Amoxicillin clavulanate'],
    'Piperacillin-Tazobactam':['Piperacillin tazobactam']}
Rant['Pseudomonas aeruginosa']={'Ciprofloxacin': ['Levofloxacin'],
    'Gentamicin': ['Gentamicin'], 
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'Piperacillin-Tazobactam':['Piperacillin tazobactam']}
Rant['Streptococcus pneumoniae']={'Macrolides':['Erythromycin', 'Clarithromycin', 'Azithromycin'],
    'Penicillin': ['Penicillin']}

#COMPARE RESISTANCE RATES
yL=list(range(2011,2018))
L=[]
for sp in spR:
    if sp not in Rant: continue#no drug mapping for this species
    #substring match, so a genus-level ESPAUR name selects every ATLAS species
    #containing it; regex=False matches the name literally and na=False
    #treats missing species as non-matching
    DFsp=DFavg[(DFavg.Species.str.contains(sp, regex=False, na=False)) & (DFavg.Country=='United Kingdom')]
    RDsp=RD[RD.Species==sp]
    if DFsp.empty or RDsp.empty: continue
    for key,value in Rant[sp].items():
        DFc=DFsp[DFsp.Antibiotic.isin(value)]
        RDc=RDsp[RDsp.Antimicrobial==key]
        for y in yL:
            DFy=DFc[DFc.Year==y]
            RDy=RDc[RDc.Year==y]
            if DFy.empty or RDy.empty: continue
            x1=RDy.Value.mean()#ESPAUR value
            if np.isnan(x1): continue
            #ATLAS fraction is averaged over the mapped drugs and scaled by 100
            y1=DFy['Fraction of resistant cases'].mean()*100
            diff=y1-x1
            N=DFy['Number of cases'].sum()
            L.append([sp,key,y,x1,y1,diff,N])

NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','ESPAURValue','ATLASValue','Diff','N'])
NewDF.to_excel('data/atlas_vs_espaur.xls', float_format='%.3f', index=False)

# ATLAS QUALITY
Datafiles to represent correlograms (Figs 1B, 1C, 1D, 3, 7, Supp Figs 8A, 8B, 9, 10, 12, 22).

In [None]:
#CREATE NEW DIRECTORY
import os
#makedirs creates 'results/' as well and is a no-op when the folders exist;
#any other failure (e.g. permissions) is raised instead of being reported
#as "already created"
os.makedirs('results/correlations', exist_ok=True)

DF, pL, bP, drugs=read_dataset('atlas')
#COMPUTE CORRELATIONS BETWEEN YEARS FOR EACH PA
bins=np.arange(-10.5,11.5,1)
for sp,dr in itertools.product(pL,drugs):
    DFsp=DF[DF.Species==sp]
    DFc=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]})
    DFc=DFc.dropna()
    if DFc.empty: continue
    Y=get_key(DFc,'Year')
    #H holds the distribution of MIC values each year (one histogram per row)
    H=np.array([np.histogram(DFc[DFc.Year==y][dr], bins=bins, density=True)[0] for y in Y])
    R=np.corrcoef(H)#correlation between the MIC distributions in different years
    if R.size>1:#a single year yields a single value, which is not saved
        np.savetxt('results/correlations/'+sp+'_'+dr+'.txt',R,fmt='%.3f')
        np.savetxt('results/correlations/'+sp+'_'+dr+'_years.txt',Y, fmt='%d')
#COMPUTE TAU FOR EVERY PA PAIR  
Dl=[]
for sp,dr in itertools.product(pL,drugs):
    #only pairs for which a correlation matrix was written above
    if os.path.exists('results/correlations/'+sp+'_'+dr+'.txt'):
        R,Y=read_corr(sp, dr)
        n,m=R.shape
        tau,tauN=tautest(R)
        Dl.append([sp,dr,tau,tauN,n])     
Dl=sorted(Dl, reverse=True, key=itemgetter(2))#largest tau first
DFdist=pd.DataFrame.from_records(Dl, columns=['Species', 'Antibiotic', 'Tau', 'TauN', 'N'])
DFdist.to_csv('results/correlations/tau_test.csv', float_format='%.3f', index=False)

# TEST QUALITY

In [None]:
#CREATE NEW DIRECTORY
import os
#makedirs creates 'results_test/' as well and is a no-op when the folders
#exist; any other failure (e.g. permissions) is raised instead of being
#reported as "already created"
os.makedirs('results_test/correlations', exist_ok=True)

DF, pL, bP, drugs=read_dataset('test')
#COMPUTE CORRELATIONS BETWEEN YEARS FOR EACH PA
bins=np.arange(-10.5,11.5,1)
for sp,dr in itertools.product(pL,drugs):
    DFsp=DF[DF.Species==sp]
    DFc=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]})
    DFc=DFc.dropna()
    if DFc.empty: continue
    Y=get_key(DFc,'Year')
    #H holds the distribution of MIC values each year (one histogram per row)
    H=np.array([np.histogram(DFc[DFc.Year==y][dr], bins=bins, density=True)[0] for y in Y])
    R=np.corrcoef(H)#correlation between the MIC distributions in different years
    if R.size>1:#a single year yields a single value, which is not saved
        np.savetxt('results_test/correlations/'+sp+'_'+dr+'.txt',R,fmt='%.3f')
        np.savetxt('results_test/correlations/'+sp+'_'+dr+'_years.txt',Y, fmt='%d')

# INFORM QUALITY

In [None]:
#CREATE NEW DIRECTORY
import os
#makedirs creates 'results_inform/' as well and is a no-op when the folders
#exist; any other failure (e.g. permissions) is raised instead of being
#reported as "already created"
os.makedirs('results_inform/correlations', exist_ok=True)

DF, pL, bP, drugs=read_dataset('inform')
#COMPUTE CORRELATIONS BETWEEN YEARS FOR EACH PA
bins=np.arange(-10.5,11.5,1)
for sp,dr in itertools.product(pL,drugs):
    DFsp=DF[DF.Species==sp]
    DFc=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]})
    DFc=DFc.dropna()
    if DFc.empty: continue
    Y=get_key(DFc,'Year')
    #H holds the distribution of MIC values each year (one histogram per row)
    H=np.array([np.histogram(DFc[DFc.Year==y][dr], bins=bins, density=True)[0] for y in Y])
    R=np.corrcoef(H)#correlation between the MIC distributions in different years
    if R.size>1:#a single year yields a single value, which is not saved
        np.savetxt('results_inform/correlations/'+sp+'_'+dr+'.txt',R,fmt='%.3f')
        np.savetxt('results_inform/correlations/'+sp+'_'+dr+'_years.txt',Y, fmt='%d')

# TEST/INFORM CONSISTENCY
Check if both databases are consistent

In [None]:
import os
#THE PATHOGEN AND DRUG LISTS ARE LOADED EXPLICITLY FROM ATLAS SO THAT THIS
#CELL DOES NOT DEPEND ON WHICHEVER DATASET AN EARLIER CELL LEFT IN pL/drugs
DFatlas, pL, bPatlas, drugs=read_dataset('atlas')
del DFatlas, bPatlas#only the two lists are needed here
DFT, pLT, bPT, drugsT=read_dataset('test')
DFI, pLI, bPI, drugsI=read_dataset('inform')
os.makedirs('results/correlations', exist_ok=True)
#COMPUTE CORRELATIONS BETWEEN YEARS FOR EACH PA
bins=np.arange(-10.5,11.5,1)
for sp,dr in itertools.product(pL,drugs):
    DFTsp=DFT[DFT.Species==sp]
    DFIsp=DFI[DFI.Species==sp]
    DFTc=pd.DataFrame({'Year': DFTsp['Year'], dr: DFTsp[dr]})
    DFTc=DFTc.dropna()
    DFIc=pd.DataFrame({'Year': DFIsp['Year'], dr: DFIsp[dr]})
    DFIc=DFIc.dropna()
    if DFTc.empty or DFIc.empty: continue
    YT=get_key(DFTc,'Year')
    YI=get_key(DFIc,'Year')
    #years recorded in BOTH databases
    Y=sorted(y for y in YT if y in YI)
    print(sp,dr,Y)
    if not Y: continue
    #HT/HI hold the distribution of MIC values each year in TEST/INFORM
    HT=np.array([np.histogram(DFTc[DFTc.Year==y][dr], bins=bins, density=True)[0] for y in Y])
    HI=np.array([np.histogram(DFIc[DFIc.Year==y][dr], bins=bins, density=True)[0] for y in Y])
    #rows of HT and HI are stacked, so R covers TEST years followed by INFORM years
    R=np.corrcoef(HT,HI)
    if R.size>1:
        np.savetxt('results/correlations/'+sp+'_'+dr+'_test_vs_inform.txt',R,fmt='%.3f')
        np.savetxt('results/correlations/'+sp+'_'+dr+'_test_vs_inform_years.txt',Y, fmt='%d')
#COMPUTE TAU FOR EVERY PA PAIR  
Dl=[]
for sp,dr in itertools.product(pL,drugs):
    fname='results/correlations/'+sp+'_'+dr+'_test_vs_inform.txt'
    if os.path.exists(fname):
        #read back the matrix exactly as written (3 decimals)
        R=np.loadtxt(fname, ndmin=2)
        n,m=R.shape
        tau,tauN=tautest(R)
        Dl.append([sp,dr,tau,tauN,n])     
Dl=sorted(Dl, reverse=True, key=itemgetter(2))#largest tau first
DFdist=pd.DataFrame.from_records(Dl, columns=['Species', 'Antibiotic', 'Tau', 'TauN', 'N'])
DFdist.to_csv('results/correlations/tau_test_test_vs_inform.csv', float_format='%.3f', index=False)

# MIC CHANGE
Data files to represent MIC change in clustermaps (Figs 4B, 5B, 8A, 8B, 8C, Supp Figs 14, 20A, 20B, 20C)

In [7]:
DF, pL, bP, drugs=read_dataset('atlas')
#GLOBAL MIC CHANGE (FOR CLUSTERMAPS)
L=[]
for sp, dr in itertools.product(pL, drugs):
    DFsp=DF[DF.Species==sp]
    M=mic_change(sp,dr,DFsp)
    if not M: continue#pairs for which mic_change returns nothing are left out
    L.append(M)
MIC=pd.DataFrame.from_records(L, columns=['Species', 'Antibiotic', 'MIC change', 'MIC change error', 'Intercept'])
#WRITE TO FILE
MIC.to_csv('data/mic_change_global.csv', na_rep='NaN', float_format='%.3f', index=False)
        
#EUROPEAN MIC CHANGE
RD=pd.read_csv('data/europe_resistance_data.csv')
#GET LIST OF EUROPEAN COUNTRIES FROM ECDC DATA
cR=get_key(RD, 'Country')
for i,c in enumerate(cR):
    if c=='Slovakia':
        cR[i]='Slovak Republic'
DFE=DF[DF.Country.isin(cR)]
#start from an empty list so the European file does not also contain the
#global rows collected above
L=[]
for sp,dr in itertools.product(pL, drugs):
    DFsp=DFE[DFE.Species==sp]
    #compute first, then test THIS pair's result (not the previous one)
    M=mic_change(sp,dr,DFsp)
    if not M: continue
    L.append(M)
MIC=pd.DataFrame.from_records(L, columns=['Species', 'Antibiotic', 'MIC change', 'MIC change error', 'Intercept'])
#WRITE TO FILE
MIC.to_csv('data/mic_change_europe.csv', na_rep='NaN', float_format='%.3f', index=False)

#MIC CHANGE PER COUNTRY
L=[]
pC=get_key(DF,'Country')
for sp, dr in itertools.product(pL, drugs):
    DFsp=DF[DF.Species==sp]
    for country in pC:
        DFc=DFsp[DFsp.Country==country]
        M=mic_change(sp,dr,DFc)
        if not M: continue
        L.append([country]+M)#country is prepended to the mic_change record
MIC=pd.DataFrame.from_records(L, columns=['Country','Species', 'Antibiotic', 'MIC change', 'MIC change error', 'Intercept'])
#WRITE TO FILE
MIC.to_csv('data/mic_change_per_country.csv', na_rep='NaN', float_format='%.3f', index=False)

# MIC DISTRIBUTIONS, GAUSSIAN MIXTURE AND R CLUSTER
Datafiles to record the MIC distributions (raw and smooth) of every PA pair for all years (oH and sH), the Gaussian Mixture fit (Z), and the R cluster (used in Figs 1B, 1C, 3, 5A, 5D, 6C, Supp Figs 11, 12, 15, 16, 18).

In [None]:
#CREATE NEW DIRECTORY
import os
#makedirs creates 'results/' as well and is a no-op when the folders exist
os.makedirs('results/mic_distributions', exist_ok=True)
os.makedirs('results/Rcluster', exist_ok=True)

DF, pL, bP, drugs=read_dataset('atlas')
#GET GAUSSIAN MIXTURE FOR EACH PA PAIR
from functions import *
MAX_TRIES=100#upper bound on retries of the Gaussian mixture fit
B1=np.arange(-10,10.5,0.5)#bin edges for the smooth MIC histograms
B2=np.arange(-10,10.5,1)#bin edges for the original MIC histograms
for sp in pL:
    for dr in drugs:
        #BP is the breakpoint of the pair, or 0 when there is none.
        #Every pair is processed: an earlier version skipped the pairs that
        #have a breakpoint ("done already"), which on a fresh run left those
        #files unwritten
        BP=0
        if dr in bP[sp]:
            BP=bP[sp][dr][0]
        pY,pM,sM=mic_dist(sp,dr,DF,BP)
        if not pY: continue
        #GAUSSIAN MIXTURE AND R CLUSTER
        #sometimes the Gaussian mixture doesn't work, so we retry; the number
        #of attempts is bounded so a pair that always fails cannot hang the cell
        for attempt in range(MAX_TRIES):
            try:
                extract_rcluster(sp, dr, pY, pM, sM, DF)
                Z,gmm_x=gaussian_mixture(pY, sM)
                break
            except Exception as error:
                last_error=error
        else:
            print('Gaussian mixture failed for', sp, dr, 'after', MAX_TRIES, 'attempts:', last_error)
            continue
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'.txt',Z,fmt='%.3f')                
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_years.txt',pY, fmt='%d')
        #WRITE HISTOGRAMS (one row per year in pY)
        sH=[]
        oH=[]
        for i,y in enumerate(pY):
            # Make regular histogram (SMOOTHMIC)
            n1, bins1=np.histogram(sM[i], bins=B1, density=True)                     
            # Make regular histogram (MIC)
            n2, bins2=np.histogram(pM[i], bins=B2, density=True)
            sH.append(n1)
            oH.append(n2)
        sH=np.array(sH, dtype=float)
        oH=np.array(oH, dtype=float)
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_original.txt',oH,fmt='%.3f')
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_smooth.txt',sH,fmt='%.3f')
        #WRITE HISTOGRAM FOR R CLUSTER
        #NOTE(review): this file is presumably written by extract_rcluster - confirm
        R=pd.read_csv('results/Rcluster/'+sp+'_'+dr+'_Rcluster.csv')
        sH=[]
        oH=[]
        for i,y in enumerate(pY):
            Ry=R[R.Year==y]
            mic=Ry.MIC
            #smooth version: unit-variance Gaussian noise added to each MIC
            smoothmic=mic+np.random.normal(0.0,1.0,len(mic))
            # Make regular histogram (SMOOTHMIC)
            n1, bins1=np.histogram(smoothmic, bins=B1, density=True)                     
            # Make regular histogram (MIC)
            n2, bins2=np.histogram(mic, bins=B2, density=True)
            oH.append(n2)
            sH.append(n1)
        sH=np.array(sH, dtype=float)
        oH=np.array(oH, dtype=float)
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_Rcluster_original.txt',oH,fmt='%.3f')
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_Rcluster_smooth.txt',sH,fmt='%.3f')

Staphylococcus aureus Amoxycillin clavulanate
Staphylococcus aureus Ampicillin
Staphylococcus aureus Ceftaroline avibactam
Staphylococcus aureus Ceftazidime
Staphylococcus aureus Ceftazidime avibactam
Staphylococcus aureus Ceftriaxone
Staphylococcus aureus Doripenem
Staphylococcus aureus Ertapenem
Staphylococcus aureus Imipenem
Staphylococcus aureus Meropenem
Staphylococcus aureus Piperacillin tazobactam
Escherichia coli Aztreonam avibactam
Escherichia coli Ceftaroline avibactam
Escherichia coli Colistin
Escherichia coli ColistinP80
Klebsiella pneumoniae Aztreonam avibactam
Klebsiella pneumoniae Ceftaroline avibactam
Klebsiella pneumoniae Colistin
Klebsiella pneumoniae ColistinP80
Pseudomonas aeruginosa Amoxycillin clavulanate
Pseudomonas aeruginosa Ampicillin
Pseudomonas aeruginosa Aztreonam avibactam
Pseudomonas aeruginosa Ceftaroline
Pseudomonas aeruginosa Ceftaroline avibactam
Pseudomonas aeruginosa Ceftriaxone
Pseudomonas aeruginosa ColistinP80
Pseudomonas aeruginosa Ertapenem
Pse

## MIC DISTRIBUTIONS AND R CLUSTER (TEST)

In [None]:
#CREATE NEW DIRECTORY
import os
#makedirs creates 'results_test/' as well and is a no-op when the folders exist
os.makedirs('results_test/mic_distributions', exist_ok=True)
os.makedirs('results_test/Rcluster', exist_ok=True)

DF, pL, bP, drugs=read_dataset('test')
#GET GAUSSIAN MIXTURE FOR EACH PA PAIR
MAX_TRIES=100#upper bound on retries of the Gaussian mixture fit
B1=np.arange(-10,10.5,0.5)#bin edges for the smooth MIC histograms
B2=np.arange(-10,10.5,1)#bin edges for the original MIC histograms
for sp in pL:
    for dr in drugs:
        #BP is the breakpoint of the pair, or 0 when there is none.
        #Every pair is processed: an earlier version skipped the pairs that
        #have a breakpoint ("done already"), which on a fresh run left those
        #files unwritten
        BP=0
        if dr in bP[sp]:
            BP=bP[sp][dr][0]
        pY,pM,sM=mic_dist(sp,dr,DF,BP)
        if not pY: continue
        print(sp,dr)
        #GAUSSIAN MIXTURE AND R CLUSTER
        #sometimes the Gaussian mixture doesn't work, so we retry; the number
        #of attempts is bounded so a pair that always fails cannot hang the cell
        #NOTE(review): where extract_rcluster writes its output is not visible
        #here - confirm it does not overwrite the ATLAS R-cluster files
        for attempt in range(MAX_TRIES):
            try:
                extract_rcluster(sp, dr, pY, pM, sM, DF)
                Z,gmm_x=gaussian_mixture(pY, sM)
                break
            except Exception as error:
                last_error=error
        else:
            print('Gaussian mixture failed for', sp, dr, 'after', MAX_TRIES, 'attempts:', last_error)
            continue
        np.savetxt('results_test/mic_distributions/'+sp+'_'+dr+'.txt',Z,fmt='%.3f')                
        np.savetxt('results_test/mic_distributions/'+sp+'_'+dr+'_years.txt',pY, fmt='%d')
        #WRITE HISTOGRAMS (one row per year in pY)
        sH=[]
        oH=[]
        for i,y in enumerate(pY):
            # Make regular histogram (SMOOTHMIC)
            n1, bins1=np.histogram(sM[i], bins=B1, density=True)                     
            # Make regular histogram (MIC)
            n2, bins2=np.histogram(pM[i], bins=B2, density=True)
            sH.append(n1)
            oH.append(n2)
        sH=np.array(sH, dtype=float)
        oH=np.array(oH, dtype=float)
        np.savetxt('results_test/mic_distributions/'+sp+'_'+dr+'_original.txt',oH,fmt='%.3f')
        np.savetxt('results_test/mic_distributions/'+sp+'_'+dr+'_smooth.txt',sH,fmt='%.3f')

## MIC DISTRIBUTIONS AND R CLUSTER (INFORM)

In [None]:
#CREATE NEW DIRECTORY
import os
#INFORM results go to 'results_inform/' (writing them to 'results_test/'
#would overwrite the TEST files). makedirs creates the parent folder as
#well and is a no-op when the folders exist
os.makedirs('results_inform/mic_distributions', exist_ok=True)
os.makedirs('results_inform/Rcluster', exist_ok=True)

DF, pL, bP, drugs=read_dataset('inform')
#GET GAUSSIAN MIXTURE FOR EACH PA PAIR
MAX_TRIES=100#upper bound on retries of the Gaussian mixture fit
B1=np.arange(-10,10.5,0.5)#bin edges for the smooth MIC histograms
B2=np.arange(-10,10.5,1)#bin edges for the original MIC histograms
for sp in pL:
    for dr in drugs:
        #BP is the breakpoint of the pair, or 0 when there is none.
        #Every pair is processed: an earlier version skipped the pairs that
        #have a breakpoint ("done already"), which on a fresh run left those
        #files unwritten
        BP=0
        if dr in bP[sp]:
            BP=bP[sp][dr][0]
        pY,pM,sM=mic_dist(sp,dr,DF,BP)
        if not pY: continue
        print(sp,dr)
        #GAUSSIAN MIXTURE AND R CLUSTER
        #sometimes the Gaussian mixture doesn't work, so we retry; the number
        #of attempts is bounded so a pair that always fails cannot hang the cell
        #NOTE(review): where extract_rcluster writes its output is not visible
        #here - confirm it does not overwrite the ATLAS R-cluster files
        for attempt in range(MAX_TRIES):
            try:
                extract_rcluster(sp, dr, pY, pM, sM, DF)
                Z,gmm_x=gaussian_mixture(pY, sM)
                break
            except Exception as error:
                last_error=error
        else:
            print('Gaussian mixture failed for', sp, dr, 'after', MAX_TRIES, 'attempts:', last_error)
            continue
        np.savetxt('results_inform/mic_distributions/'+sp+'_'+dr+'.txt',Z,fmt='%.3f')                
        np.savetxt('results_inform/mic_distributions/'+sp+'_'+dr+'_years.txt',pY, fmt='%d')
        #WRITE HISTOGRAMS (one row per year in pY)
        sH=[]
        oH=[]
        for i,y in enumerate(pY):
            # Make regular histogram (SMOOTHMIC)
            n1, bins1=np.histogram(sM[i], bins=B1, density=True)                     
            # Make regular histogram (MIC)
            n2, bins2=np.histogram(pM[i], bins=B2, density=True)
            sH.append(n1)
            oH.append(n2)
        sH=np.array(sH, dtype=float)
        oH=np.array(oH, dtype=float)
        np.savetxt('results_inform/mic_distributions/'+sp+'_'+dr+'_original.txt',oH,fmt='%.3f')
        np.savetxt('results_inform/mic_distributions/'+sp+'_'+dr+'_smooth.txt',sH,fmt='%.3f')

# CHANGEPOINT ANALYSIS
To check whether increased testing has led to a significant MIC trend in ATLAS (Fig S7). First we need to record the global average MIC and number of cases (we had previously computed them for each country):

## GLOBAL AVERAGE MIC PER YEAR 

In [6]:
#FOR EVERY (PATHOGEN, DRUG, YEAR) WE RECORD THE MEAN MIC (RELATIVE TO THE
#BREAKPOINT WHEN THE PAIR HAS ONE), ITS SEM AND THE NUMBER OF CASES
DF, pL, bP, drugs=read_dataset('atlas')
L=[]
for sp in pL:
    print(sp)
    DFsp=DF[DF.Species==sp]
    if DFsp.empty:
        continue
    for dr in drugs:
        DFnew=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]}).dropna()
        if DFnew.empty:
            continue
        for y in get_key(DFnew, 'Year'):
            DFy=DFnew[DFnew.Year==y]
            if DFy.empty:
                continue
            #pairs without a breakpoint are included, with an offset of 0
            BP=bP[sp][dr][0] if dr in bP[sp] else 0
            values=DFy[dr]
            L.append([sp, dr, y, values.mean()-BP, values.sem(), len(DFy)])
#SAVE ALL IN DATAFRAME AND WRITE TO FILE
columns=['Species', 'Antibiotic', 'Year', 'Average MIC', 'Average MIC SEM', 'Number of cases']
MICdf=pd.DataFrame.from_records(L, columns=columns)
MICdf.to_csv('data/average_mic_per_year_complete.csv', float_format='%.3f', na_rep='NaN', index=False)

Staphylococcus aureus
Escherichia coli
Klebsiella pneumoniae
Pseudomonas aeruginosa
Enterobacter cloacae
Streptococcus pneumoniae
Haemophilus influenzae
Acinetobacter baumannii
Enterococcus faecalis
Streptococcus agalactiae
Serratia marcescens
Klebsiella oxytoca
Klebsiella aerogenes
Enterococcus faecium
Streptococcus pyogenes
Staphylococcus epidermidis
Citrobacter freundii
Proteus mirabilis
Citrobacter koseri
Staphylococcus haemolyticus
Bacteroides fragilis
Stenotrophomonas maltophilia
Morganella morganii
Acinetobacter pitii
Proteus vulgaris
Enterobacter asburiae
Acinetobacter lwoffii
Streptococcus dysgalactiae
Clostridioides (Clostridium) difficile
Clostridium perfringens
Staphylococcus hominis
Moraxella catarrhalis
Klebsiella variicola
Streptococcus anginosus
Enterobacter kobei
Parvimonas micra
Prevotella bivia
Providencia stuartii
Staphylococcus lugdunensis
Bacteroides thetaiotaomicron
Peptostreptococcus magnus
Serratia liquefaciens
Acinetobacter nosocomialis
Staphylococcus capitis


## CHANGEPOINT DETECTION

In [17]:
from scipy import stats


def _year_mic(frame, dr):
    """Return the 'Year' and `dr` columns of `frame`, dropping rows with a NaN in either."""
    return pd.DataFrame({'Year': frame['Year'], dr: frame[dr]}).dropna()


def _fit_trend(points, dr, BP, min_years=None):
    """Return (slope, intercept) of `dr` regressed on 'Year', intercept shifted by BP.

    points    -- frame with a 'Year' column and a `dr` column, NaNs already dropped
    dr        -- name of the antibiotic column being regressed
    BP        -- offset subtracted from the intercept (0 when the pair has no breakpoint)
    min_years -- when given, the regression is only attempted if get_key(points, 'Year')
                 holds more than this many entries

    A flat trend (slope 0, intercept = mean of `dr` minus BP) is returned when the
    regression is skipped or when its p-value exceeds 0.05.
    """
    flat=(0, points[dr].mean()-BP)
    if min_years is not None and len(get_key(points, 'Year'))<=min_years:
        return flat
    slope, intercept, r_value, p_value, std_err=stats.linregress(points['Year'], points[dr])
    if p_value>0.05:
        return flat
    return slope, intercept-BP


DF, pL, bP, drugs=read_dataset('atlas')
DF2=pd.read_csv('data/average_mic_per_year_complete.csv')

#FIRST LOOK FOR A CHANGEPOINT IN THE YEARLY NUMBER OF CASES OF EVERY PAIR
sdL=[]
for sp, dr in itertools.product(pL, drugs):
    DFsp=DF2[(DF2.Species==sp) & (DF2.Antibiotic==dr)]
    if DFsp.empty: continue
    yL=get_key(DFsp, 'Year')
    if len(yL)<2: continue
    #number of cases per year, in the same order as yL
    nL=[DFsp[DFsp.Year==year]['Number of cases'].sum() for year in yL]
    tau=find_changepoint(nL)
    #find_changepoint returns an index into nL; values below 0 mean "none found"
    if tau>-1:
        sdL.append([sp,dr, yL[tau]])
print(len(sdL))

#NOW KEEP THE PAIRS WHOSE TREND CHANGES SIGN AT THE CHANGEPOINT
special=[]
for sp, dr, tau in sdL:#tau is now the changepoint YEAR, not the index
    BP=bP[sp][dr][0] if dr in bP[sp] else 0
    DFdr=DF[DF.Species==sp]
    #global trend over all years (always fitted)
    slope, intercept=_fit_trend(_year_mic(DFdr, dr), dr, BP)
    #trend from the changepoint year onwards
    s1, i1=_fit_trend(_year_mic(DFdr[DFdr.Year>tau-1], dr), dr, BP, min_years=2)
    #trend strictly before the changepoint year
    s2, i2=_fit_trend(_year_mic(DFdr[DFdr.Year<tau], dr), dr, BP, min_years=2)
    #opposite signs before and after: a flat (0) slope on either side never qualifies
    if s1*s2<0:
        special.append([sp,dr,tau,s1,i1,s2,i2,slope,intercept])

DFstr=pd.DataFrame.from_records(special, columns=['Species', 'Antibiotic', 'Changepoint', 'Slope After', 'Intercept After', 'Slope Before', 'Intercept Before', 'Slope Global', 'Intercept Global'])
DFstr.to_csv('data/changepoint_analysis_change_in_mic.csv', index=False, float_format='%.6f')

1718


# R CLUSTER TRENDS

(Data for Figs 5, 6, 8, Supp Figs 14, 17, 20)

## ATLAS TREND
(Slopes and intercepts are recorded with `%.6f`, so that no precision is lost to rounding.)

In [None]:
# Load the 'atlas' dataset, pass it to resistant_cluster_trends and save the
# returned table; float_format='%.6f' writes every float with six decimals.
DF, pL, bP, drugs=read_dataset('atlas')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_trends_noBP.csv', index=False, float_format='%.6f')

## TEST TREND

In [None]:
# Same computation for the 'test' dataset; the result goes to results_test/
# and floats are written with six decimals.
DF, pL, bP, drugs=read_dataset('test')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results_test/Rcluster/resistant_cluster_trends_noBP.csv', index=False, float_format='%.6f')

## INFORM TREND

In [None]:
# Same computation for the 'inform' dataset; the result goes to results_inform/
# and floats are written with six decimals.
DF, pL, bP, drugs=read_dataset('inform')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results_inform/Rcluster/resistant_cluster_trends_noBP.csv', index=False, float_format='%.6f')

## EUROPE TREND

In [None]:
# Same computation for the 'europe' dataset; saved next to the ATLAS results
# with an _europe suffix and six-decimal floats.
DF, pL, bP, drugs=read_dataset('europe')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_trends_noBP_europe.csv', index=False, float_format='%.6f')

## USA TREND

In [None]:
# Same computation for the 'USA' dataset; saved next to the ATLAS results
# with an _usa suffix and six-decimal floats.
DF, pL, bP, drugs=read_dataset('USA')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_trends_noBP_usa.csv', index=False, float_format='%.6f')

## REST TREND

In [None]:
# Same computation for the 'rest' dataset; saved next to the ATLAS results
# with a _rest suffix and six-decimal floats.
DF, pL, bP, drugs=read_dataset('rest')
newDF=resistant_cluster_trends(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_trends_noBP_rest.csv', index=False, float_format='%.6f')

# RECORD DATAPOINTS IN RCLUSTER TRENDS
Supp Fig 20D

In [5]:
# For every (species, antibiotic) pair of the 'atlas' dataset, record how many
# rows carry both a year and a value for the drug, and how many years those
# rows span (as reported by get_key). Pairs with fewer than two years are skipped.
DF, pL, bP, drugs=read_dataset('atlas')
from sklearn import mixture
import statsmodels.api as sm
from functions import *

lS=[]
lA=[]
lN=[]
lY=[]
for sp in pL:
    #the species filter does not depend on the drug, so compute it once per species
    DFsp=DF[DF.Species==sp]
    for dr in drugs:
        print(sp,dr)
        #GLOBAL TREND
        DFc=pd.DataFrame({'Year': DFsp['Year'], 'MIC': DFsp[dr]})
        DFc=DFc.dropna()
        yL=get_key(DFc, 'Year')
        if len(yL)<2: continue
        #LISTS
        lS.append(sp)
        lA.append(dr)
        lN.append(len(DFc))
        lY.append(len(yL))
newDF=pd.DataFrame({'Species':lS, 'Antibiotic':lA, 'Datapoints':lN, 'YearRange':lY})
newDF.to_csv('results/Rcluster/resistant_cluster_trends_datapoints.csv', index=False)

Staphylococcus aureus Amikacin
Staphylococcus aureus Amoxycillin clavulanate
Staphylococcus aureus Ampicillin
Staphylococcus aureus Ampicillin sulbactam
Staphylococcus aureus Azithromycin
Staphylococcus aureus Aztreonam
Staphylococcus aureus Aztreonam avibactam
Staphylococcus aureus Cefepime
Staphylococcus aureus Cefoxitin
Staphylococcus aureus Ceftaroline
Staphylococcus aureus Ceftaroline avibactam
Staphylococcus aureus Ceftazidime
Staphylococcus aureus Ceftazidime avibactam
Staphylococcus aureus Ceftriaxone
Staphylococcus aureus Clarithromycin
Staphylococcus aureus Clindamycin
Staphylococcus aureus Colistin
Staphylococcus aureus ColistinP80
Staphylococcus aureus Daptomycin
Staphylococcus aureus Doripenem
Staphylococcus aureus Ertapenem
Staphylococcus aureus Erythromycin
Staphylococcus aureus Gentamicin
Staphylococcus aureus Imipenem
Staphylococcus aureus Levofloxacin
Staphylococcus aureus Linezolid
Staphylococcus aureus Meropenem
Staphylococcus aureus Metronidazole
Staphylococcus aur

Haemophilus influenzae Azithromycin
Haemophilus influenzae Aztreonam
Haemophilus influenzae Aztreonam avibactam
Haemophilus influenzae Cefepime
Haemophilus influenzae Cefoxitin
Haemophilus influenzae Ceftaroline
Haemophilus influenzae Ceftaroline avibactam
Haemophilus influenzae Ceftazidime
Haemophilus influenzae Ceftazidime avibactam
Haemophilus influenzae Ceftriaxone
Haemophilus influenzae Clarithromycin
Haemophilus influenzae Clindamycin
Haemophilus influenzae Colistin
Haemophilus influenzae ColistinP80
Haemophilus influenzae Daptomycin
Haemophilus influenzae Doripenem
Haemophilus influenzae Ertapenem
Haemophilus influenzae Erythromycin
Haemophilus influenzae Gentamicin
Haemophilus influenzae Imipenem
Haemophilus influenzae Levofloxacin
Haemophilus influenzae Linezolid
Haemophilus influenzae Meropenem
Haemophilus influenzae Metronidazole
Haemophilus influenzae Minocycline
Haemophilus influenzae Moxifloxacin
Haemophilus influenzae Oxacillin
Haemophilus influenzae Penicillin
Haemophil

Klebsiella aerogenes Ceftaroline
Klebsiella aerogenes Ceftaroline avibactam
Klebsiella aerogenes Ceftazidime
Klebsiella aerogenes Ceftazidime avibactam
Klebsiella aerogenes Ceftriaxone
Klebsiella aerogenes Clarithromycin
Klebsiella aerogenes Clindamycin
Klebsiella aerogenes Colistin
Klebsiella aerogenes ColistinP80
Klebsiella aerogenes Daptomycin
Klebsiella aerogenes Doripenem
Klebsiella aerogenes Ertapenem
Klebsiella aerogenes Erythromycin
Klebsiella aerogenes Gentamicin
Klebsiella aerogenes Imipenem
Klebsiella aerogenes Levofloxacin
Klebsiella aerogenes Linezolid
Klebsiella aerogenes Meropenem
Klebsiella aerogenes Metronidazole
Klebsiella aerogenes Minocycline
Klebsiella aerogenes Moxifloxacin
Klebsiella aerogenes Oxacillin
Klebsiella aerogenes Penicillin
Klebsiella aerogenes Piperacillin tazobactam
Klebsiella aerogenes Quinupristin dalfopristin
Klebsiella aerogenes Teicoplanin
Klebsiella aerogenes Tigecycline
Klebsiella aerogenes Trimethoprim sulfa
Klebsiella aerogenes Vancomycin
En

Citrobacter koseri Colistin
Citrobacter koseri ColistinP80
Citrobacter koseri Daptomycin
Citrobacter koseri Doripenem
Citrobacter koseri Ertapenem
Citrobacter koseri Erythromycin
Citrobacter koseri Gentamicin
Citrobacter koseri Imipenem
Citrobacter koseri Levofloxacin
Citrobacter koseri Linezolid
Citrobacter koseri Meropenem
Citrobacter koseri Metronidazole
Citrobacter koseri Minocycline
Citrobacter koseri Moxifloxacin
Citrobacter koseri Oxacillin
Citrobacter koseri Penicillin
Citrobacter koseri Piperacillin tazobactam
Citrobacter koseri Quinupristin dalfopristin
Citrobacter koseri Teicoplanin
Citrobacter koseri Tigecycline
Citrobacter koseri Trimethoprim sulfa
Citrobacter koseri Vancomycin
Staphylococcus haemolyticus Amikacin
Staphylococcus haemolyticus Amoxycillin clavulanate
Staphylococcus haemolyticus Ampicillin
Staphylococcus haemolyticus Ampicillin sulbactam
Staphylococcus haemolyticus Azithromycin
Staphylococcus haemolyticus Aztreonam
Staphylococcus haemolyticus Aztreonam avibac

Proteus vulgaris Colistin
Proteus vulgaris ColistinP80
Proteus vulgaris Daptomycin
Proteus vulgaris Doripenem
Proteus vulgaris Ertapenem
Proteus vulgaris Erythromycin
Proteus vulgaris Gentamicin
Proteus vulgaris Imipenem
Proteus vulgaris Levofloxacin
Proteus vulgaris Linezolid
Proteus vulgaris Meropenem
Proteus vulgaris Metronidazole
Proteus vulgaris Minocycline
Proteus vulgaris Moxifloxacin
Proteus vulgaris Oxacillin
Proteus vulgaris Penicillin
Proteus vulgaris Piperacillin tazobactam
Proteus vulgaris Quinupristin dalfopristin
Proteus vulgaris Teicoplanin
Proteus vulgaris Tigecycline
Proteus vulgaris Trimethoprim sulfa
Proteus vulgaris Vancomycin
Enterobacter asburiae Amikacin
Enterobacter asburiae Amoxycillin clavulanate
Enterobacter asburiae Ampicillin
Enterobacter asburiae Ampicillin sulbactam
Enterobacter asburiae Azithromycin
Enterobacter asburiae Aztreonam
Enterobacter asburiae Aztreonam avibactam
Enterobacter asburiae Cefepime
Enterobacter asburiae Cefoxitin
Enterobacter asburi

Staphylococcus hominis Amoxycillin clavulanate
Staphylococcus hominis Ampicillin
Staphylococcus hominis Ampicillin sulbactam
Staphylococcus hominis Azithromycin
Staphylococcus hominis Aztreonam
Staphylococcus hominis Aztreonam avibactam
Staphylococcus hominis Cefepime
Staphylococcus hominis Cefoxitin
Staphylococcus hominis Ceftaroline
Staphylococcus hominis Ceftaroline avibactam
Staphylococcus hominis Ceftazidime
Staphylococcus hominis Ceftazidime avibactam
Staphylococcus hominis Ceftriaxone
Staphylococcus hominis Clarithromycin
Staphylococcus hominis Clindamycin
Staphylococcus hominis Colistin
Staphylococcus hominis ColistinP80
Staphylococcus hominis Daptomycin
Staphylococcus hominis Doripenem
Staphylococcus hominis Ertapenem
Staphylococcus hominis Erythromycin
Staphylococcus hominis Gentamicin
Staphylococcus hominis Imipenem
Staphylococcus hominis Levofloxacin
Staphylococcus hominis Linezolid
Staphylococcus hominis Meropenem
Staphylococcus hominis Metronidazole
Staphylococcus hominis

Prevotella bivia ColistinP80
Prevotella bivia Daptomycin
Prevotella bivia Doripenem
Prevotella bivia Ertapenem
Prevotella bivia Erythromycin
Prevotella bivia Gentamicin
Prevotella bivia Imipenem
Prevotella bivia Levofloxacin
Prevotella bivia Linezolid
Prevotella bivia Meropenem
Prevotella bivia Metronidazole
Prevotella bivia Minocycline
Prevotella bivia Moxifloxacin
Prevotella bivia Oxacillin
Prevotella bivia Penicillin
Prevotella bivia Piperacillin tazobactam
Prevotella bivia Quinupristin dalfopristin
Prevotella bivia Teicoplanin
Prevotella bivia Tigecycline
Prevotella bivia Trimethoprim sulfa
Prevotella bivia Vancomycin
Providencia stuartii Amikacin
Providencia stuartii Amoxycillin clavulanate
Providencia stuartii Ampicillin
Providencia stuartii Ampicillin sulbactam
Providencia stuartii Azithromycin
Providencia stuartii Aztreonam
Providencia stuartii Aztreonam avibactam
Providencia stuartii Cefepime
Providencia stuartii Cefoxitin
Providencia stuartii Ceftaroline
Providencia stuartii 

Acinetobacter nosocomialis Ceftazidime
Acinetobacter nosocomialis Ceftazidime avibactam
Acinetobacter nosocomialis Ceftriaxone
Acinetobacter nosocomialis Clarithromycin
Acinetobacter nosocomialis Clindamycin
Acinetobacter nosocomialis Colistin
Acinetobacter nosocomialis ColistinP80
Acinetobacter nosocomialis Daptomycin
Acinetobacter nosocomialis Doripenem
Acinetobacter nosocomialis Ertapenem
Acinetobacter nosocomialis Erythromycin
Acinetobacter nosocomialis Gentamicin
Acinetobacter nosocomialis Imipenem
Acinetobacter nosocomialis Levofloxacin
Acinetobacter nosocomialis Linezolid
Acinetobacter nosocomialis Meropenem
Acinetobacter nosocomialis Metronidazole
Acinetobacter nosocomialis Minocycline
Acinetobacter nosocomialis Moxifloxacin
Acinetobacter nosocomialis Oxacillin
Acinetobacter nosocomialis Penicillin
Acinetobacter nosocomialis Piperacillin tazobactam
Acinetobacter nosocomialis Quinupristin dalfopristin
Acinetobacter nosocomialis Teicoplanin
Acinetobacter nosocomialis Tigecycline


# RCLUSTER VALUES

In [None]:
# Resistant-cluster values for the 'test' dataset, written with six decimals.
DF, pL, bP, drugs=read_dataset('test')
newDF=resistant_cluster_values(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_values_test.csv', index=False, float_format='%.6f')

# Same for the 'inform' dataset. After this cell DF, pL, bP and drugs hold the
# 'inform' data, which is what any later cell reusing those names will see.
DF, pL, bP, drugs=read_dataset('inform')
newDF=resistant_cluster_values(DF, pL, bP, drugs)
newDF.to_csv('results/Rcluster/resistant_cluster_values_inform.csv', index=False, float_format='%.6f')

# DIFFERENCE BETWEEN TEST AND INFORM

Data for Fig 7, Supp Figs 10 and 22

In [None]:
# For every (species, antibiotic) pair present in both files, accumulate the
# squared differences between the TEST and INFORM cluster values (columns
# 'R<year>' and 'S<year>') over 2012-2017, then divide by the number of years.
DFT=pd.read_csv('results/Rcluster/resistant_cluster_values_test.csv')
DFI=pd.read_csv('results/Rcluster/resistant_cluster_values_inform.csv')
YL=range(2012,2018)
L=[]
for index, row in DFT.iterrows():
    sp=row['Species']
    dr=row['Antibiotic']
    #row2 is a (possibly multi-row) frame, hence the .mean() calls below
    row2=DFI[(DFI.Species==sp) &(DFI.Antibiotic==dr)]
    if row2.empty: continue
    Diff=0
    for y in YL:
        r='R'+str(y)
        s='S'+str(y)
        
        #DIFFERENCE IN R
        if pd.isna(row[r]) or pd.isna(row2[r].mean()):#no R cluster in either (no data to compare)
            continue
        #DIFFERENCE IN S
        if pd.isna(row[s]):#no S cluster in TEST
            if pd.isna(row2[s].mean()):#no S cluster in either
                Diff+=(row[r]-row2[r].mean())**2
                print(r,'Diff1', (row[r]-row2[r].mean())**2)
            else:#no S cluster in TEST, but yes in INFORM
                #compare the single TEST cluster with the midpoint of the two INFORM clusters
                avg=(row2[s]+row2[r])/2
                Diff+=(avg.mean()-row[r])**2
                print(s,'Diff2',(avg.mean()-row[r])**2)
        elif pd.isna(row2[s].mean()):#no S cluster in INFORM, but yes in TEST
            #compare the single INFORM cluster with the midpoint of the two TEST clusters
            avg=(row[s]+row[r])/2
            Diff+=(avg-row2[r].mean())**2
            #print the same scalar that was just added to Diff (the original printed a whole Series)
            print(s,'Diff3',(avg-row2[r].mean())**2)
        else:#R and S cluster in both
            Diff+=(row[r]-row2[r].mean())**2
            Diff+=(row[s]-row2[s].mean())**2
            print(s,'Diff4',(row[s]-row2[s].mean())**2)
    #average over all years in YL, including the ones skipped above
    Diff/=len(YL)
    L.append([sp,dr,Diff])

#largest disagreement first
L=sorted(L, reverse=True, key=itemgetter(2))
newDF=pd.DataFrame.from_records(L, columns=['Species', 'Antibiotic', 'Diff'])
newDF.to_csv('results/Rcluster/resistant_cluster_values_test_vs_inform.csv', index=False, float_format='%.6f')

# DATAPOINTS FOR CAZ AVI TESTING

Data for Supp Fig 6

In [4]:
# Load the dataset explicitly: when the notebook is run top to bottom, the DF
# left behind by the previous cells is the 'inform' subset, which would leave
# TEST empty and make drug_testing_atlas.csv describe INFORM only.
# NOTE(review): assumes read_dataset('atlas') keeps the 'Study' column - confirm.
DF, pL, bP, drugs=read_dataset('atlas')
TEST=DF[DF.Study=='TEST']
INFORM=DF[DF.Study=='Inform']


def _datapoints_per_year(frame, drug_names):
    """Count, for every drug and year, the rows of `frame` with a value for that drug.

    frame      -- table with a 'Year' column and one column per drug
    drug_names -- names of the drug columns to scan

    Returns a DataFrame with columns 'Antibiotic', 'Year' and 'Datapoints'.
    """
    records=[]
    for dr in drug_names:
        #rows lacking either the year or the drug value are not counted
        tested=pd.DataFrame({'Year': frame['Year'], dr: frame[dr]}).dropna()
        for y in get_key(tested,'Year'):
            records.append([dr,y,len(tested[tested.Year==y])])
    return pd.DataFrame.from_records(records, columns=['Antibiotic','Year','Datapoints'])


# Same count for the whole dataset and for each of the two studies.
for frame, path in ((DF, 'data/drug_testing_atlas.csv'),
                    (TEST, 'data/drug_testing_test.csv'),
                    (INFORM, 'data/drug_testing_inform.csv')):
    DFT=_datapoints_per_year(frame, drugs)
    DFT.to_csv(path, index=False)

# PA PAIRS WITH LOW TAU (Uncertain)
Data for Supp Fig 12

In [None]:
# Flag the pairs with Tau<0.25 against four thresholds computed over ALL pairs:
# low mean number of cases (N), high spread of cases (V), low minimum number of
# cases (M) and large TEST/INFORM difference (D).
DFdist=pd.read_csv('results/correlations/tau_test.csv')
Diff=pd.read_csv('results/Rcluster/resistant_cluster_values_test_vs_inform.csv')
SS=pd.read_csv('data/average_mic_per_year_complete.csv')

#COLLECT THE PER-PAIR STATISTICS NEEDED FOR THE THRESHOLDS
SN=[]
SV=[]
SM=[]
SD=[]
for index,row in DFdist.iterrows():
    sp=row['Species']
    dr=row['Antibiotic']
    SSsp=SS[(SS.Species==sp) & (SS.Antibiotic==dr)]
    N=SSsp['Number of cases'].mean()
    V=SSsp['Number of cases'].std()
    M=SSsp['Number of cases'].min()
    SN.append(N)
    SV.append(V)
    SM.append(M)
    Diffsp=Diff[(Diff.Species==sp) & (Diff.Antibiotic==dr)]
    if Diffsp.empty: continue
    D=Diffsp['Diff'].mean()
    SD.append(D)
SN=np.array(SN)
SV=np.array(SV)
SM=np.array(SM)
SD=np.array(SD)
#thresholds: lower quartile for N and M, upper quartile for V and D
N1=np.quantile(SN,0.25)
V1=np.quantile(SV,0.75)
M1=np.quantile(SM,0.25)
D1=np.quantile(SD,0.75)

DFdist=DFdist[DFdist.Tau<0.25]
print(len(DFdist))
DFdist=DFdist.iloc[::-1]
DFdist=DFdist.reset_index(drop=True)
#counters of how many pairs receive each flag; they must exist before the
#loop increments them (the cell raised NameError on a fresh kernel otherwise)
Ns=Nv=Nm=Nd=Nu=0
List=[]
for index,row in DFdist.iterrows():
    sp=row['Species']
    dr=row['Antibiotic']
    tau=row['Tau']
    L='('
    #check if sample size issue
    SSsp=SS[(SS.Species==sp) & (SS.Antibiotic==dr)]
    N=SSsp['Number of cases'].mean()
    V=SSsp['Number of cases'].std()
    M=SSsp['Number of cases'].min()
    if N<N1:
        L+='N'
        Ns+=1
    if V>V1:
        L+='V'
        Nv+=1
    if M<M1:
        L+='M'
        Nm+=1
    #check if data curation issue
    Diffsp=Diff[(Diff.Species==sp) & (Diff.Antibiotic==dr)]
    D=Diffsp['Diff'].mean()
    if D>D1:
        L+='D'
        Nd+=1
    L+=')'
    if L=='()':
        L='(U)'
        Nu+=1
        #NOTE(review): only pairs with no flag at all are written out, which
        #matches the "(Uncertain)" section heading - confirm this is intended
        List.append([sp,dr,tau,N,V,M,D])
newDF=pd.DataFrame.from_records(List, columns=['Species','Antibiotic','Tau','N','V','M','D'])
newDF.to_csv('results/correlations/bad_correlations.csv', index=False, float_format='%.3f')

# MOVING BREAKPOINTS

Data for Supp Fig 13

In [None]:
# Fraction of resistant cases per (species, antibiotic, country, year), using
# the year-dependent breakpoints stored in data/breakpoints_year.xls.
# Load the dataset explicitly: when the notebook is run top to bottom, the DF
# left behind by earlier cells is not the full 'atlas' table.
DF, pL, bP, drugs=read_dataset('atlas')
L=[]
pC=get_key(DF,'Country')
BP=pd.read_excel('data/breakpoints_year.xls')
pLnew=get_key(BP,'Species')
for country in pC:#for every country
    DFc=DF[DF.Country==country]
    if DFc.empty: continue   
    for sp in pLnew:#for every pathogen
        DFsp=DFc[DFc.Species==sp]
        if DFsp.empty: continue
        BPsp=BP[BP.Species==sp]
        for dr in drugs:#for every drug
            BPdr=BPsp[BPsp.Antibiotic==dr]
            if BPdr.empty: continue
            DFnew=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]})
            DFnew=DFnew.dropna()
            yL=get_key(DFnew, 'Year')
            for y in yL:#for every year
                DFy=DFnew[DFnew.Year==y]
                if DFy.empty: continue
                #DATA
                #first 'S' entry among the breakpoint rows whose Year is later than y
                #NOTE(review): relies on the row order of the spreadsheet - confirm it is sorted by Year
                later=BPdr[BPdr.Year>y]['S']
                #no breakpoint row after this year: nothing to compare against
                if later.empty: continue
                #named bp_value so that the builtin breakpoint() is not shadowed
                bp_value=later.iloc[0]
                print(y,sp,dr,bp_value)
                rcases=DFy[DFy[dr]>bp_value].count()[dr]  
                N=len(DFy)
                R=rcases
                F=rcases/N
                #RECORD
                L.append([sp,dr,country,y,N,R,F])
#SAVE ALL IN DATAFRAME AND WRITE TO FILE
MICdf=pd.DataFrame.from_records(L, columns=['Species', 'Antibiotic', 'Country', 'Year', 'Number of cases', 'Number of resistant cases', 'Fraction of resistant cases'])
MICdf.to_csv('data/moving_breakpoints.csv', float_format='%.3f', na_rep='NaN', index=False)

In [None]:
#READ THE PER-COUNTRY RESISTANT FRACTIONS WRITTEN BY THE MOVING-BREAKPOINTS CELL
DFavg=pd.read_csv('data/moving_breakpoints.csv')
#READ RESISTANCEMAP DATA
RD=pd.read_csv('data/resistmap_data.csv')
RD['year']=RD['year'].astype('int64')
RD['value']=RD['value'].astype('float64')
RD['IsolatesTested']=RD['IsolatesTested'].astype('float64')
spR=get_key(RD, 'Organism')
cR=get_key(RD, 'CountryName')
dR=get_key(RD, 'Antimicrobial')
#DATA IN RESISTANCEMAP IS AGGREGATED IN A SIMILAR WAY AS IN ECDC
#Rant[organism] maps a ResistanceMap antimicrobial label to the list of ATLAS
#drug names it is compared with. The ATLAS names must match the 'Antibiotic'
#column of DFavg exactly (note the ATLAS spelling 'Amoxycillin clavulanate'),
#and every drug must be its own list element for .isin() to find it.
Rant={}
Rant['Acinetobacter baumannii']={'Amikacin':['Amikacin'], 
    'Ampicillin-sulbactam':['Ampicillin sulbactam'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'Glycylcyclines':['Tigecycline']}
Rant['Enterobacter aerogenes/cloacae']={'Amoxicillin-clavulanate':['Amoxycillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Enterococcus faecalis']={'Aminoglycosides (high-level)':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Enterococcus faecium']={'Vancomycin':['Vancomycin']}
Rant['Escherichia coli']={'Amoxicillin-clavulanate':['Amoxycillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Klebsiella pneumoniae']={'Amoxicillin-clavulanate':['Amoxycillin clavulanate'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Glycylcyclines':['Tigecycline'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Pseudomonas aeruginosa']={'Amikacin':['Amikacin'],
    'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'Piperacillin-tazobactam':['Piperacillin tazobactam']}
Rant['Staphylococcus aureus']={'Linezolid':['Linezolid'],
    'Oxacillin (MRSA)':['Oxacillin'],
    'Vancomycin':['Vancomycin']}
Rant['Streptococcus pneumoniae']={'Macrolides':['Erythromycin', 'Clarithromycin', 'Azithromycin']}

#COMPARE RESISTANCE RATES
yL=list(range(2004,2018))
L=[]
for sp, country in itertools.product(spR, cR):
    DFsp=DFavg[(DFavg.Species==sp) & (DFavg.Country==country)]
    RDsp=RD[(RD.Organism==sp) & (RD.CountryName==country)]
    if DFsp.empty or RDsp.empty: continue
    #organisms without a mapping are skipped instead of raising KeyError
    for key,value in Rant.get(sp, {}).items():
        DFc=DFsp[DFsp.Antibiotic.isin(value)]
        RDc=RDsp[RDsp.Antimicrobial==key]
        for y in yL:
            DFy=DFc[DFc.Year==y]
            RDy=RDc[RDc.year==y]
            if DFy.empty or RDy.empty: continue
            x1=RDy.value.mean()
            xhigh=RDy.CIHigh.mean()
            xlow=RDy.CILow.mean()
            if np.isnan(x1): continue
            #ATLAS fractions are in [0,1]; scaled by 100 before subtracting the ResistanceMap value
            y1=DFy['Fraction of resistant cases'].mean()*100
            diff=y1-x1
            N=DFy['Number of cases'].sum()
            L.append([sp,key,y,country,x1,xhigh,xlow,y1,diff,N])

NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','Country','RMapValue','RMapCIHigh', 'RMapCILow','ATLASValue','Diff','N'])
NewDF.to_excel('data/atlas_vs_resistancemap_moving_breakpoints.xls', float_format='%.3f', index=False)

In [None]:
#READ THE PER-COUNTRY RESISTANT FRACTIONS WRITTEN BY THE MOVING-BREAKPOINTS CELL
DFavg=pd.read_csv('data/moving_breakpoints.csv')
#READ ECDC DATA
RD=pd.read_csv('data/europe_resistance_data.csv')
#we want to compare fraction of resistance in both datasets
#(.copy() so that the column assignments below act on an independent frame)
RD=RD[RD.Indicator=='Nonsusceptible proportion'].copy()
RD['Year']=RD['Year'].astype('int64')
RD['Value']=RD['Value'].astype('float64')
#ECDC resistance data comes aggregated for drug classes and for different
#pathogens. For instance, for Acinetobacter it reports resistance to
#'Carbapenems'. We therefore compare with resistance to all drugs tested
#in ATLAS for that pathogen-drug class combination, as specified in the
#ECDC website.
#Also, Acinetobacter is reported as a genus, but we here only compare with
#A.baumannii, which is by far the most important pathogen
#The rename must happen BEFORE the species list is built, otherwise the loop
#below iterates over 'Acinetobacter spp.' and never reaches A.baumannii.
#regex=False: the '.' in 'spp.' is a literal dot, not a wildcard.
RD['Species']=RD['Species'].str.replace('Acinetobacter spp.', 'Acinetobacter baumannii', regex=False)
spR=get_key(RD, 'Species')
cR=get_key(RD, 'Country')
dR=get_key(RD, 'Antibiotic')
#Rant[species] maps an ECDC antibiotic label to the list of ATLAS drug names
#it is compared with; every drug must be its own list element for .isin()
Rant={}
Rant['Acinetobacter baumannii']={'Carbapenems':['Imipenem', 'Meropenem']}
Rant['Enterococcus faecalis']={'Highlevel gentamicin':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Enterococcus faecium']={'Highlevel gentamicin':['Gentamicin'],
    'Vancomycin':['Vancomycin']}
Rant['Escherichia coli']={'Carbapenems':['Imipenem', 'Meropenem']}
Rant['Klebsiella pneumoniae']={'Carbapenems':['Imipenem', 'Meropenem']}
Rant['Pseudomonas aeruginosa']={'Carbapenems':['Imipenem', 'Meropenem'],
    'Ceftazidime':['Ceftazidime'],
    'PiperacillinTazobactam':['Piperacillin tazobactam']}
Rant['Staphylococcus aureus']={'Meticillin (MRSA)':['Oxacillin']}
Rant['Streptococcus pneumoniae']={'Macrolides':['Erythromycin', 'Clarithromycin', 'Azithromycin'],
    'Penicillins': ['Penicillin', 'Oxacillin']}

#COMPARE RESISTANCE RATES
yL=list(range(2004,2018))
L=[]
for sp, country in itertools.product(spR, cR):
    DFsp=DFavg[(DFavg.Species==sp) & (DFavg.Country==country)]
    RDsp=RD[(RD.Species==sp) & (RD.Country==country)]
    if DFsp.empty or RDsp.empty: continue
    #species without a mapping are skipped instead of raising KeyError
    for key,value in Rant.get(sp, {}).items():
        DFc=DFsp[DFsp.Antibiotic.isin(value)]
        RDc=RDsp[RDsp.Antibiotic==key]
        for y in yL:
            DFy=DFc[DFc.Year==y]
            RDy=RDc[RDc.Year==y]
            if DFy.empty or RDy.empty: continue
            x1=RDy.Value.mean()
            if np.isnan(x1): continue
            #ATLAS fractions are in [0,1]; scaled by 100 before subtracting the ECDC value
            y1=DFy['Fraction of resistant cases'].mean()*100
            diff=y1-x1
            N=DFy['Number of cases'].sum()
            L.append([sp,key,y,country,x1,y1,diff,N])
NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','Country','ECDCValue','ATLASValue','Diff','N'])
NewDF.to_excel('data/atlas_vs_ecdc_moving_breakpoints.xls', float_format='%.3f', index=False)

# DIFFERENCES BETWEEN TEST AND INFORM

Data for Fig 8D

In [None]:
#FIRST COMPARE RESISTANCE FRACTIONS
#Every TEST row is matched with the INFORM rows of the same species,
#antibiotic, country and year; unmatched TEST rows are dropped.
T=pd.read_csv('data/average_mic_per_year_test.csv')
I=pd.read_csv('data/average_mic_per_year_inform.csv')
Y=range(2012,2018)

L=[]
for index,row in T.iterrows():
    sp=row['Species']
    dr=row['Antibiotic']
    c=row['Country']
    y=row['Year']
    Iy=I[(I.Species==sp) & (I.Antibiotic==dr) & (I.Country==c) & (I.Year==y)]
    if Iy.empty: continue
    rT=row['Fraction of resistant cases']
    rI=Iy['Fraction of resistant cases'].mean()
    nT=row['Number of cases']
    #sample size of INFORM, read from the same column as nT (the original read
    #'Number of resistant cases', so NTEST and NINFORM were not comparable)
    nI=Iy['Number of cases'].mean()
    diff=rT-rI
    #fractions and their difference are stored as percentages
    L.append([sp,dr,y,c,rT*100,rI*100,diff*100,nT,nI])
NewDF=pd.DataFrame.from_records(L, columns=['Species','Antibiotic','Year','Country','TESTValue','INFORMValue','Diff','NTEST', 'NINFORM'])
NewDF.to_excel('data/test_vs_inform.xls', float_format='%.3f', index=False)

# CLINICAL PA PAIRS WITH LOW TAU

Data for Supp Fig 22

In [None]:
# Collect the Tau value of every (species, antibiotic) pair that appears in the
# trends file with Breakpoint==True.
# NOTE(review): SPL and drugs are not defined in this cell; they must come from
# an earlier cell or from the functions module - confirm.
DFM=pd.read_csv('results/Rcluster/resistant_cluster_trends_noBP.csv')
DFM=DFM[DFM.Breakpoint==True]
C=pd.read_csv('results/correlations/tau_test.csv')
TL=[]
for sp in SPL:
    DFsp=DFM[DFM.Species==sp]
    for dr in drugs:
        #local name on purpose: assigning to DF here would overwrite the dataset
        #that other cells read through the global DF
        DFpair=DFsp[DFsp.Antibiotic==dr]
        if DFpair.empty: continue
        Csp=C[(C.Species==sp) & (C.Antibiotic==dr)]
        #mean over the matching rows; NaN when the pair is absent from tau_test.csv
        t=Csp['Tau'].mean()
        TL.append([sp,dr,t])
DFT=pd.DataFrame.from_records(TL, columns=['Species', 'Antibiotic', 'Tau'])
DFT.to_csv('results/correlations/tau_fig4.csv', float_format='%.3f', index=False)