# PREPARE DATA

This notebook generates the files that the notebook *figures_carbapenems.ypynb* needs for obtaining all the plots used for this project.

# TREAT DATASET 

In [1]:
from functions import *
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
#DIRECTORY
path_main = '/Users/carmen/Desktop/carbapenems'
os.chdir(path_main)
#CREATE NEW DIRECTORY
os.mkdir('/Users/carmen/Desktop/carbapenems/results') 
os.mkdir('/Users/carmen/Desktop/carbapenems/results/mic_distributions')
os.mkdir('/Users/carmen/Desktop/carbapenems/results/Rcluster')

In [None]:
#WE READ THE EXCEL FILE
DF=pd.read_excel('data/Carbapenems_not_treated.xlsx')
DF=DF.rename(columns={'In / Out Patient': 'InOutPatient'})
#WRITE DATAFRAME'S KEYS (USEFUL FOR LATER ANALYSES)
keys=['Year', 'Country', 'Species']
mkeys={}
for key in keys:
    dL=DF[key].tolist()
    mkeys[key]={}
    for i,z in enumerate(dL):
        z=str(z)
        if z not in mkeys[key]:
            mkeys[key][z]=0  
        mkeys[key][z]+=1
    with open('data/key_'+key+'.txt','w') as f:
        for z in sorted(mkeys[key], key=lambda x: (str(type(x)), x)):
            f.write(str(z)+'\t'+str(mkeys[key][z])+'\n')

#WE WANT TO LOG2-TRANSFORM THE MIC VALUES IN THE DATASET
#BUT MIC VALUES APPEAR IN A VARIETY OF WAYS THROUGHOUT THE DATASET
#FILE data/key_MIC_values.txt HOLDS ALL MIC VALUES IN THE DATASET
#IN THE FIRST COLUMN. IN THE SECOND WE HAVE MODIFIED THEM, AND IN THE
#THIRD WE HAVE THE LOG2-TRANSFORMED VALUES
MICdict=[]
with open('data/key_MIC_values.txt', 'r') as f:
    for line in f:
        w=line.split()
        k1=w[0]
        k3=float(w[2])
        MICdict.append([k1, str(k3), len(k1)])
MICdict=sorted(MICdict, key=itemgetter(2), reverse=True)

#WE ASSIGN A RANDOM KEY TO EACH MIC VALUE BEFORE SUBSTITUTING
import string
import random
def randomString(stringLength=10):
    """Generate a random string of fixed length """
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for i in range(stringLength))
eggs=[randomString() for z in MICdict]

#NOW WE SUBSTITUTE THEM IN THE DATASET
drugs=read_key('Drugs', 'data/')
for dr in drugs:
    DF[dr]=DF[dr].astype('str')
    for x1,y1 in zip(MICdict,eggs):
        DF[dr]=DF[dr].str.replace(x1[0],y1)
    for x1,y1 in zip(eggs,MICdict):
        DF[dr]=DF[dr].str.replace(x1,y1[1])
    DF[dr]=DF[dr].astype('float64')
DF['Species']=DF['Species'].astype('str')
#FINALLY, WE UNIFORMIZE SOME SPECIES
DF['Species']=DF['Species'].str.replace('Enterobacter aerogenes', 'Klebsiella aerogenes', regex=False)
DF['Species']=DF['Species'].str.replace('Klebsiella (Enterobacter) aerogenes', 'Klebsiella aerogenes', regex=False)
DF['Species']=DF['Species'].str.replace('Klebsiella planticola', 'Raoultella planticola', regex=False)
DF.to_excel('data/atlas_treated.xlsx')   
#WRITE NEW SPECIES FILE
mkeys={}
dL=DF['Species'].tolist()
for i,z in enumerate(dL):
    z=str(z)
    if z not in mkeys:
        mkeys[z]=0  
    mkeys[z]+=1
with open('data/key_Species.txt','w') as f:
    for z in sorted(mkeys, key=lambda x: (str(type(x)), x)):
        f.write(str(z)+'\t'+str(mkeys[z])+'\n')

# READ DATA SET

Useful for later cells

In [2]:
DF, pL, bP, drugs=read_dataset('data/')

# DATAPOINTS PER YEAR

In [2]:
DF=pd.read_excel('data/atlas_treated.xlsx')
yL=list(range(2004,2018))
spL=[]
dataL=[]
for year in yL:
    DFy=DF[DF.Year==year]
    k1=get_key(DFy,'Species')
    dataL.append(len(DFy))
    spL.append(len(k1))
DFnew=pd.DataFrame({'Year':yL, 'Species': spL, 'Datapoints': dataL})
DFnew.to_csv('data/datapoints_per_year.csv', index=False)

# CORRELELOGRAMS

In [None]:
#CREATE NEW DIRECTORY
from functions import *
import os
os.mkdir('results/correlations')

#COMPUTE CORRELATIONS BETWEEN YEARS FOR EACH PA
bins=np.arange(-10.5,11.5,1)
z=np.zeros(len(bins))
for sp,dr in itertools.product(pL,drugs):
    DFsp=DF[DF.Species==sp]
    DFc=pd.DataFrame({'Year': DFsp['Year'], dr: DFsp[dr]})
    DFc=DFc.dropna()
    if DFc.empty: continue
    Y=get_key(DFc,'Year')
    H=[z for Y in Y]#H will hold the distribution of MIC values each year
    for i1,y in enumerate(Y):
        DFy=DFc[DFc.Year==y]
        h,b1=np.histogram(DFy[dr], bins=bins, density=True)
        H[i1]=h
    H=np.array(H)
    R=np.corrcoef(H)#correlation between the MIC distributions in different years
    if R.size>1:
        np.savetxt('results/correlations/'+sp+'_'+dr+'.txt',R,fmt='%.3f')
        np.savetxt('results/correlations/'+sp+'_'+dr+'_years.txt',Y, fmt='%d')
#COMPUTE TAU FOR EVERY PA PAIR  
from os import path
Dl=[]
for sp,dr in itertools.product(pL,drugs):
    if path.exists('results/correlations/'+sp+'_'+dr+'.txt'):
        R,Y=read_corr(sp, dr)
        n,m=R.shape
        tau,tauN=tautest(R)
        Dl.append([sp,dr,tau,tauN,n])     
Dl=sorted(Dl, reverse=True, key=itemgetter(2))
DFdist=pd.DataFrame.from_records(Dl, columns=['Species', 'Antibiotic', 'Tau', 'TauN', 'N'])
DFdist.to_csv('results/correlations/tau_test.csv', float_format='%.3f', index=False)

# MIC DISTRIBUTIONS, GAUSSIAN MIXTURE AND R CLUSTER


Datafiles to record the MIC distributions (raw and smooth) of every PA pair for all years (oH and sH), the Gaussian Mixture fit (Z), and the R cluster.

In [1]:
from functions import *
DF, pL, bP, drugs=read_dataset('data/')
yL=list(range(2004,2018))

In [6]:
#GET GAUSSIAN MIXTURE FOR EACH PA PAIR
for sp in pL:
    for dr in drugs:
        if dr not in bP[sp]: continue
        pY,pM,sM=mic_dist(sp,dr,DF,bP[sp][dr][0])
        if not pY: continue
        #GAUSSIAN MIXTURE AND R CLUSTER
        result=False
        while result==False:#sometimes the Gaussian mixture doesn't work
            try:
                extract_rcluster(sp, dr, pY, pM, sM, DF)
                Z,gmm_x=gaussian_mixture(pY, sM)
                result=True
            except:
                pass
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'.txt',Z,fmt='%.3f')                
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_years.txt',pY, fmt='%d')
        #WRITE HISTOGRAMS
        sH=[]
        oH=[]
        for i,y in enumerate(pY):
            # Make regular histogram (SMOOTHMIC)
            B1=np.arange(-10,10.5,0.5)
            n1, bins1=np.histogram(sM[i], bins=B1, density=True)                     
            # Make regular histogram (MIC)
            B2=np.arange(-10,10.5,1)
            n2, bins2=np.histogram(pM[i], bins=B2, density=True)
            sH.append(n1)
            oH.append(n2)
        sH=np.array(sH, dtype=float)
        oH=np.array(oH, dtype=float)
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_original.txt',oH,fmt='%.3f')
        np.savetxt('results/mic_distributions/'+sp+'_'+dr+'_smooth.txt',sH,fmt='%.3f')

# R CLUSTERS TRENDS

**GLOBAL TRENDS**

In [2]:
from functions import *
DF, pL, bP, drugs=read_dataset('data/')
from sklearn import mixture
import statsmodels.api as sm

lS=[]
lA=[]
lT=[]
lI=[]
lEr=[]
lRt=[]
lRte=[]
lSt=[]
lSte=[]
lRv=[]
lRve=[]
lRI=[]
lSv=[]
lSve=[]
lSI=[]
lB=[]
lE=[]
lE2=[]
N=50
for sp in pL:
    for dr in drugs:
        print(sp,dr)
        BP=0
        BPbool=False
        if dr in bP[sp]:
            BP=bP[sp][dr][0]
            BPbool=True
        #GLOBAL TREND
        DFsp=DF[DF.Species==sp]
        DFc=pd.DataFrame({'Year': DFsp['Year'], 'MIC': DFsp[dr]})
        DFc=DFc.dropna()
        yL=get_key(DFc, 'Year')
        if len(yL)<2: continue
        x1=DFc['Year'].tolist()
        y1=DFc['MIC'].tolist()
        s1,i1,r1,p1,std1=stats.linregress(x1,y1)
        if p1<0.05:
            T=s1
            Er=std1
            I=i1
        else:
            T=0
            Er=0
            I=DFc['MIC'].mean()
        #LOOP FOR R CLUSTER
        Rtrend=[]
        Rvalue=[]
        Ryear=[]
        R2017=[]
        Strend=[]
        Svalue=[]
        Syear=[]
        S2017=[]
        i=-1
        while i<N:
            i=i+1
            #GET R CLUSTER
            try:#sometimes the Gaussian mixture does not converge, we need to generate a new smooth distribution
                pY,pM,sM=mic_dist(sp,dr,DF,0)
                mDFc, lDFc=extract_rclusternowrite(sp, dr, pY, pM, sM, DF)
            except:
                i=i-1
                continue
            #RCLUSTER TREND
            yLm=get_key(mDFc, 'Year') 
            if len(yLm)>=2:
                x1=mDFc['Year'].tolist()
                y1=mDFc['MIC'].tolist()
                s1,i1,r1,p1,std1=stats.linregress(x1,y1)
                if p1<0.05:
                    Rtrend.append(s1)#Rs=s1
                    mDFy=mDFc[mDFc.Year==2017]
                    if mDFy.empty:
                        R2017.append(s1*2017+i1)
                        E=True
                    else:
                        R2017.append(mDFy['MIC'].mean())
                        E=False
                else:
                    Rtrend.append(0.0)#Rs=0
                    R2017.append(mDFc['MIC'].mean())
                Rvalue.append(np.average(y1))
                Ryear.append(np.average(x1))
            #SCLUSTER TREND
            yLm=get_key(lDFc, 'Year') 
            if len(yLm)>=2:
                x1=lDFc['Year'].tolist()
                y1=lDFc['MIC'].tolist()
                s1,i1,r1,p1,std1=stats.linregress(x1,y1)
                if p1<0.05:
                    Strend.append(s1)#Rs=s1
                    lDFy=lDFc[lDFc.Year==2017]
                    if lDFy.empty:
                        S2017.append(s1*2017+i1)
                        E2=True
                    else:
                        S2017.append(lDFy['MIC'].mean())
                        E2=False
                else:
                    Strend.append(0.0)#Rs=0
                    S2017.append(lDFc['MIC'].mean())
                Svalue.append(np.mean(y1))
                Syear.append(np.mean(x1))
        #LISTS
        lS.append(sp)
        lA.append(dr)
        lT.append(T)
        lEr.append(Er)
        lI.append(I-BP)
        lRt.append(np.mean(Rtrend))
        lRte.append(np.std(Rtrend))
        lRv.append(np.mean(R2017)-BP)
        lRve.append(np.std(R2017))
        lRI.append(np.mean(Rvalue)-BP-np.mean(Rtrend)*np.mean(Ryear))
        lSt.append(np.mean(Strend))
        lSte.append(np.std(Strend))
        lSv.append(np.mean(S2017)-BP)
        lSve.append(np.std(S2017))
        lSI.append(np.mean(Svalue)-BP-np.mean(Strend)*np.mean(Syear))
        lB.append(BPbool)
        lE.append(E)
        lE2.append(E2)
        newDF=pd.DataFrame({'Species': lS, 'Antibiotic': lA, 'Trend':lT,
                    'Trenderror':lEr, 'Intercept': lI,
                    'Rtrend':lRt, 'Rtrenderror': lRte, 
                    'R2017': lRv, 'R2017error': lRve,
                    'RIntercept': lRI,
                    'Strend': lSt, 'Strenderror': lSte,
                    'S2017': lSv, 'S2017error': lSve,
                    'SIntercept': lSI, 'Breakpoint': lB,
                    'EstimatedR': lE, 'EstimatedS': lE2})
        newDF.to_csv('results/Rcluster/resistant_cluster_trends_noBP.csv', index=False, float_format='%.6f')

Escherichia coli Doripenem
Escherichia coli Imipenem
Escherichia coli Meropenem
Klebsiella pneumoniae Doripenem
Klebsiella pneumoniae Imipenem
Klebsiella pneumoniae Meropenem
Pseudomonas aeruginosa Doripenem
Pseudomonas aeruginosa Imipenem
Pseudomonas aeruginosa Meropenem
Enterobacter cloacae Doripenem
Enterobacter cloacae Imipenem
Enterobacter cloacae Meropenem
Streptococcus pneumoniae Doripenem
Streptococcus pneumoniae Imipenem
Streptococcus pneumoniae Meropenem
Haemophilus influenzae Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Haemophilus influenzae Imipenem
Haemophilus influenzae Meropenem
Acinetobacter baumannii Doripenem
Acinetobacter baumannii Imipenem
Acinetobacter baumannii Meropenem
Serratia marcescens Doripenem
Serratia marcescens Imipenem
Serratia marcescens Meropenem
Streptococcus agalactiae Doripenem
Streptococcus agalactiae Imipenem
Streptococcus agalactiae Meropenem
Klebsiella oxytoca Doripenem
Klebsiella oxytoca Imipenem
Klebsiella oxytoca Meropenem
Streptococcus pyogenes Doripenem
Streptococcus pyogenes Imipenem
Streptococcus pyogenes Meropenem
Citrobacter freundii Doripenem
Citrobacter freundii Imipenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Citrobacter freundii Meropenem
Proteus mirabilis Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Proteus mirabilis Imipenem
Proteus mirabilis Meropenem
Citrobacter koseri Doripenem
Citrobacter koseri Imipenem
Citrobacter koseri Meropenem
Bacteroides fragilis Doripenem
Bacteroides fragilis Imipenem
Bacteroides fragilis Meropenem
Morganella morganii Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Morganella morganii Imipenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Morganella morganii Meropenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Acinetobacter pitii Doripenem
Acinetobacter pitii Imipenem
Acinetobacter pitii Meropenem
Proteus vulgaris Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Proteus vulgaris Imipenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Proteus vulgaris Meropenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Enterobacter asburiae Doripenem
Enterobacter asburiae Imipenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Enterobacter asburiae Meropenem
Acinetobacter lwoffii Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Acinetobacter lwoffii Imipenem
Acinetobacter lwoffii Meropenem
Streptococcus dysgalactiae Doripenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Streptococcus dysgalactiae Imipenem
Streptococcus dysgalactiae Meropenem


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)


Clostridioides (Clostridium) difficile Doripenem
Clostridioides (Clostridium) difficile Imipenem
Clostridioides (Clostridium) difficile Meropenem


# R TRENDS: COUNTRIES ANALYSIS

In [None]:
CNT=['United States','Spain','France','Germany','Italy','Belgium', 'Mexico', 'Canada','Argentina','Portugal','Israel']

from sklearn import mixture
import statsmodels.api as sm
for country in CNT:
    print(country)
    lS=[]
    lA=[]
    lT=[]
    lI=[]
    lEr=[]
    lRt=[]
    lRte=[]
    lSt=[]
    lSte=[]
    lRv=[]
    lRve=[]
    lRI=[]
    lSv=[]
    lSve=[]
    lSI=[]
    lB=[]
    #lE=[]
    #lE2=[]
    N=50
    for sp in pL:
        for dr in drugs:
            print(sp,dr)
            BP=0
            BPbool=False
            if dr in bP[sp]:
                BP=bP[sp][dr][0]
                BPbool=True
            #GLOBAL TREND
            DFsp=DF[DF.Species==sp]
            DFc=pd.DataFrame({'Year': DFsp['Year'], 'MIC': DFsp[dr]})
            DFc=DFc.dropna()
            yL=get_key(DFc, 'Year')
            if len(yL)<2: continue
            x1=DFc['Year'].tolist()
            y1=DFc['MIC'].tolist()
            s1,i1,r1,p1,std1=stats.linregress(x1,y1)
            if p1<0.05:
                T=s1
                Er=std1
                I=i1
            else:
                T=0
                Er=0
                I=DFc['MIC'].mean()
            #LOOP FOR R CLUSTER
            Rtrend=[]
            Rvalue=[]
            Ryear=[]
            R2017=[]
            Strend=[]
            Svalue=[]
            Syear=[]
            S2017=[]
            i=-1
            while i<N:
                i=i+1
                #GET R CLUSTER
                try:#sometimes the Gaussian mixture does not converge, we need to generate a new smooth distribution
                    pY,pM,sM=mic_dist(sp,dr,DF,0)
                    mDFc, lDFc=extract_rclusternowrite(sp, dr, pY, pM, sM, DF)
                except:
                    i=i-1
                    continue
                #RCLUSTER TREND
                mDFc=mDFc[mDFc.Country==country]
                lDFc=lDFc[lDFc.Country==country]
                yLm=get_key(mDFc, 'Year') 
                if len(yLm)>=2:
                    x1=mDFc['Year'].tolist()
                    y1=mDFc['MIC'].tolist()
                    s1,i1,r1,p1,std1=stats.linregress(x1,y1)
                    if p1<0.05:
                        Rtrend.append(s1)#Rs=s1
                        mDFy=mDFc[mDFc.Year==2017]
                        if mDFy.empty:
                            R2017.append(s1*2017+i1)
                            E=True
                        else:
                            R2017.append(mDFy['MIC'].mean())
                            E=False
                    else:
                        Rtrend.append(0.0)#Rs=0
                        R2017.append(mDFc['MIC'].mean())
                    Rvalue.append(np.average(y1))
                    Ryear.append(np.average(x1))
                #SCLUSTER TREND
                yLm=get_key(lDFc, 'Year') 
                if len(yLm)>=2:
                    x1=lDFc['Year'].tolist()
                    y1=lDFc['MIC'].tolist()
                    s1,i1,r1,p1,std1=stats.linregress(x1,y1)
                    if p1<0.05:
                        Strend.append(s1)#Rs=s1
                        lDFy=lDFc[lDFc.Year==2017]
                        if lDFy.empty:
                            S2017.append(s1*2017+i1)
                            E2=True
                        else:
                            S2017.append(lDFy['MIC'].mean())
                            E2=False
                    else:
                        Strend.append(0.0)#Rs=0
                        S2017.append(lDFc['MIC'].mean())
                    Svalue.append(np.mean(y1))
                    Syear.append(np.mean(x1))
            #LISTS
            lS.append(sp)
            lA.append(dr)
            lT.append(T)
            lEr.append(Er)
            lI.append(I-BP)
            lRt.append(np.mean(Rtrend))
            lRte.append(np.std(Rtrend))
            lRv.append(np.mean(R2017)-BP)
            lRve.append(np.std(R2017))
            lRI.append(np.mean(Rvalue)-BP-np.mean(Rtrend)*np.mean(Ryear))
            lSt.append(np.mean(Strend))
            lSte.append(np.std(Strend))
            lSv.append(np.mean(S2017)-BP)
            lSve.append(np.std(S2017))
            lSI.append(np.mean(Svalue)-BP-np.mean(Strend)*np.mean(Syear))
            lB.append(BPbool)
            #lE.append(E)
            #lE2.append(E2)
            newDF=pd.DataFrame({'Species': lS, 'Antibiotic': lA, 'Trend':lT,
                        'Trenderror':lEr, 'Intercept': lI,
                        'Rtrend':lRt, 'Rtrenderror': lRte, 
                        'R2017': lRv, 'R2017error': lRve,
                        'RIntercept': lRI,
                        'Strend': lSt, 'Strenderror': lSte,
                        'S2017': lSv, 'S2017error': lSve,
                        'SIntercept': lSI, 'Breakpoint': lB})

    filename = 'resistant_cluster_trends_{}.csv'.format(country)
    newDF.to_csv('results/Rcluster_country/'+filename, float_format='%.3f',index=False)