## Kernel to load: vax_inc_general 

In [1]:
import pandas as pd
import numpy as np
import pycountry
import os

In [2]:
# The shared input folder lives next to this notebook's folder, i.e. inside
# the parent of the current working directory.
notebook_dir = os.path.split(os.getcwd())[0]
source_data_path = os.path.join(notebook_dir, "Common Source Data")

In [3]:
# Country name -> ISO 3166-1 alpha-3 code, seeded from pycountry's official names.
iso3s = {country.name: country.alpha_3 for country in pycountry.countries}

# Alternative spellings, historical states and source-specific labels that the
# input files use. Applied once, after the pycountry names, so an alias always
# wins over a pycountry entry with the same key (previously these assignments
# sat inside the loop and were re-executed for every country).
iso3s.update({
    'USA': 'USA',
    'UK': 'GBR',
    'Taiwan': 'TWN',
    'South Korea': 'KOR',
    'Czech Republic': 'CZE',
    'Brunei': 'BRN',
    'Russia': 'RUS',
    'Iran': 'IRN',
    'United States of America': 'USA',
    'Venezuela': 'VEN',
    'China (Hong Kong SAR)': 'HKG',
    "Cote d'Ivoire": 'CIV',
    'DR Congo': 'COD',
    'Guinea Bissau': 'GNB',
    'Lao PDR': 'LAO',
    'Micronesia (Federated States of)': 'FSM',
    'North Korea': 'PRK',
    'Occupied Palestinian Territory': 'PSE',
    'Swaziland': 'SWZ',
    'Tanzania': 'TZA',
    'Bolivia': 'BOL',
    'Macedonia (TFYR)': 'MKD',
    'Moldova': 'MDA',
    'Bolivia (Plurinational State of)': 'BOL',
    'China, Hong Kong SAR': 'HKG',
    'China, Taiwan Province of': 'TWN',
    'China, mainland': 'CHN',
    'Czechoslovakia': 'CSK',
    "Democratic People's Republic of Korea": 'PRK',
    'Democratic Republic of the Congo': 'COD',
    'French Guyana': 'GUF',
    'Micronesia': 'FSM',
    'Palestine': 'PSE',
    'Polynesia': 'PYF',
    'Republic of Korea': 'KOR',
    'Serbia and Montenegro': 'SCG',
    'Sudan (former)': 'SDN',
    'USSR': 'SUN',
    'Iran (Islamic Republic of)': 'IRN',
    'Republic of Moldova': 'MDA',
    'United Kingdom of Great Britain and Northern Ireland': 'GBR',
    'United Republic of Tanzania': 'TZA',
    'Venezuela (Bolivarian Republic of)': 'VEN',
    'Yugoslav SFR': 'YUG',
    'Ethiopia PDR': 'ETH',
    'Central African (Rep.)': 'CAF',
    "China (People's Rep. of)": 'CHN',
    'Chinese Taipei': 'TWN',
    'Congo (Dem. Rep. of the)': 'COD',
    'Congo (Rep. of the)': 'COG',
    "Cote D'Ivoire": 'CIV',
    'Dominican (Rep.)': 'DOM',
    "Korea (Dem People's Rep. of)": 'PRK',
    'Korea (Rep. of)': 'KOR',
    'Laos': 'LAO',
    'South Sudan (Rep. of)': 'SSD',
    'Syria': 'SYR',
    'St. Vincent and the Grenadines': 'VCT',
    'Vietnam': 'VNM',
    'Reunion': 'REU',
    'Guadaloupe': 'GLP',
    'China, Macao SAR': 'MAC',
    'Netherlands (Kingdom of the)': 'NLD',
    'Türkiye (Rep. of)': 'TUR',
    'Belgium-Luxembourg': 'BLX',
    'Faeroe Islands': 'FRO',
    'St. Lucia': 'LCA',
    'Falkland Islands': 'FLK',
    'Cabo Verde': 'CPV',
    'St. Helena': 'SHN',
    'Cabo verde': 'CPV',
})


In [4]:
# Load the cattle control-measure reports and keep only the two official
# vaccination statuses used downstream.
cattle_measures = pd.read_csv(
    os.path.join(source_data_path, 'cattle', 'AllRegions_Cattle_Vaccination_OfficialOrProhibited.csv')
)
cattle_measures = cattle_measures[
    cattle_measures['Control measure'].isin(['Official vaccination', 'Vaccination prohibited'])
]
cattle_measures = (
    cattle_measures
    .drop(columns=['Region', 'Animal Category'])
    .rename(columns={'Species': 'Animal'})
)

# Any disease label containing 'Newcastle' is collapsed onto one label.
cattle_measures['Disease'] = [
    'Newcastle disease (velogenic)' if 'Newcastle' in disease else disease
    for disease in cattle_measures['Disease']
]
cattle_measures['Animal'] = 'Cattle'
cattle_measures['Country'] = [
    'Cabo Verde' if name == 'Cabo verde' else name
    for name in cattle_measures['Country']
]

# Ceuta and Melilla are removed before the ISO3 lookup
# (presumably because they have no ISO3 entry -- TODO confirm).
cattle_measures = cattle_measures[~cattle_measures['Country'].isin(['Ceuta', 'Melilla'])].copy()
# Deliberately a dict lookup rather than .map(): an unmapped name raises KeyError.
cattle_measures['ISO3'] = [iso3s[name] for name in cattle_measures['Country']]
cattle_measures = (
    cattle_measures
    .drop(columns=['Country'])
    .rename(columns={'Control measure': 'Official Vaccination Measure'})
)
# Remember the reporting year; 'Year' itself is overwritten when rows are carried forward.
cattle_measures['Year Official Vaccination Measure'] = cattle_measures['Year']

# Keep one row per (Year, Disease, ISO3). 'Semester' is reduced to the text
# before the first '-', ordered Jan < Jul and sorted descending, so the 'Jul'
# row is kept when both semesters are present.
cattle_measures['Semester'] = pd.Categorical(
    [label.split('-')[0] for label in cattle_measures['Semester']],
    categories=['Jan', 'Jul'],
    ordered=True,
)
cattle_measures = (
    cattle_measures
    .sort_values(by=['Year', 'Disease', 'ISO3', 'Semester'], ascending=[True, True, True, False])
    .drop_duplicates(subset=['Year', 'Disease', 'ISO3'], keep='first')
    .drop(columns=['Semester'])
)


new_data = []

# Getting the latest available data up to each year (takes care of instances
# where interpolations could not be made, but past data is available).
for year in range(2005, 2025):
    reported_so_far = cattle_measures[cattle_measures['Year'] <= year]

    for _, group in reported_so_far.groupby(['ISO3', 'Disease', 'Animal']):
        # Latest available row up to and including `year`. When the group has
        # a report for `year` itself, this IS that report, so no separate
        # "current year" lookup is needed.
        latest_row = group[group['Year'] == group['Year'].max()].iloc[0]

        record = latest_row.to_dict()
        if record['Year'] != year:
            # Carried forward from an earlier year: restamp 'Year' only;
            # 'Year Official Vaccination Measure' keeps the reporting year.
            record['Year'] = year
        new_data.append(record)

cattle_official_vacc_status_df = pd.DataFrame(new_data)

In [5]:
# Load the swine control-measure reports and keep only the two official
# vaccination statuses used downstream.
swine_measures = pd.read_csv(
    os.path.join(source_data_path, 'swine', 'AllRegions_Swine_Vaccination_OfficialOrProhibited.csv')
)
swine_measures = swine_measures[
    swine_measures['Control measure'].isin(['Official vaccination', 'Vaccination prohibited'])
]
swine_measures = (
    swine_measures
    .drop(columns=['Region', 'Animal Category'])
    .rename(columns={'Species': 'Animal'})
)

# Any disease label containing 'Newcastle' is collapsed onto one label.
swine_measures['Disease'] = [
    'Newcastle disease (velogenic)' if 'Newcastle' in disease else disease
    for disease in swine_measures['Disease']
]
swine_measures['Animal'] = 'Swine'
swine_measures['Country'] = [
    'Cabo Verde' if name == 'Cabo verde' else name
    for name in swine_measures['Country']
]

# Ceuta and Melilla are removed before the ISO3 lookup
# (presumably because they have no ISO3 entry -- TODO confirm).
swine_measures = swine_measures[~swine_measures['Country'].isin(['Ceuta', 'Melilla'])].copy()
# Deliberately a dict lookup rather than .map(): an unmapped name raises KeyError.
swine_measures['ISO3'] = [iso3s[name] for name in swine_measures['Country']]
swine_measures = (
    swine_measures
    .drop(columns=['Country'])
    .rename(columns={'Control measure': 'Official Vaccination Measure'})
)
# Remember the reporting year; 'Year' itself is overwritten when rows are carried forward.
swine_measures['Year Official Vaccination Measure'] = swine_measures['Year']

# Keep one row per (Year, Disease, ISO3). 'Semester' is reduced to the text
# before the first '-', ordered Jan < Jul and sorted descending, so the 'Jul'
# row is kept when both semesters are present.
swine_measures['Semester'] = pd.Categorical(
    [label.split('-')[0] for label in swine_measures['Semester']],
    categories=['Jan', 'Jul'],
    ordered=True,
)
swine_measures = (
    swine_measures
    .sort_values(by=['Year', 'Disease', 'ISO3', 'Semester'], ascending=[True, True, True, False])
    .drop_duplicates(subset=['Year', 'Disease', 'ISO3'], keep='first')
    .drop(columns=['Semester'])
)


new_data = []

# Getting the latest available data up to each year (takes care of instances
# where interpolations could not be made, but past data is available).
for year in range(2005, 2025):
    reported_so_far = swine_measures[swine_measures['Year'] <= year]

    for _, group in reported_so_far.groupby(['ISO3', 'Disease', 'Animal']):
        # Latest available row up to and including `year`. When the group has
        # a report for `year` itself, this IS that report, so no separate
        # "current year" lookup is needed.
        latest_row = group[group['Year'] == group['Year'].max()].iloc[0]

        record = latest_row.to_dict()
        if record['Year'] != year:
            # Carried forward from an earlier year: restamp 'Year' only;
            # 'Year Official Vaccination Measure' keeps the reporting year.
            record['Year'] = year
        new_data.append(record)

swine_official_vacc_status_df = pd.DataFrame(new_data)

In [6]:
# Load the poultry control-measure reports and keep only the two official
# vaccination statuses used downstream.
poultry_measures = pd.read_csv(
    os.path.join(source_data_path, 'poultry', 'AllRegions_Poultry_Vaccination_OfficialOrProhibited.csv')
)
poultry_measures = poultry_measures[
    poultry_measures['Control measure'].isin(['Official vaccination', 'Vaccination prohibited'])
]
poultry_measures = (
    poultry_measures
    .drop(columns=['Region', 'Animal Category'])
    .rename(columns={'Species': 'Animal'})
)

# Any disease label containing 'Newcastle' is collapsed onto one label.
poultry_measures['Disease'] = [
    'Newcastle disease (velogenic)' if 'Newcastle' in disease else disease
    for disease in poultry_measures['Disease']
]
poultry_measures['Animal'] = 'Poultry'

# Ceuta and Melilla are removed before the ISO3 lookup
# (presumably because they have no ISO3 entry -- TODO confirm).
poultry_measures = poultry_measures[~poultry_measures['Country'].isin(['Ceuta', 'Melilla'])].copy()
# Deliberately a dict lookup rather than .map(): an unmapped name raises KeyError.
poultry_measures['ISO3'] = [iso3s[name] for name in poultry_measures['Country']]
poultry_measures = (
    poultry_measures
    .drop(columns=['Country'])
    .rename(columns={'Control measure': 'Official Vaccination Measure'})
)
# Remember the reporting year; 'Year' itself is overwritten when rows are carried forward.
poultry_measures['Year Official Vaccination Measure'] = poultry_measures['Year']

# Keep one row per (Year, Disease, ISO3). 'Semester' is reduced to the text
# before the first '-', ordered Jan < Jul and sorted descending, so the 'Jul'
# row is kept when both semesters are present.
poultry_measures['Semester'] = pd.Categorical(
    [label.split('-')[0] for label in poultry_measures['Semester']],
    categories=['Jan', 'Jul'],
    ordered=True,
)
poultry_measures = (
    poultry_measures
    .sort_values(by=['Year', 'Disease', 'ISO3', 'Semester'], ascending=[True, True, True, False])
    .drop_duplicates(subset=['Year', 'Disease', 'ISO3'], keep='first')
    .drop(columns=['Semester'])
)


new_data = []

# Getting the latest available data up to each year (takes care of instances
# where interpolations could not be made, but past data is available).
for year in range(2005, 2025):
    reported_so_far = poultry_measures[poultry_measures['Year'] <= year]

    for _, group in reported_so_far.groupby(['ISO3', 'Disease', 'Animal']):
        # Latest available row up to and including `year`. When the group has
        # a report for `year` itself, this IS that report, so no separate
        # "current year" lookup is needed.
        latest_row = group[group['Year'] == group['Year'].max()].iloc[0]

        record = latest_row.to_dict()
        if record['Year'] != year:
            # Carried forward from an earlier year: restamp 'Year' only;
            # 'Year Official Vaccination Measure' keeps the reporting year.
            record['Year'] = year
        new_data.append(record)

poultry_official_vacc_status_df = pd.DataFrame(new_data)

In [7]:
# Stack the per-species official-status tables (cattle, swine, poultry) into one frame.
all_official_vacc_status = pd.concat(
    [
        cattle_official_vacc_status_df,
        swine_official_vacc_status_df,
        poultry_official_vacc_status_df,
    ]
)

In [8]:
# Imputed coverage estimates: discard the residual-only and bootstrap-only
# intervals and expose the asymmetric interval under the generic
# Lower/Upper column names.
imputed_df = (
    pd.read_csv("FINAL_ALL_vaccine_cov_imputations_to_add.csv")
    .drop(columns=[
        'Vaccination Coverage Lower (Residual Only)',
        'Vaccination Coverage Upper (Residual Only)',
        'Vaccination Coverage Lower (Bootstrap Only)',
        'Vaccination Coverage Upper (Bootstrap Only)',
    ])
    .rename(columns={
        'Vaccination Coverage Lower (Asymmetric)': 'Vaccination Coverage Lower',
        'Vaccination Coverage Upper (Asymmetric)': 'Vaccination Coverage Upper',
    })
)

# Per-species coverage tables, each tagged with its species label.
poultry_original = pd.read_csv("2005-2024_full_poultry_vaccine_coverage_by_country.csv").assign(Animal='Poultry')

cattle_original = pd.read_csv("2005-2024_full_cattle_vaccine_coverage_by_country.csv").assign(Animal='Cattle')

swine_original = pd.read_csv("2005-2024_full_swine_vaccine_coverage_by_country.csv").assign(Animal='Swine')


def duplicate_and_modify(df, disease, new_disease):
    """Append relabelled copies of one disease's rows to ``df``.

    Rows whose 'Disease' equals ``disease`` and whose 'ISO3' is not 'USA' are
    copied, the copies get 'Disease' set to ``new_disease``, and they are
    appended after the original rows. ``df`` itself is not modified.

    Returns a new DataFrame with a fresh 0..n-1 index.
    """
    selected = (df['Disease'] == disease) & (df['ISO3'] != 'USA')
    relabelled = df[selected].assign(Disease=new_disease)
    return pd.concat([df, relabelled], ignore_index=True)


# Mirror every non-USA "Newcastle disease" row under the
# "Newcastle disease (velogenic)" label. Rationale: every country vaccinates
# against Newcastle disease, and all records come either from WAHIS (whose
# Newcastle data are all velogenic) or from surveys based on velogenic
# Newcastle. The one exception is the USA record, which comes from an outside
# source (not WAHIS) and is known not to be vaccination against the
# velogenic form.
velogenic_relabel = ("Newcastle disease", "Newcastle disease (velogenic)")
poultry_original = duplicate_and_modify(poultry_original, *velogenic_relabel)
cattle_original = duplicate_and_modify(cattle_original, *velogenic_relabel)
swine_original = duplicate_and_modify(swine_original, *velogenic_relabel)


orginal_dfs = pd.concat([poultry_original, cattle_original, swine_original])

# Dict lookup on purpose: a country name missing from `iso3s` raises KeyError.
orginal_dfs['ISO3'] = [iso3s[name] for name in orginal_dfs['Country']]

# Harmonise column naming ('Vaccine ...' -> 'Vaccination ...') in both
# sources before stacking them.
orginal_dfs = orginal_dfs.rename(columns=lambda col: col.replace('Vaccine', 'Vaccination'))
imputed_df = imputed_df.rename(columns=lambda col: col.replace('Vaccine', 'Vaccination'))
compiled = pd.concat([orginal_dfs, imputed_df])

# Scale the estimate and its bounds by 100 (to percent, per the '(%)' rename below).
for coverage_col in ('Vaccination Coverage', 'Vaccination Coverage Lower', 'Vaccination Coverage Upper'):
    compiled[coverage_col] = compiled[coverage_col] * 100

compiled = (
    compiled
    .drop(columns=['Region', 'Animal category', 'Species', 'Vaccination type', 'Number of vaccinated',
                   'time', 'TOTAL Population', 'TOTAL Slaughtered Population'])
    .rename(columns={'Vaccination Coverage': 'Vaccination Coverage (%)'})
)

In [9]:
# Country name -> ISO3 for every pycountry entry.
countries = {country.name: country.alpha_3 for country in pycountry.countries}
# Extra entry for 'Serbia and Montenegro' (code 'SCG'). Added once after the
# pycountry names; previously this assignment sat inside the loop body and
# was repeated for every country.
countries['Serbia and Montenegro'] = 'SCG'

# ISO3 -> display name, used to label the output rows.
flipped_dict = {iso3: name for name, iso3 in countries.items()}
# Display-name overrides / additions.
flipped_dict['TUR'] = 'Türkiye, Republic of'
flipped_dict['XKX'] = 'Kosovo'

In [10]:
# Attach the display name for each ISO3 code; a code missing from
# `flipped_dict` raises KeyError.
all_official_vacc_status['Country'] = [
    flipped_dict[code] for code in all_official_vacc_status['ISO3']
]

In [11]:
# ISO3 codes present in the coverage data that have no display name in
# `flipped_dict`. An explicit membership test replaces the former bare
# `except:`, which would have hidden any unrelated error raised in the lookup.
unknown_iso3s = [
    iso3 for iso3 in np.unique(compiled['ISO3'])
    if iso3 not in flipped_dict
]

In [12]:
# Drop coverage rows whose ISO3 code has no display name.
has_unknown_iso3 = compiled['ISO3'].isin(unknown_iso3s)
compiled = compiled[~has_unknown_iso3]

In [13]:
# Label each row with its display name.
compiled['Country'] = [flipped_dict[code] for code in compiled['ISO3']]

# 'Serbia and Montenegro' rows are kept only up to and including 2006.
is_serbia_montenegro = compiled['Country'] == 'Serbia and Montenegro'
is_after_2006 = compiled['Year'] > 2006
compiled = compiled[~(is_serbia_montenegro & is_after_2006)]

In [14]:
# Spot check: display the compiled rows for Finland / Anthrax.
compiled[
    (compiled['Country'] == 'Finland')
    & (compiled['Disease'] == 'Anthrax')
]

Unnamed: 0,ISO3,Country,Year,Disease,Year Range,Vaccination Coverage (%),Source,Vaccination Coverage Lower,Vaccination Coverage Upper,Animal
8952,FIN,Finland,2008.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8956,FIN,Finland,2009.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8960,FIN,Finland,2010.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8964,FIN,Finland,2011.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8968,FIN,Finland,2012.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8972,FIN,Finland,2013.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8976,FIN,Finland,2014.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8980,FIN,Finland,2015.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8984,FIN,Finland,2016.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle
8988,FIN,Finland,2017.0,Anthrax,2008.0,0.0,WAHIS administrative division reports,0.0,9.739376,Cattle


In [15]:
# Outer-join the coverage estimates with the official vaccination status so
# rows present in only one of the two tables are kept. No `on=` is given, so
# pandas joins on every column name the two frames share.
# NOTE(review): confirm the shared columns are exactly the intended keys.
full_vaccination_final = pd.merge(
    compiled,
    all_official_vacc_status,
    how='outer',
).rename(columns={'Year Range': 'Year Data'})

# Relabel the species 'Swine' as 'Pigs'.
full_vaccination_final['Animal'] = [
    animal if animal != 'Swine' else 'Pigs'
    for animal in full_vaccination_final['Animal']
]

In [16]:
# Keep only the published columns, in publication order.
output_columns = [
    'Country',
    'ISO3',
    'Animal',
    'Year',
    'Disease',
    'Vaccination Coverage (%)',
    'Source',
    'Vaccination Coverage Lower',
    'Vaccination Coverage Upper',
    'Year Data',
    'Official Vaccination Measure',
    'Year Official Vaccination Measure',
]
full_vaccination_final = full_vaccination_final.loc[:, output_columns]


In [17]:
# Where vaccination is officially prohibited, set the estimate and both bounds
# to 0 and attribute the row to the officially reported measure.
is_prohibited = full_vaccination_final['Official Vaccination Measure'] == 'Vaccination prohibited'
overridden_columns = ['Vaccination Coverage (%)', 'Vaccination Coverage Lower', 'Vaccination Coverage Upper', 'Source']
full_vaccination_final.loc[is_prohibited, overridden_columns] = [
    0, 0, 0, 'WAHIS - officially reported vaccination measure'
]


In [18]:
# Add the '(%)' unit to the interval column names so they match the estimate column.
full_vaccination_final = full_vaccination_final.rename(
    columns={
        'Vaccination Coverage Lower': 'Vaccination Coverage (%) Lower',
        'Vaccination Coverage Upper': 'Vaccination Coverage (%) Upper',
    }
)

In [19]:
# Cap the point estimate and the upper bound at 100 %.
# NOTE(review): the lower bound is not capped here -- confirm that is intended.
for capped_column in ("Vaccination Coverage (%)", "Vaccination Coverage (%) Upper"):
    full_vaccination_final[capped_column] = full_vaccination_final[capped_column].clip(upper=100)


In [20]:
# Write the final table sorted by country, species, year and disease.
# 'utf-8-sig' is used so special characters are handled correctly.
(
    full_vaccination_final
    .sort_values(['Country', 'Animal', 'Year', 'Disease'])
    .to_csv(
        'Supplementary Spreadsheet- Vaccination Coverage Estimates.csv',
        index=False,
        encoding='utf-8-sig',
    )
)