## Kernel to load: vax_inc_incidence 

In [1]:
import pandas as pd
import numpy as np
import random
from functools import reduce
import os 

# OPTIONALLY (if not already added to system path),
    # may need to uncomment and change the below line to point to the installed R location on your local
    # computer (directory that contains the bin folder)
# os.environ['R_HOME'] = r'C:/Program Files/R/R-4.1.3/'

import rpy2.robjects as robjects
from scipy.stats import binomtest
import time
import pycountry

In [2]:
notebook_dir = os.path.dirname(os.getcwd())
source_data_path=os.path.join(notebook_dir, "Common Source Data")

In [3]:
df_start= pd.read_csv(os.path.join(source_data_path, "swine/AllRegions_swine.csv"))



#Removing 2024 July-December As this semester is not yet complete to represent true disease incidence
df_start=df_start[df_start['Semester']!='Jul-Dec 2024']
#A few times when country-wide report, they report NaN for administrative division
df_start['Administrative Division'] = df_start['Administrative Division'].fillna(df_start['Country'])
df_start

Unnamed: 0,Year,Semester,World region,Country,Administrative Division,Disease,Serotype/Subtype/Genotype,Animal Category,Species,Event_id,Outbreak_id,New outbreaks,Susceptible,Measuring units,Cases,Killed and disposed of,Slaughtered,Deaths,Vaccinated
0,2005,Jul-Dec 2005,Africa,Angola,Huambo,Taenia solium (Inf. with) (Porcine cysticercosis),,Domestic,Swine,-,-,-,321,Animal,10,3,321,0,0
1,2005,Jul-Dec 2005,Africa,Angola,Huila,African swine fever virus (Inf. with),,Domestic,Swine,-,-,-,150,Animal,58,0,0,58,0
2,2005,Jul-Dec 2005,Africa,Benin,Abomey-Calavi,African swine fever virus (Inf. with),,Domestic,Swine,-,-,-,17360,Animal,467,0,0,332,-
3,2005,Jul-Dec 2005,Africa,Benin,Abomey-Calavi,Taenia solium (Inf. with) (Porcine cysticercosis),,Domestic,Swine,-,-,-,-,Animal,1,-,1,-,-
4,2005,Jul-Dec 2005,Africa,Benin,Akpro-Misserete,African swine fever virus (Inf. with),,Domestic,Swine,-,-,-,3000,Animal,25,0,0,20,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18217,2024,Jan-Jun 2024,Europe,Ukraine,Poltavs'kyi,African swine fever virus (Inf. with),-,Domestic,Swine,5707,135245,1,16,Animal,16,15,0,1,0
18218,2024,Jan-Jun 2024,Europe,Ukraine,Slovians'kyi,African swine fever virus (Inf. with),-,Domestic,Swine,5652,134146,1,25,Animal,6,19,0,6,0
18219,2024,Jan-Jun 2024,Europe,Ukraine,Tarashchans'kyi,African swine fever virus (Inf. with),-,Domestic,Swine,5678,135573,1,620,Animal,15,605,0,15,0
18220,2024,Jan-Jun 2024,Europe,Ukraine,Volodymyr-Volyns'kyi,African swine fever virus (Inf. with),-,Domestic,Swine,5606,135378,1,19,Animal,1,18,0,1,0


In [4]:
# Replace '-' with appropriate values and convert to float
columns_to_process = ['Vaccinated', 'Susceptible', 'Killed and disposed of', 
                      'Deaths', 'New outbreaks', 'Slaughtered', 'Cases']

columns_to_process = ['Vaccinated', 'Susceptible', 'Killed and disposed of', 'Deaths', 'New outbreaks', 'Slaughtered','Cases']
for col in columns_to_process:
    if col=='Cases':
        df_start[col] = df_start[col].replace('-', np.nan).astype(float)    
    else:
        df_start[col] = df_start[col].replace('-', 0).astype(float)

# Step 1: Combine Removal Categories
df_start['Total_Removed'] = df_start['Deaths'] + df_start['Killed and disposed of'] + df_start['Slaughtered']

# Step 2: Initialize Adjusted Susceptible and Adjustment Markers
df_start['Adjusted_Susceptible'] = df_start['Susceptible']
df_start['Adjustment_STEP'] = ''  # To record which steps were applied



#THIS IS HOW I'M PROCESSING IT, MIGHT BE OVERKILL

# Step 3: Apply Sequential Adjustments
# Step 1: Add Vaccinated if INITIAL Susceptible < Vaccinated
mask_vaccinated = df_start['Susceptible'] < df_start['Vaccinated']
df_start.loc[mask_vaccinated, 'Adjusted_Susceptible'] += df_start['Vaccinated']
df_start.loc[mask_vaccinated, 'Adjustment_STEP'] += 'Step1_AddVaccinated;'

# Step 2: Add Total Removed if INITIAL Susceptible < Removed
mask_removed = df_start['Susceptible'] < df_start['Total_Removed']
df_start.loc[mask_removed, 'Adjusted_Susceptible'] += df_start['Total_Removed']
df_start.loc[mask_removed, 'Adjustment_STEP'] += 'Step2_AddRemoved;'

# Step 3: If Vaccinated was NOT added but Adjusted Susceptible < Cases and adding Vaccinated alone resolves the discrepancy, add Vaccinated
mask_vaccinated_not_added = (~mask_vaccinated) & \
                            (df_start['Adjusted_Susceptible'] < df_start['Cases']) & \
                            ((df_start['Adjusted_Susceptible'] + df_start['Vaccinated']) >= df_start['Cases'])

df_start.loc[mask_vaccinated_not_added, 'Adjusted_Susceptible'] += df_start.loc[mask_vaccinated_not_added, 'Vaccinated']
df_start.loc[mask_vaccinated_not_added, 'Adjustment_STEP'] += 'Step3_AddVaccinated;'


# Step 4: If Removed was NOT added but Adjusted_Susceptible < Cases and adding Removed alone resolves the discrepancy, add Removed
mask_removed_not_added = (~mask_removed) & \
(df_start['Adjusted_Susceptible'] < df_start['Cases']) &\
(df_start['Adjusted_Susceptible'] + df_start['Total_Removed'] >= df_start['Cases'])
df_start.loc[mask_removed_not_added, 'Adjusted_Susceptible'] += df_start['Total_Removed']
df_start.loc[mask_removed_not_added, 'Adjustment_STEP'] += 'Step5_AddRemoved;'

# Step 5: If Vaccinated and Removed was NOT added but Adjusted_Susceptible < Cases and adding Removed and Vaccinated together resolves the discrepancy
#add Removed and Vaccinted
mask_removed_not_added = (~mask_removed) &\
(df_start['Adjusted_Susceptible'] < df_start['Cases']) &\
(df_start['Adjusted_Susceptible'] +df_start['Total_Removed'] + df_start['Vaccinated'] >= df_start['Cases'])
df_start.loc[mask_removed_not_added, 'Adjusted_Susceptible'] += df_start['Total_Removed'] + df_start['Vaccinated']
df_start.loc[mask_removed_not_added, 'Adjustment_STEP'] += 'Step5_AddRemoved;'

# Step 6: If less than cases after the above steps, just add cases to Adjusted_Susceptible
mask_final_cases =(df_start['Adjusted_Susceptible'] < df_start['Cases'])
df_start.loc[mask_final_cases, 'Adjusted_Susceptible'] += df_start['Cases']
df_start.loc[mask_final_cases, 'Adjustment_STEP'] += 'Step6_AddCases;'

# Final Output: Adjusted DataFrame
df_start['Final_Adjustment'] = np.where(
    df_start['Adjusted_Susceptible'] != df_start['Susceptible'], 1, 0
)


# Handle invalid Susceptible Population values; we will not use population size, we will just compute national incidence estimates without
    #adding using record as part of confidence interval calculation
df_start.loc[
    (df_start['Adjusted_Susceptible'] < df_start['Cases']) | (df_start['Adjusted_Susceptible'] == 0),
    'Adjusted_Susceptible'
] = np.nan


df_start=df_start[df_start['Cases'].notna()]

unique_countries = df_start['Country'].unique()
print(unique_countries)
print(len(unique_countries))


['Angola' 'Benin' 'Burundi' 'Cameroon' 'Congo (Dem. Rep. of the)' 'Egypt'
 'Ghana' 'Madagascar' 'Malawi' 'Mozambique' 'Namibia' 'Nigeria'
 'South Africa' 'Togo' 'Uganda' 'Zimbabwe' 'Argentina' 'Bolivia' 'Brazil'
 'Colombia' 'Cuba' 'Dominican (Rep.)' 'El Salvador' 'Greenland'
 'Guatemala' 'Jamaica' 'Mexico' 'Nicaragua' 'Peru'
 'United States of America' 'Uruguay' 'Venezuela' 'Armenia' 'Bhutan'
 'Cambodia' "China (People's Rep. of)" 'Chinese Taipei' 'Georgia'
 'Hong Kong' 'India' 'Japan' 'Korea (Rep. of)' 'Malaysia' 'Mongolia'
 'Myanmar' 'Nepal' 'Philippines' 'Sri Lanka' 'Thailand' 'Vietnam'
 'Belarus' 'Bosnia and Herzegovina' 'Bulgaria' 'Croatia' 'Estonia'
 'France' 'Hungary' 'Italy' 'Latvia' 'Lithuania' 'Poland' 'Portugal'
 'Romania' 'Russia' 'Serbia and Montenegro' 'Slovakia' 'Slovenia' 'Spain'
 'Switzerland' 'Ukraine' 'United Kingdom' 'New Caledonia' 'Guinea-Bissau'
 'Senegal' 'Tanzania' 'Ecuador' 'Panama'
 'Saint Vincent and the Grenadines' 'Finland' 'Kenya' 'Lesotho' 'Rwanda'
 'Zam

### Extrapolate total cases by reporting cases divided by adjusted pop then multiplying by population of the livestock in the region.



In [5]:
dict_dates = dict({
                  'Jan-Jun 2005':'2005-06-30','Jul-Dec 2005':'2005-12-31',
                  'Jan-Jun 2006':'2006-06-30','Jul-Dec 2006':'2006-12-31',
                  'Jan-Jun 2007':'2007-06-30','Jul-Dec 2007':'2007-12-31',
                  'Jan-Jun 2008':'2008-06-30','Jul-Dec 2008':'2008-12-31',
                  'Jan-Jun 2009':'2009-06-30','Jul-Dec 2009':'2009-12-31',
                  'Jan-Jun 2010':'2010-06-30','Jul-Dec 2010':'2010-12-31',
                  'Jan-Jun 2011':'2011-06-30','Jul-Dec 2011':'2011-12-31',
                  'Jan-Jun 2012':'2012-06-30','Jul-Dec 2012':'2012-12-31',
                  'Jan-Jun 2013':'2013-06-30','Jul-Dec 2013':'2013-12-31',
                  'Jan-Jun 2014':'2014-06-30','Jul-Dec 2014':'2014-12-31',
                  'Jan-Jun 2015':'2015-06-30','Jul-Dec 2015':'2015-12-31',
                  'Jan-Jun 2016':'2016-06-30','Jul-Dec 2016':'2016-12-31',
                  'Jan-Jun 2017':'2017-06-30','Jul-Dec 2017':'2017-12-31',
                  'Jan-Jun 2018':'2018-06-30','Jul-Dec 2018':'2018-12-31',
                  'Jan-Jun 2019':'2019-06-30','Jul-Dec 2019':'2019-12-31',
                  'Jan-Jun 2020':'2020-06-30','Jul-Dec 2020':'2020-12-31',
                  'Jan-Jun 2021':'2021-06-30','Jul-Dec 2021':'2021-12-31',
                  'Jan-Jun 2022':'2022-06-30','Jul-Dec 2022':'2022-12-31',
                  'Jan-Jun 2023':'2023-06-30','Jul-Dec 2023':'2023-12-31',
                  'Jan-Jun 2024':'2024-06-30'

    
                                                                        })

df_start=df_start.copy()

df_start['time']=[dict_dates[i] for i in df_start['Semester']]
df_start['Semester']=['June' if '06-30' in time else 'December' for time in df_start['time'].values]
df_start['time'] = pd.to_datetime(df_start['time'])

In [6]:
start_countries=df_start['Country']

countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_3
    countries['USA']='USA'
    countries['UK']='GBR'
    countries['Taiwan']='TWN'
    countries['South Korea']='KOR'
    countries['Czech Republic']='CZE'
    countries['Brunei']='BRN'
    countries['Russia']='RUS'
    countries['Iran']='IRN'
    countries['United States of America']='USA'
    countries['Venezuela']='VEN'
    countries['China (Hong Kong SAR)']='HKG'
    countries["Cote d'Ivoire"]='CIV'
    countries['DR Congo']='COD'
    countries['Guinea Bissau']='GNB'
    countries['Lao PDR']='LAO'
    countries['Micronesia (Federated States of)']='FSM'
    countries['North Korea']='PRK'
    countries['Occupied Palestinian Territory']='PSE'
    countries['Swaziland']='SWZ'
    countries['Tanzania']='TZA'
    countries['Bolivia']='BOL'
    countries['Macedonia (TFYR)']='MKD'
    countries['Moldova']='MDA'
    countries['Bolivia (Plurinational State of)']='BOL'
    countries['China, Hong Kong SAR']='HKG'
    countries['China, Taiwan Province of']='TWN'
    countries['China, mainland']='CHN'
    countries['Czechoslovakia']='CSK'
    countries["Democratic People's Republic of Korea"]='PRK'
    countries['Democratic Republic of the Congo']='COD'
    countries['French Guyana']='GUF'
    countries['Micronesia']='FSM'
    countries['Palestine']='PSE'
    countries['Polynesia']='PYF'
    countries['Republic of Korea']='KOR'
    countries['Serbia and Montenegro']='SCG'
    countries['Sudan (former)']='SDN'
    countries['Türkiye']='TUR'
    countries['USSR']='SUN'
    countries['Iran (Islamic Republic of)']='IRN'
    countries['Republic of Moldova']='MDA'
    countries['United Kingdom of Great Britain and Northern Ireland']='GBR'
    countries['United Republic of Tanzania']='TZA'
    countries['Venezuela (Bolivarian Republic of)']='VEN'
    countries['Yugoslav SFR']='YUG'
    countries['Ethiopia PDR']='ETH'
    countries['Central African (Rep.)']='CAF'
    countries["China (People's Rep. of)"]='CHN'
    countries['Chinese Taipei']='TWN'
    countries['Congo (Dem. Rep. of the)']='COD'
    countries['Congo (Rep. of the)']='COG'
    countries["Cote D'Ivoire"]='CIV'
    countries['Dominican (Rep.)']='DOM'
    countries["Korea (Dem People's Rep. of)"]='PRK'
    countries['Korea (Rep. of)']='KOR'
    countries['Laos']='LAO'
    countries['South Sudan (Rep. of)']='SSD'
    countries['Syria']='SYR'
    countries['St. Vincent and the Grenadines']='VCT'
    countries['Vietnam']='VNM'
    countries['Reunion']='REU'
    countries['Guadaloupe']='GLP'
    countries['China, Macao SAR']='MAC'
    countries['Netherlands (Kingdom of the)']='NLD'
    countries['Türkiye (Rep. of)']='TUR'
    countries['Belgium-Luxembourg']='BLX'
    countries['St. Helena']='SHN'
    countries['Cabo verde']='CPB'
    countries['Faeroe Islands']='FRO'
    countries['Wallis and Futuna Islands']='WLF'


codes_start=[countries.get(country, 'Unknown code:'+country) for country in start_countries]

for code in codes_start:
    if "Unknown" in code:
        print("FIX THIS:",code)


iso3s_start=[]

for i in start_countries:
    try:
        iso3s_start+=[countries[i]]
    except:
        iso3s_start+=[None]

df_start['ISO3']=iso3s_start

In [7]:
df_start['Estimated Cases']=df_start['Cases']

df_start = (
    df_start.groupby(['Country', 'Administrative Division', 'Disease', 'Semester','Year'], as_index=False)
    .agg({
        'Country': 'first',
        'ISO3':'first',
        'Administrative Division': 'first',
        'Disease': 'first',
        'Semester': 'first',
        'Year': 'first',
        'Adjusted_Susceptible': lambda x: x.dropna().sum() if not x.dropna().empty else np.nan,
        'Cases': lambda x: x.dropna().sum(),
        'Estimated Cases': lambda x: x.dropna().sum(),
        'World region': 'first', 
        'Serotype/Subtype/Genotype':lambda x: 'aggregated',
        'Event_id':lambda x: 'aggregated',
        'Species':'first',
        'time':'first',
        'Outbreak_id':lambda x: 'aggregated',
        'New outbreaks':lambda x: x.dropna().sum()
    })
)

df_start['Incidence']=df_start['Estimated Cases']/df_start['Adjusted_Susceptible']

df_start['original_year']=df_start['Year']

In [8]:
# Step 1: Add missing years for each unique combination of Semester, ISO3, Administrative Division, and Disease
def add_missing_years(df):
    group_columns = ["Semester", "ISO3", "Administrative Division", "Disease"]

    # Get all unique combinations of grouping columns and their year ranges
    unique_combinations = df[group_columns].drop_duplicates()
    unique_combinations['Min_Year'] = df.groupby(group_columns)['Year'].transform('min')
    unique_combinations['Max_Year'] = df.groupby(group_columns)['Year'].transform('max')

    # Create a dataframe with all possible year combinations
    all_years = []
    for _, row in unique_combinations.iterrows():
        years = pd.DataFrame({'Year': range(row['Min_Year'], row['Max_Year'] + 1)})
        for col in group_columns:
            years[col] = row[col]
        all_years.append(years)

    all_years_df = pd.concat(all_years, ignore_index=True)

    # Merge with the original dataframe to add missing years
    expanded_df = pd.merge(all_years_df, df, on=group_columns + ['Year'], how='left')

    # Set specified columns to None for new rows
    none_columns = ["Estimated Cases", 'Adjusted_Susceptible', 'Incidence', 'New outbreaks']
    for col in none_columns:
        expanded_df[col] = expanded_df[col].where(expanded_df[col].notna(), None)

    # Exclude `none_columns` from forward-filling
    ffill_columns = expanded_df.columns.difference(none_columns + ['Year'])

    # Fill other columns forward
    expanded_df[ffill_columns] = expanded_df.sort_values(by=group_columns + ['Year'])[ffill_columns].ffill()

    # Update the 'time' column based on Year and Semester
    def update_time(row):
        if row['Semester'] == 'December':
            return f"{row['Year']}-12-31"
        elif row['Semester'] == 'June':
            return f"{row['Year']}-06-30"
        else:
            return None  # Handle unexpected semester values if necessary

    expanded_df['time'] = expanded_df.apply(update_time, axis=1)

    # Convert 'time' to datetime64 type
    expanded_df['time'] = pd.to_datetime(expanded_df['time'], format='%Y-%m-%d', errors='coerce')

    # Initialize the Derived_Cases_Method column
    expanded_df['Derived_Cases_Method'] = "None"

    return expanded_df


    expanded_df['time'] = expanded_df.apply(update_time, axis=1)

    # Convert 'time' to datetime64 type
    expanded_df['time'] = pd.to_datetime(expanded_df['time'], format='%Y-%m-%d', errors='coerce')

    # Initialize the Derived_Cases_Method column
    expanded_df['Derived_Cases_Method'] = "None"

    return expanded_df


    #Linear imputing incidence for samples is very risky due to sample size changes. 1 sick out 1 interpolated to 1 sick out millions, will predict MANY cases
#     return df.groupby(group_columns, group_keys=False).apply(interpolate_group)
def interpolate_adjusted_and_cases(df):
    group_columns = ["Semester", "ISO3", "Administrative Division", "Disease"]

    def interpolate_group(group):
        group = group.sort_values('Year').reset_index(drop=True)

        # Interpolate Adjusted_Susceptible and Incidence
        interpolated_susceptible = group['Adjusted_Susceptible'].fillna(value=np.nan).interpolate(method='linear')
        interpolated_cases = group['Estimated Cases'].fillna(value=np.nan).interpolate(method='linear')

        # Update Derived_Cases_Method for interpolated rows
        group.loc[interpolated_susceptible.notna() & group['Adjusted_Susceptible'].isna(), 'Derived_Cases_Method'] = "Adjusted_Susceptible_Cases"
        group.loc[interpolated_cases.notna() & group['Estimated Cases'].isna(), 'Derived_Cases_Method'] = "Adjusted_Susceptible_Cases"

        # Update Adjusted_Susceptible and Incidence
        group['Adjusted_Susceptible'] = interpolated_susceptible
        group['Estimated Cases'] = interpolated_cases

        return group

    return df.groupby(group_columns, group_keys=False).apply(interpolate_group)
    
def interpolate_remaining_cases(df):
    group_columns = ["Semester", "ISO3", "Administrative Division", "Disease"]

    def interpolate_cases(group):
        group = group.sort_values('Year').reset_index(drop=True)
        interpolated_cases = group['Estimated Cases'].fillna(value=np.nan).interpolate(method='linear')

        # Update Derived_Cases_Method for interpolated rows
        imputed_rows = interpolated_cases.notna() & group['Estimated Cases'].isna()
        group.loc[imputed_rows, 'Derived_Cases_Method'] = "Cases_Only"

        # Update Estimated Cases with interpolated values
        group['Estimated Cases'] = interpolated_cases

        return group

    return df.groupby(group_columns, group_keys=False).apply(interpolate_cases)

def update_interpolated_upper_year(df):
    group_columns = ["Semester", "ISO3", "Administrative Division", "Disease"]

    def assign_upper_year(group):
        group = group.sort_values('Year').reset_index(drop=True)

        for idx in group[group['Derived_Cases_Method'].notna()].index:
            if group.loc[idx, 'Derived_Cases_Method'] in ["Adjusted_Susceptible_Cases", "Cases_Only"]:
                # Find the next original (non-interpolated) row by year
                upper_idx = group[(group.index > idx) & (group['Derived_Cases_Method']=='None')].index.min()
                lower_idx = group[(group.index < idx) & (group['Derived_Cases_Method']=='None')].index.max()

                
                if pd.notna(upper_idx):  # Ensure a valid upper index exists
                    group.loc[idx, 'interpolated_upper_year'] = int(group.loc[upper_idx, 'Year'])
                else:
                    group.loc[idx, 'interpolated_upper_year'] = None  # Handle edge cases

                if pd.notna(lower_idx):  # Ensure a valid upper index exists
                    group.loc[idx, 'interpolated_lower_year'] = int(group.loc[lower_idx, 'Year'])
                else:
                    group.loc[idx, 'interpolated_lower_year'] = None  # Handle edge cases
        return group

    return df.groupby(group_columns, group_keys=False).apply(assign_upper_year)


# Main procedure
def process_dataframe(df):
    df = add_missing_years(df)  # Add missing years
    df = interpolate_adjusted_and_cases(df)  # Interpolate Adjusted_Susceptible and Incidence, update Cases
    df = interpolate_remaining_cases(df)  # Interpolate remaining Cases
    df = update_interpolated_upper_year(df)  # Update interpolated_upper_year
    return df

# Example usage
df_result = process_dataframe(df_start)



In [9]:
final_dfs=[]
#Now doing 2005 to 2024 - getting all data possible
for year in range(2005,2025): #Making data for all years (2010-2024)... Doing 2010 as earliest, since that is the earliest we have gross economic value data
    df2=df_result.copy()
    df2=df2[df2['Year']<=year]

    #Country,disease pairs for which incidence can be calculated (have population estimates, or notification sample sizes to generate CIs)
    work=[]
    #Country,disease pairs for which incidence cannot be calculated (no population estimates, or notification sample sizes to generate CIs)
    not_work=[]

    
    #df2=df2.sort_values('time').drop_duplicates(['Country','Administrative Division','Disease','Semester'], keep='last')
    #Below we aggregate if multiple reports for same disease/administrative division/semester/year, for latest available year
    df2 = df2[df2['time'] == df2.groupby(['Country', 'Administrative Division', 'Disease', 'Semester'])['time'].transform('max')]
    cols_keep=['Country','ISO3','Administrative Division','Disease','Semester','Year','original_year','Adjusted_Susceptible','Cases',
               'World region','Serotype/Subtype/Genotype','Event_id','Species','time','Outbreak_id','New outbreaks','Estimated Cases',
               'Derived_Cases_Method','interpolated_upper_year','interpolated_lower_year']
    df2=df2.loc[:,cols_keep]

    df2=df2.sort_values('time')

    df2.loc[
    (df2['Adjusted_Susceptible'] < df2['Estimated Cases']) | (df2['Adjusted_Susceptible'] == 0),
    'Adjusted_Susceptible'
    ] = np.nan

            
    incidence_scale_up=[]
    pop_for_CIs=[]
    total_cases=[]
    
    for row in df2.iterrows():
        #if NaN
        if (row[1]['Adjusted_Susceptible']!=row[1]['Adjusted_Susceptible']):  
            #incidence_scale_up+=[None]
            incidence_scale_up+=[row[1]['Estimated Cases']] #In this case, we will simply scale up cases, predict incidence, but not form CIs, as we can't
                                                        #Storing cases instead of incidence, thus we skip multiplying by 'sample size' in next step
                                                            #where this is done for this type of instance
            pop_for_CIs+=[np.nan]
            #print(row[1]['Country'], row[1]['Adjusted_Susceptible'],row[1]['Cases'])
        else:
            incidence_scale_up+=[(row[1]['Estimated Cases'])/(row[1]['Adjusted_Susceptible'])]
            
            pop_for_CIs+=[row[1]['Adjusted_Susceptible']]
        
        if row[1]['Estimated Cases']==row[1]['Estimated Cases']:
            total_cases+=[row[1]['Cases']]
            
                      
    df2['Incidence to form initial CIs']=incidence_scale_up
    df2['Pop for initial CIs']=pop_for_CIs
    df2['Latest Reported Cases Aggregate']=total_cases
    
    df2=df2.reset_index()
    
    
    incidencer_lower=[]
    incidencer_upper=[]
    
    for row in df2.iterrows():
        try:
            
            if row[1]['Estimated Cases']>0:
            
                string='''
                    n <- val1
                    d <- val2
                    fit <- glm(n ~ offset(log(d)), family=poisson)
                    
                    exp(confint(fit))
                    '''
                string=string.replace('n <- val1','n <- '+str(int(round(row[1]['Incidence to form initial CIs']*row[1]['Pop for initial CIs']))))
                string=string.replace('d <- val2','d <- '+str(row[1]['Pop for initial CIs']))
                
                
                l,u=robjects.r(string)
                
            else:
                l,u=binomtest(int(round(row[1]['Incidence to form initial CIs']*row[1]['Pop for initial CIs'])),int(round(row[1]['Pop for initial CIs']))).proportion_ci()
    
                
                
                
                
            
        except:
            #print(row)
            l,u=np.nan,np.nan
        incidencer_lower+=[l]
        incidencer_upper+=[u]
    
    df2['Incidence Initial Lower']=incidencer_lower
    df2['Incidence Initial Upper']=incidencer_upper
    
    #df2=df2[df2['Incidence Initial Lower'].notna()]
    
    
    
    pop_swine_df = pd.read_csv(os.path.join(source_data_path, 'swine/swine_pop_2024.csv')).loc[:,['Area','Unit','Value','Year','Item','ISO3']]
    pop_swine_df = pop_swine_df.sort_values('Value').drop_duplicates(subset=['ISO3','Year','Item'], keep='last')
    pop_swine_df.rename(columns={'Value':'TOTAL Population'},inplace=True)
    
    pop_swine_df = (
        pop_swine_df.groupby(['Area', 'Year','ISO3'], as_index=False)
        .agg({
            'ISO3':'first',
            'Area': 'first',
            'Year': 'first',
            'Unit': 'first',
            'Item': 'first',
            'TOTAL Population': 'sum',  # Aggregate the 'Value' by summing
        })
    )
    pop_swine_df.drop(columns=['Item'],inplace=True)
    
    
    #pop_swine_df_2024 denotes the year, not just 2024
    pop_swine_df_2024 = pd.read_csv(os.path.join(source_data_path, 'swine/swine_pop_2024.csv')).loc[:,['Area','Unit','Value','Year','Item','ISO3']]
    pop_swine_df_2024=pop_swine_df_2024[pop_swine_df_2024['Year']==year]
    pop_swine_df_2024 = pop_swine_df_2024.sort_values('Value').drop_duplicates(subset=['ISO3','Year','Item'], keep='last')
    pop_swine_df_2024 = (
        pop_swine_df_2024.groupby(['Area', 'Year','ISO3'], as_index=False)
        .agg({
            'ISO3':'first',
            'Area': 'first',
            'Year': 'first',
            'Unit': 'first',
            'Item': 'first',
            'Value': 'sum',  # Aggregate the 'Value' by summing
        })
    )
    pop_swine_df_2024.drop(columns=['Item'],inplace=True)
    
    pop_swine_df_2024=pop_swine_df_2024.sort_values('Value', ascending=False).drop_duplicates(['Area','Year'],keep='first')
    pop_swine_df_2024.reset_index().drop(columns=['index']) 
    pop_swine_df_2024.rename(columns={'Value':'TOTAL Population '+str(year)},inplace=True)


    
    killed_pop_swine_df = pd.read_csv(os.path.join(source_data_path, 'swine/killed_swine_pop_2024.csv')).loc[:,['Area','Unit','Value','Year','Item','ISO3']]
    killed_pop_swine_df = killed_pop_swine_df.sort_values('Value').drop_duplicates(subset=['ISO3','Year','Item'], keep='last')
    killed_pop_swine_df.rename(columns={'Value':'TOTAL Slaughtered Population'},inplace=True)
    killed_pop_swine_df = (
        killed_pop_swine_df.groupby(['Area', 'Year','ISO3'], as_index=False)
        .agg({
            'ISO3':'first',
            'Area': 'first',
            'Year': 'first',
            'Unit': 'first',
            'Item': 'first',
            'TOTAL Slaughtered Population': 'sum',  # Aggregate the 'Value' by summing
        })
    )
    killed_pop_swine_df.drop(columns=['Item'],inplace=True)

    killed_pop_swine_df_2024 = pd.read_csv(os.path.join(source_data_path, 'swine/killed_swine_pop_2024.csv')).loc[:,['Area','Unit','Value','Year','Item','ISO3']]
    killed_pop_swine_df_2024=killed_pop_swine_df_2024[killed_pop_swine_df_2024['Year']==year]
    killed_pop_swine_df_2024 = killed_pop_swine_df_2024.sort_values('Value').drop_duplicates(subset=['ISO3','Year','Item'], keep='last')
    killed_pop_swine_df_2024 = (
        killed_pop_swine_df_2024.groupby(['Area', 'Year','ISO3'], as_index=False)
        .agg({
            'ISO3':'first',
            'Area': 'first',
            'Year': 'first',
            'Unit': 'first',
            'Item': 'first',
            'Value': 'sum',  # Aggregate the 'Value' by summing
        })
    )
    killed_pop_swine_df_2024.rename(columns={'Value':'TOTAL Slaughtered Population '+str(year)},inplace=True)

    killed_pop_swine_df_2024.drop(columns=['Item'],inplace=True)


    pop_swine_df = pop_swine_df.sort_values('TOTAL Population').drop_duplicates(subset=['ISO3','Year'], keep='last')
    killed_pop_swine_df = killed_pop_swine_df.sort_values('TOTAL Slaughtered Population').drop_duplicates(subset=['ISO3','Year'], keep='last')

    
    pop_swine_df_2024 = pop_swine_df_2024.sort_values('TOTAL Population '+str(year)).drop_duplicates(subset=['ISO3','Year'], keep='last')
    killed_pop_swine_df_2024 = killed_pop_swine_df_2024.sort_values('TOTAL Slaughtered Population '+str(year)).drop_duplicates(subset=['ISO3','Year'], keep='last')

    
    df2_merged=reduce(lambda  left,right: pd.merge(left,right,on=['ISO3','Year'],
                                                how='left', suffixes=('_left', '_right')), [df2,
                                                             pop_swine_df,killed_pop_swine_df])
    
    # Initialize year_range dictionary
    year_range = {}
    
    #MOVED THIS CODE, MAKE SURE IT STILL WORKS (MOVED FROM START, AND CHANGE SO IT'S BASED ON df2_merged instead of df2)
    # Populate year_range with valid min and max years
    # Rename columns to replace spaces and special characters
    df2_merged_holder=df2_merged.copy()
    df2_merged_holder.columns = df2_merged_holder.columns.str.replace(' ', '_').str.replace(r'[^\w]', '_', regex=True)
    
    # Deduplicate column names
    df2_merged_holder.columns = [
        f"{col}_{i}" if col in df2_merged_holder.columns[:i] else col
        for i, col in enumerate(df2_merged_holder.columns)
    ]
    
    # Now iterate using itertuples
    for row in df2_merged_holder.itertuples(index=False):
        key = (row.Country, row.Disease)
        if key not in year_range:
            year_range[key] = [9999, 0]  # Default range
    
        if row.TOTAL_Population + row.TOTAL_Slaughtered_Population == row.TOTAL_Population + row.TOTAL_Slaughtered_Population:
            if not pd.isna(row.interpolated_upper_year):
                year_range[key][1] = max(year_range[key][1], int(row.interpolated_upper_year))
            if not pd.isna(row.interpolated_lower_year):
                year_range[key][0] = min(year_range[key][0], int(row.interpolated_lower_year))
        if not pd.isna(row.original_year):
            year_range[key][0] = min(year_range[key][0], int(row.original_year))
            year_range[key][1] = max(year_range[key][1], int(row.original_year))

    
    # Format year_range for consistent handling
    for key in year_range:
        start, end = year_range[key]
        if start == 9999 and end == 0:  # No valid years
            year_range[key] = [9999, 0]
        elif start == end:  # Single year
            year_range[key] = [start,0]
        else:  # Valid range
            year_range[key] = [start, end]

    pop_swine_df_2024.drop(columns=['Year'],inplace=True)
    killed_pop_swine_df_2024.drop(columns=['Year'],inplace=True)

    df2_merged=reduce(lambda  left,right: pd.merge(left,right,on=['ISO3'],
                                                how='left', suffixes=('_l', '_r')), [df2_merged,
                                                             pop_swine_df_2024,
                                                                killed_pop_swine_df_2024])
                                                            
    total_slaughtered_for_scaling=[]
    
    for row in df2_merged.iterrows():
        try:
            if row[1]['TOTAL Slaughtered Population']==row[1]['TOTAL Slaughtered Population']:
                total_slaughtered_for_scaling+=[row[1]['TOTAL Slaughtered Population']]
            else:
                before_pop=pop_df[(pop_df['ISO3']==row[1]['ISO3'])&(pop_df['Year']==row[1]['Year'])].iloc[0]['TOTAL Population']
                after_pop=pop_df[(pop_df['ISO3']==row[1]['ISO3'])&(pop_df['Year'])].iloc[0]['TOTAL Population '+str(year)]
                
                #Estimating what the slaughtered pop was if the slaughtered pop data is missing
                total_slaughtered_for_scaling+=[(before_pop/after_pop)*killed_pop_df[(killed_pop_df['ISO3']==row[1]['ISO3'])].iloc[0]['TOTAL Slaughtered Population '+str(year)]]
                
        except:
            #print(row)
            total_slaughtered_for_scaling+=[np.nan]
                
    
    df2_merged['TOTAL Slaughtered Population']=total_slaughtered_for_scaling
    
    nsims=100
    
    
    cases_scaled_lower=[]
    cases_scaled_upper=[]
    cases_scaled=[]
    for row in df2_merged.iterrows():
        l = (row[1]['Incidence Initial Lower']*row[1]['Pop for initial CIs']/
         (row[1]['TOTAL Population']+row[1]['TOTAL Slaughtered Population']))*(row[1]['TOTAL Population ' +str(year)]+row[1]['TOTAL Slaughtered Population ' +str(year)])
        u = (row[1]['Incidence Initial Upper']*row[1]['Pop for initial CIs']/
         (row[1]['TOTAL Population']+row[1]['TOTAL Slaughtered Population']))*(row[1]['TOTAL Population ' +str(year)]+row[1]['TOTAL Slaughtered Population ' +str(year)])

        #If there was a sample size in notification report
        if row[1]['Pop for initial CIs']==row[1]['Pop for initial CIs']:
            val= (row[1]['Incidence to form initial CIs']*row[1]['Pop for initial CIs']/
             (row[1]['TOTAL Population']+row[1]['TOTAL Slaughtered Population']))*(row[1]['TOTAL Population ' +str(year)]+row[1]['TOTAL Slaughtered Population ' +str(year)])
        #if there was no sample size in notification report, we will compute scaled up cases, and indicence, but not generate CIs
            #We stored cases in 'Incidence to form initial CIs' instead of incidence in this case, so we don't multiply by pop size
        else:
            val= (row[1]['Incidence to form initial CIs']/
             (row[1]['TOTAL Population']+row[1]['TOTAL Slaughtered Population']))*(row[1]['TOTAL Population ' +str(year)]+row[1]['TOTAL Slaughtered Population ' +str(year)])

        #try:
            #l,u,val=percentile_confidence_interval(cases_scaled_to_2024)
        #except:
            #if population size was 0, incidence was NaN




        if l!=l:
            #print(row)
            l,u=np.nan,np.nan

            #if incidence could be computed, keep it, will just not generate a CI. setting lower and upper bound to val for now
                #so if compted incidence are added to other incidence estimates with CIs, we will increase the lower and upper bounds
            if val==val:
                l=val
                u=val
                work+=[(row[1]['ISO3'],row[1]['Disease'],row[1]['Semester'])]
            else:
                val=np.nan 

                not_work+=[(row[1]['ISO3'],row[1]['Disease'],row[1]['Semester'])]
        else:
            work+=[(row[1]['ISO3'],row[1]['Disease'],row[1]['Semester'])]

            #df2_merged.loc[row[0], ['Incidence', 'Incidence Lower', 'Incidence Upper', 
            #                   'Cases', 'Cases Lower', 'Cases Upper']] = np.nan

        cases_scaled_lower+=[l]
        cases_scaled_upper+=[u]
        cases_scaled+=[val]

    work=set(work)
    not_work=set(not_work)

    actionable_not_work_double = not_work.difference(work) #if incidence cannot be generated because 0 usable data

    work_disese_country = {(t[0], t[1]) for t in work}
    not_work_disease_country = {(t[0], t[1]) for t in not_work}

    actionable_not_work_remove=not_work_disease_country.difference(work_disese_country)
        
    df2_merged['Cases ' +str(year)]=cases_scaled
    df2_merged['Cases ' +str(year)+' Lower']=cases_scaled_lower
    df2_merged['Cases ' +str(year)+' Upper']=cases_scaled_upper
    
    df2_merged['Incidence']=df2_merged['Cases ' +str(year)]/(df2_merged['TOTAL Population ' +str(year)]+df2_merged['TOTAL Slaughtered Population ' +str(year)])
    df2_merged['Incidence Lower']=df2_merged['Cases ' +str(year)+' Lower']/(df2_merged['TOTAL Population ' +str(year)]+df2_merged['TOTAL Slaughtered Population ' +str(year)])
    df2_merged['Incidence Upper']=df2_merged['Cases ' +str(year)+' Upper']/(df2_merged['TOTAL Population ' +str(year)]+df2_merged['TOTAL Slaughtered Population ' +str(year)])    
    df2_merged['records_contributing']=[1]*len(df2_merged)

    #Sum reports across administrative divisions
    df2_merged_isolate=df2_merged.loc[:,['Year','Semester','Country','ISO3','Administrative Division','Disease','records_contributing','TOTAL Population '+str(year),'TOTAL Slaughtered Population '+str(year),'Latest Reported Cases Aggregate','Cases '+str(year), 'Cases '+str(year)+' Lower', 'Cases '+str(year)+' Upper', 'Incidence',
       'Incidence Lower', 'Incidence Upper','Derived_Cases_Method']]
    
    df2_merged_isolate = df2_merged_isolate.groupby(
        ['Country', 'ISO3', 'Disease', 'Semester'],
        as_index=False
    ).agg({
        'Year': 'sum',  # Adjust as needed for non-numeric fields
        'records_contributing': 'sum',
        'TOTAL Population ' + str(year): 'sum',
        'TOTAL Slaughtered Population ' + str(year): 'sum',
        'Latest Reported Cases Aggregate': 'sum',
        'Cases ' + str(year): 'sum',
        'Cases ' + str(year) + ' Lower': 'sum',
        'Cases ' + str(year) + ' Upper': 'sum',
        'Incidence': 'sum',
        'Incidence Lower': 'sum',
        'Incidence Upper': 'sum',
        'Derived_Cases_Method': lambda x: 'None' if all(v == 'None' for v in x) else x[x != 'None'].iloc[0]
    })
    
    df2_merged_isolate['semesters_contributing']=[1]*len(df2_merged_isolate)
    
    
    # Sum reports across semesters to get country-level data
    d2_country_level = df2_merged_isolate.groupby(
        ['Country', 'ISO3', 'Disease'],
        as_index=False
    ).agg({
        'Year': 'sum',
        'records_contributing': 'sum',
        'TOTAL Population ' + str(year): 'sum',
        'TOTAL Slaughtered Population ' + str(year): 'sum',
        'Latest Reported Cases Aggregate': 'sum',
        'Cases ' + str(year): 'sum',
        'Cases ' + str(year) + ' Lower': 'sum',
        'Cases ' + str(year) + ' Upper': 'sum',
        'Incidence': 'sum',
        'Incidence Lower': 'sum',
        'Incidence Upper': 'sum',
        'semesters_contributing': 'sum',
        'Derived_Cases_Method': lambda x: 'None' if all(v == 'None' for v in x) else x[x != 'None'].iloc[0]
    })

    print(d2_country_level['semesters_contributing'])

    d2_country_level['Incidence']=d2_country_level['Incidence']*(2/(d2_country_level['semesters_contributing'])) #Multiply by two if only 1 semester)
    d2_country_level['Incidence Lower']=d2_country_level['Incidence Lower']*(2/(d2_country_level['semesters_contributing'])) #Multiply by two if only 1 semester)
    d2_country_level['Incidence Upper']=d2_country_level['Incidence Upper']*(2/(d2_country_level['semesters_contributing'])) #Multiply by two if only 1 semester)
    

    #Account for very rare cases if  may not have data to generate incidence in one semester of country, disease (but have case aggregate); but have incidence data for second semester:
    # Initialize a flag to track whether any matches are found
    matches_found = False
    
    # Iterate over each tuple in actionable_not_work
    for iso3, disease,_ in actionable_not_work_double:
        # Identify the rows where "ISO3" and "Disease" match the current tuple
        mask = (d2_country_level['ISO3'] == iso3) & (d2_country_level['Disease'] == disease)
        
        if mask.any():  # Check if any rows match the condition
            matches_found = True
            print(f"Match found for ISO3={iso3} and Disease={disease}")
            
            # Multiply the specified columns by 2 for the matching rows
            d2_country_level.loc[mask, ['Incidence', 'Incidence Lower', 'Incidence Upper']] *= 2
    
    # Final message if no matches were found
    if not matches_found:
        print("No matches found in actionable_not_work_double.")

    #Set vaues to NaN if impossible to calculate incidence (are naturally incorrectly estimated at 0 due to pandas groupsum)
        #if incidence could be computed but no CI, we need to remove the CI (CI width is set to 0 in such cases to make groupsum previously feasible - i.e. add to lower bounds of other estimates)

    for iso3, disease in actionable_not_work_remove:
        # Create a mask to find rows where "ISO3" and "Disease" match the current tuple
        mask = (d2_country_level['ISO3'] == iso3) & (d2_country_level['Disease'] == disease)
        
        # Check if there are any matches
        if mask.any():
            
            # Iterate over the matching rows
            for idx in d2_country_level[mask].index:
                if d2_country_level.loc[idx, 'Incidence'] > 0:
                    print(f"Setting incidence bounds of rows with ISO3={iso3} and Disease={disease} to NaN due to unknown sample size")

                    # If "Incidence" > 0, only set "Incidence Lower" and "Incidence Upper" to NaN
                    d2_country_level.loc[idx, ['Incidence Lower', 'Incidence Upper']] = np.nan
                else:
                    print(f"Setting incidence of rows with ISO3={iso3} and Disease={disease} to NaN due to unknown population size")

                    # Otherwise, set all three columns to NaN
                    d2_country_level.loc[idx, ['Incidence', 'Incidence Lower', 'Incidence Upper']] = np.nan

    
    country_disease_for_pooling_year_range=dict()
    for row in df2_merged_isolate.iterrows():
        if year_range[(row[1]['Country'],row[1]['Disease'])]!=[9999,0]:
            if year_range[(row[1]['Country'],row[1]['Disease'])][0]==9999:
                country_disease_for_pooling_year_range[(row[1]['Country'],row[1]['Disease'])]=str(year_range[(row[1]['Country'],row[1]['Disease'])][1])
            elif year_range[(row[1]['Country'],row[1]['Disease'])][1]==0:
                country_disease_for_pooling_year_range[(row[1]['Country'],row[1]['Disease'])]=str(year_range[(row[1]['Country'],row[1]['Disease'])][0])
            else:
                country_disease_for_pooling_year_range[(row[1]['Country'],row[1]['Disease'])]=str(year_range[(row[1]['Country'],row[1]['Disease'])][0])+'-'+str(year_range[(row[1]['Country'],row[1]['Disease'])][1])
    
    year_range_list=[]
    for row in d2_country_level.iterrows():
        year_range_list+=[country_disease_for_pooling_year_range[row[1]['Country'],row[1]['Disease']]]
    year_range_list
        
    d2_country_level['Year Range Incidence Estimate']=year_range_list
    
    d2_country_level.drop(columns=['Year','records_contributing','semesters_contributing'],inplace=True)
    d2_country_level['Year']=[year]*d2_country_level.shape[0]
    
    d2_country_level=d2_country_level.sort_values(['ISO3','Year','Disease'])
    d2_country_level.columns=[i.replace(' '+str(year),'') for i in d2_country_level.columns]
    d2_country_level=d2_country_level.drop(columns=['TOTAL Slaughtered Population','TOTAL Population'])
    
    d2_country_level=d2_country_level.iloc[:,[0,1,2,12,3,4,5,6,7,8,9,10,11]]
   
    #If no confidence intervals could be generated, leave them empty
    d2_country_level.loc[
        (d2_country_level['Incidence'] == d2_country_level['Incidence Lower']) & 
        (d2_country_level['Incidence'] == d2_country_level['Incidence Upper']),
        ['Incidence Lower', 'Incidence Upper']
    ] = np.nan    
    
    final_dfs+=[d2_country_level]
    
    print("Finished analysis for:",str(year))

0      2
1      2
2      1
3      2
4      2
      ..
193    1
194    2
195    2
196    2
197    1
Name: semesters_contributing, Length: 198, dtype: int64
Match found for ISO3=GRL and Disease=Trichinella spp. (Inf. with)
Match found for ISO3=GRL and Disease=Trichinella spp. (Inf. with)
Match found for ISO3=AGO and Disease=African swine fever virus (Inf. with)
Match found for ISO3=AGO and Disease=African swine fever virus (Inf. with)
Match found for ISO3=AGO and Disease=Taenia solium (Inf. with) (Porcine cysticercosis)
Match found for ISO3=AGO and Disease=Taenia solium (Inf. with) (Porcine cysticercosis)
Setting incidence of rows with ISO3=GRL and Disease=Trichinella spp. (Inf. with) to NaN due to unknown population size
Setting incidence of rows with ISO3=AGO and Disease=African swine fever virus (Inf. with) to NaN due to unknown population size
Setting incidence of rows with ISO3=AGO and Disease=Taenia solium (Inf. with) (Porcine cysticercosis) to NaN due to unknown population size
Fi

In [10]:
years_data=pd.concat(final_dfs)
years_data['Disease']=[i if i!= 'Newcastle disease virus (Inf. with)' else 'Newcastle disease (velogenic)' for i in years_data['Disease']]
years_data.sort_values(['ISO3','Year','Disease'])
years_data.to_csv('2005-2024_swine_disease_incidence_stats.csv',index=False)
years_data

Unnamed: 0,Country,ISO3,Disease,Year,Latest Reported Cases Aggregate,Cases,Cases Lower,Cases Upper,Incidence,Incidence Lower,Incidence Upper,Derived_Cases_Method,Year Range Incidence Estimate
0,Angola,AGO,African swine fever virus (Inf. with),2005,198.0,0.000000,0.000000,0.000000,,,,,2005
1,Angola,AGO,Taenia solium (Inf. with) (Porcine cysticercosis),2005,11.0,0.000000,0.000000,0.000000,,,,,2005
2,Argentina,ARG,Anthrax,2005,0.0,0.000000,0.000000,3.556087,0.000000,0.000000,0.000002,,2005
3,Argentina,ARG,Aujeszky's disease virus (Inf. with),2005,232.0,232.000000,188.754600,285.453007,0.000054,0.000044,0.000066,,2005
4,Argentina,ARG,Taenia solium (Inf. with) (Porcine cysticercosis),2005,386.0,386.000000,343.352771,433.756914,0.000090,0.000080,0.000101,,2005
...,...,...,...,...,...,...,...,...,...,...,...,...,...
673,Zambia,ZMB,Trypanosomosis (tsetse-transmitted) (-2021),2024,8.0,10.164515,4.644987,18.918512,0.000011,0.000005,0.000021,,2011
674,Zimbabwe,ZWE,African swine fever virus (Inf. with),2024,595.0,713.138089,635.160845,797.248845,0.001579,0.001407,0.001766,,2019
675,Zimbabwe,ZWE,Anthrax,2024,3.0,2.981855,0.741474,7.729409,0.000013,0.000003,0.000034,,2020
676,Zimbabwe,ZWE,Old world screwworm (Chrysomya bezziana),2024,14.0,14.012261,6.149358,28.915545,0.000031,0.000014,0.000064,,2015-2019
