# Summary statistics
This notebook contains the calculations that produced several overall statistics quoted in the manuscript. For the code that generated the processed datasets imported here, see the Python scripts in the `src` folder.

In [1]:
import pandas as pd

## Global

In [2]:
# Load the processed towns table, using the first CSV column as the index.
towns = pd.read_csv(
    '../data/processed/towns_with_exposure.csv',
    index_col=0,
)
# Total population living within 5 km of a CONAGUA site
towns['population'].sum()

66127985

In [3]:
# Fraction of Mexico's population living within 5 km of a CONAGUA site.
# The denominator is the 2010 national population (according to Google).
MEXICO_POPULATION_2010 = 117_000_000
towns['population'].sum() / MEXICO_POPULATION_2010

0.565196452991453

## Arsenic

In [4]:
# Load the processed sampling-site table, using the first CSV column as the index.
samples = pd.read_csv(
    '../data/processed/samples.csv',
    index_col=0,
)

In [5]:
# Number of sites with arsenic contamination (arsenic value above 10)
len(samples[samples['arsenic'] > 10])

639

In [6]:
# Arsenic-contaminated sites, counted per site type
samples.loc[samples['arsenic'] > 10, 'type'].value_counts()

Well                   325
River                  170
Lake                    54
Dam                     47
Discharge               11
Drainage                 9
Stream                   9
Municipal Discharge      4
Estuary                  2
Canal                    2
Lagoon                   2
Swamp                    2
Temporary Dam            1
Spring                   1
Name: type, dtype: int64

In [7]:
# Groundwater sites (wells and springs) among the arsenic-contaminated
# sites, and their share of all contaminated sites.
total = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .sum()
)

sites = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .loc[['Well', 'Spring']]
    .sum()
)

print('Site Category: Groundwater')
print(f'Total sites: {sites}')
print(f'Percent of total: {sites/total*100:0.0f}%')

Site Category: Groundwater
Total sites: 326
Percent of total: 51%


In [8]:
# River sites (rivers and streams) among the arsenic-contaminated sites,
# and their share of all contaminated sites.
# Count contaminated sites per type once and reuse it for both figures.
contaminated_by_type = samples.loc[samples['arsenic'] > 10, 'type'].value_counts()

total = contaminated_by_type.sum()
sites = contaminated_by_type.loc[['River', 'Stream']].sum()

# The label names this cell's own category (rivers), not groundwater.
print('Site Category: Rivers')
print(f'Total sites: {sites}')
print(f'Percent of total: {sites/total*100:0.0f}%')

Site Category: Groundwater
Total sites: 179
Percent of total: 28%


In [9]:
# Discharge sites (discharges and municipal discharges) among the
# arsenic-contaminated sites, and their share of all contaminated sites.
total = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .sum()
)

sites = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .loc[['Discharge', 'Municipal Discharge']]
    .sum()
)

print('Site Category: Discharges')
print(f'Total sites: {sites}')
print(f'Percent of total: {sites/total*100:0.0f}%')

Site Category: Discharges
Total sites: 15
Percent of total: 2%


In [10]:
# Remaining surface water sites (lakes, dams, drainage, swamps, lagoons,
# canals, estuaries and temporary dams) among the arsenic-contaminated
# sites, and their share of all contaminated sites.
total = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .sum()
)

sites = (
    samples.loc[samples['arsenic'] > 10, 'type']
    .value_counts()
    .loc[[
        'Lake',
        'Dam',
        'Drainage',
        'Swamp',
        'Lagoon',
        'Canal',
        'Estuary',
        'Temporary Dam',
    ]]
    .sum()
)

print('Site Category: Other surface water sites')
print(f'Total sites: {sites}')
print(f'Percent of total: {sites/total*100:0.0f}%')

Site Category: Other surface water sites
Total sites: 119
Percent of total: 19%


In [11]:
# Count of towns whose arsenic exposure exceeds 10 µg/L
len(towns[towns['arsenic'] > 10])

7263

In [12]:
# Total population of the towns whose arsenic exposure exceeds 10 µg/L
towns.loc[towns['arsenic'] > 10, 'population'].sum()

8807501

In [13]:
# Table 1.
# Per-state population exposed to arsenic above 10 µg/L, together with
# the expected cancer incidence associated with that exposure.
summary_arsenic = pd.read_csv(
    '../data/processed/summary_arsenic.csv',
    index_col=0,
)
summary_arsenic

Unnamed: 0,state,population,cancer_incidence
0,Durango,1174741,3380.955902
1,Jalisco,619058,2282.907393
2,Sinaloa,1166453,1246.504198
3,Chihuahua,1626153,1198.134944
4,Zacatecas,391704,911.722632
5,Sonora,1069452,893.70277
6,Guanajuato,618884,853.783788
7,Coahuila,712095,722.496184
8,Oaxaca,523865,443.829518
9,Hidalgo,32923,203.669132


In [14]:
# Totals across states: population exposed to arsenic above 10 µg/L and
# the expected cancer incidence associated with that exposure.
# Column sums are taken over the whole table; positions 1 and 2 of the
# result are then selected.
summary_arsenic.sum().iloc[1:3]

population          8807501
cancer_incidence    13070.6
dtype: object

## Fluoride

In [15]:
# Number of sites with fluoride contamination (fluoride value above 1500)
len(samples[samples['fluoride'] > 1500])

184

In [16]:
# Fluoride-contaminated sites, counted per site type
samples.loc[samples['fluoride'] > 1500, 'type'].value_counts()

Well      181
Spring      3
Name: type, dtype: int64

In [17]:
# Count of towns whose fluoride exposure exceeds 1500 µg/L
len(towns[towns['fluoride'] > 1500])

2726

In [18]:
# Total population of the towns whose fluoride exposure exceeds 1500 µg/L
towns.loc[towns['fluoride'] > 1500, 'population'].sum()

3054168

In [19]:
# Table 2.
# Per-state population exposed to fluoride above 0.06 mg/(kg * day),
# considered to be the limit above which significant health effects begin.
summary_fluoride = pd.read_csv(
    '../data/processed/summary_fluoride.csv',
    index_col=0,
)
summary_fluoride

Unnamed: 0,state,population
0,San Luis Potosí,772124
1,Durango,634753
2,Zacatecas,165624
3,Guanajuato,133703
4,Jalisco,91556
5,Michoacán,86878
6,Chihuahua,81933
7,Hidalgo,45725
8,Sonora,22428
9,Querétaro,18861


In [20]:
# Total population exposed to fluoride above 0.06 mg/(kg * day).
# Column sums are taken over the whole table; position 1 of the result
# is then selected.
summary_fluoride.sum().iloc[1:2]

population    2071735
dtype: object