# EPA-ECHO
This notebook reads downloaded annual Discharge Monitoring Report (DMR) data from EPA's ECHO fileserver (see: https://echo.epa.gov/tools/data-downloads).

Metadata for the download are here:<br> https://echo.epa.gov/files/echodownloads/ICIS_NPDES_Data_Download.pdf

A link to the download files is here: <br>
https://echo.epa.gov/files/echodownloads/NPDES_by_state_year/

**TODO:** This notebook should eventually download the data files programmatically instead of relying on a manual download.

In [1]:
#Imports
import pandas as pd
import os

In [None]:
#Folder holding the manually downloaded ECHO files, then one pointer per CSV
dataFolder = '../../Data/EPA_ECHO'
file1, file2, file3 = (
    os.path.join(dataFolder, csvName)
    for csvName in (
        'NC_FY2017_NPDES_DMRS.csv',       #file1: discharge monitoring reports
        'NC_FY2017_NPDES_LIMITS.csv',     #file2: limits
        'NC_NPDES_EFF_VIOLATIONS.csv',    #file3: violations
    )
)

In [27]:
#Discharge monitoring report data, all data for one year
#low_memory=False has pandas process the file in one piece instead of in chunks,
#so each column gets a single inferred dtype rather than a mixed-type guess
dfDMRS = pd.read_csv(file1,low_memory=False)

In [28]:
#Show the first record: .loc[0] selects the row whose index label is 0 and
#displays it as a column-name/value listing
dfDMRS.loc[0]

ACTIVITY_ID                                  3400052359
EXTERNAL_PERMIT_NMBR                          NC0065081
VERSION_NMBR                                          0
PERM_FEATURE_ID                              3400031484
PERM_FEATURE_NMBR                                   003
PERM_FEATURE_TYPE_CODE                              EXO
LIMIT_SET_ID                                 3400076553
LIMIT_SET_DESIGNATOR                                  M
LIMIT_SET_SCHEDULE_ID                        3400111154
LIMIT_ID                                     3400694666
LIMIT_BEGIN_DATE                             12/01/2012
LIMIT_END_DATE                               05/31/2017
NMBR_OF_SUBMISSION                                    1
NMBR_OF_REPORT                                        1
PARAMETER_CODE                                    50064
PARAMETER_DESC                 Chlorine, free available
MONITORING_LOCATION_CODE                              1
STAY_TYPE_CODE                                  

In [38]:
#Show number of unique values in each column
#(missing values are not counted by default, so a column with no data reports 0)
dfDMRS.nunique()

ACTIVITY_ID                      1268
EXTERNAL_PERMIT_NMBR             1093
VERSION_NMBR                        5
PERM_FEATURE_ID                  1545
PERM_FEATURE_NMBR                  69
PERM_FEATURE_TYPE_CODE              2
LIMIT_SET_ID                     1553
LIMIT_SET_DESIGNATOR                3
LIMIT_SET_SCHEDULE_ID            1561
LIMIT_ID                        17954
LIMIT_BEGIN_DATE                  114
LIMIT_END_DATE                    119
NMBR_OF_SUBMISSION                  2
NMBR_OF_REPORT                      2
PARAMETER_CODE                    132
PARAMETER_DESC                    132
MONITORING_LOCATION_CODE           10
STAY_TYPE_CODE                      0
LIMIT_VALUE_ID                  24603
LIMIT_VALUE_TYPE_CODE               5
LIMIT_VALUE_NMBR                  861
LIMIT_UNIT_CODE                    26
LIMIT_UNIT_DESC                    26
STANDARD_UNIT_CODE                 10
STANDARD_UNIT_DESC                 10
LIMIT_VALUE_STANDARD_UNITS       1002
STATISTICAL_

In [24]:
#Get a unique list of parameters
#(the distinct PARAMETER_DESC values, returned as an array)
dfDMRS.PARAMETER_DESC.unique()

array(['Chlorine, free available', 'Chromium, total [as Cr]',
       'Solids, total suspended', 'Zinc, total [as Zn]',
       'Temperature, water deg. centigrade', 'Copper, total [as Cu]', 'pH',
       'Flow, in conduit or thru treatment plant',
       'Cadmium, total [as Cd]', 'Chlorine, total residual',
       'Oil & Grease', 'Nickel, total [as Ni]',
       'Coliform, fecal MF, MFC broth, 44.5 C', 'Nitrogen, total [as N]',
       'Oxygen, dissolved [DO]', 'Nitrogen, ammonia total [as N]',
       'BOD, 5-day, 20 deg. C', 'Nitrogen, Kjeldahl, total [as N]',
       'Nitrite + Nitrate total [as N]', 'Phosphorus, total [as P]',
       'Arsenic, total [as As]', 'Enterococci', 'Solids, total dissolved',
       'Conductivity', 'Salinity', 'Chloride [as Cl]', 'Turbidity',
       'Flow, total', 'Chv Statre 7Day Chronic Ceriodaphnia',
       'Xylene [mix of m+o+p]', 'Ethylbenzene', 'Toluene', 'Naphthalene',
       'Benzene', 'Aluminum, total [as Al]', 'Lead, total [as Pb]',
       'Phenolics, t

In [37]:
#Keep only the records whose parameter is 'Coliform, fecal general'
dfPoop = dfDMRS.loc[dfDMRS['PARAMETER_DESC'].eq('Coliform, fecal general')]
#Display just the permit number, monitoring period end date, and DMR value columns
dfPoop.loc[:, ['EXTERNAL_PERMIT_NMBR','MONITORING_PERIOD_END_DATE','DMR_VALUE_STANDARD_UNITS']]

Unnamed: 0,EXTERNAL_PERMIT_NMBR,MONITORING_PERIOD_END_DATE,DMR_VALUE_STANDARD_UNITS
90549,NC0052469,10/31/2016,14.2
90550,NC0052469,10/31/2016,4.8
90574,NC0052469,11/30/2016,2.7
90575,NC0052469,11/30/2016,9.7
90592,NC0052469,12/31/2016,1.0
90593,NC0052469,12/31/2016,0.0
90607,NC0052469,01/31/2017,1.3
90612,NC0052469,01/31/2017,3.1
90638,NC0052469,02/28/2017,5.2
90642,NC0052469,02/28/2017,1.5


In [23]:
#Show the first record again (repeats the earlier "Show the first record" cell)
dfDMRS.loc[0]

ACTIVITY_ID                                  3400052359
EXTERNAL_PERMIT_NMBR                          NC0065081
VERSION_NMBR                                          0
PERM_FEATURE_ID                              3400031484
PERM_FEATURE_NMBR                                   003
PERM_FEATURE_TYPE_CODE                              EXO
LIMIT_SET_ID                                 3400076553
LIMIT_SET_DESIGNATOR                                  M
LIMIT_SET_SCHEDULE_ID                        3400111154
LIMIT_ID                                     3400694666
LIMIT_BEGIN_DATE                             12/01/2012
LIMIT_END_DATE                               05/31/2017
NMBR_OF_SUBMISSION                                    1
NMBR_OF_REPORT                                        1
PARAMETER_CODE                                    50064
PARAMETER_DESC                 Chlorine, free available
MONITORING_LOCATION_CODE                              1
STAY_TYPE_CODE                                  

In [20]:
#Limit data
#Only the first 10 rows are read (nrows=10); this cell just lists the file's column names
dfLimits = pd.read_csv(file2,nrows=10)
dfLimits.columns

Index(['ACTIVITY_ID', 'EXTERNAL_PERMIT_NMBR', 'VERSION_NMBR',
       'PERM_FEATURE_ID', 'PERM_FEATURE_NMBR', 'PERM_FEATURE_TYPE_CODE',
       'LIMIT_SET_ID', 'LIMIT_SET_DESIGNATOR', 'LIMIT_SET_SCHEDULE_ID',
       'LIMIT_ID', 'LIMIT_BEGIN_DATE', 'LIMIT_END_DATE', 'NMBR_OF_SUBMISSION',
       'NMBR_OF_REPORT', 'PARAMETER_CODE', 'PARAMETER_DESC',
       'MONITORING_LOCATION_CODE', 'STAY_TYPE_CODE', 'LIMIT_VALUE_ID',
       'LIMIT_VALUE_TYPE_CODE', 'LIMIT_VALUE_NMBR', 'LIMIT_UNIT_CODE',
       'LIMIT_UNIT_DESC', 'STANDARD_UNIT_CODE', 'STANDARD_UNIT_DESC',
       'LIMIT_VALUE_STANDARD_UNITS', 'STATISTICAL_BASE_CODE',
       'STATISTICAL_BASE_TYPE_CODE', 'LIMIT_VALUE_QUALIFIER_CODE',
       'STAY_VALUE_NMBR', 'LIMIT_TYPE_CODE', 'OPTIONAL_MONITORING_FLAG',
       'LIMIT_SAMPLE_TYPE_CODE', 'LIMIT_FREQ_OF_ANALYSIS_CODE'],
      dtype='object')

In [18]:
#Violation data
#Only the first 10 rows are read (nrows=10) to preview the file's layout.
#PERM_FEATURE_NMBR is read as text: it is an identifier ('001', '003', '01D3' in the
#DMR table), and letting pandas infer a numeric dtype turns '001' into 1, which no
#longer matches the same field in dfDMRS.
dfViolations = pd.read_csv(file3,nrows=10,dtype={'PERM_FEATURE_NMBR':str})
dfViolations.head()

Unnamed: 0,NPDES_ID,VERSION_NMBR,ACTIVITY_ID,NPDES_VIOLATION_ID,PERM_FEATURE_NMBR,PERMIT_ACTIVITY_ID,LIMIT_SET_DESIGNATOR,MONITORING_LOCATION_CODE,DMR_FORM_VALUE_ID,DMR_VALUE_NMBR,...,DMR_VALUE_STANDARD_UNITS,VALUE_TYPE_CODE,RNC_DETECTION_CODE,RNC_DETECTION_DESC,RNC_DETECTION_DATE,RNC_RESOLUTION_CODE,RNC_RESOLUTION_DESC,RNC_RESOLUTION_DATE,STATISTICAL_BASE_CODE,STATISTICAL_BASE_MONTHLY_AVG
0,NC0024236,0,3400237134,3617136302,1,3400237134,M,1,3446827000.0,,...,,C2,N,Non-Receipt of DMR/Schedule Report,12/31/2014,0,RE - Two Years Past Detection (System Administ...,12/31/2016,3C,A
1,NC0024210,0,3400195807,3617119659,1,3400195807,M,G,,,...,,C3,K,"Non-receipt Violation, Non-Monthly Average",10/03/2015,0,RE - Two Years Past Detection (System Administ...,10/03/2017,DD,N
2,NC0000019,2,3200050612,3405241491,3,3200050612,M,1,,,...,,C1,,,,,,,DC,
3,NC0020931,2,3200048642,3617796545,1,3200048642,M,1,3423233000.0,,...,,C1,K,"Non-receipt Violation, Non-Monthly Average",03/03/2015,0,RE - Two Years Past Detection (System Administ...,03/03/2017,WK,N
4,NC0088331,2,3200075072,3600393898,1,3200075072,M,1,,,...,,C3,K,"Non-receipt Violation, Non-Monthly Average",12/01/2014,B,RE - Manual by EPA/State/Tribal Action,12/31/2016,DD,N


In [19]:
#Pull out the DMR records for a single permit (NC0023841) and preview the first rows
dfWW = dfDMRS.loc[dfDMRS.EXTERNAL_PERMIT_NMBR.eq('NC0023841')]
dfWW.head()

Unnamed: 0,ACTIVITY_ID,EXTERNAL_PERMIT_NMBR,VERSION_NMBR,PERM_FEATURE_ID,PERM_FEATURE_NMBR,PERM_FEATURE_TYPE_CODE,LIMIT_SET_ID,LIMIT_SET_DESIGNATOR,LIMIT_SET_SCHEDULE_ID,LIMIT_ID,...,VALUE_RECEIVED_DATE,DAYS_LATE,NODI_CODE,EXCEEDENCE_PCT,NPDES_VIOLATION_ID,VIOLATION_CODE,RNC_DETECTION_CODE,RNC_DETECTION_DATE,RNC_RESOLUTION_CODE,RNC_RESOLUTION_DATE
214597,3600003709,NC0023841,0,3600115478,01D3,EXO,3600151830,M,3600195873,3601547699,...,11/23/2016,,,,,,,,,
214598,3600003709,NC0023841,0,3600115478,01D3,EXO,3600151830,M,3600195873,3601547719,...,11/23/2016,,,,,,,,,
214599,3600003709,NC0023841,0,3600115478,01D3,EXO,3600151830,M,3600195873,3601547708,...,,,,,3608604000.0,D80,K,12/31/2016,1.0,12/31/2016
214600,3600003709,NC0023841,0,3600115480,01D4,EXO,3600151831,M,3600195874,3601547731,...,11/23/2016,,,,,,,,,
214601,3600003709,NC0023841,0,3600001857,001,EXO,3600001812,M,3600002053,3601487449,...,11/23/2016,,,,,,,,,


In [15]:
#List the distinct parameters reported for the single permit isolated in dfWW
dfWW.PARAMETER_DESC.unique()

array(['Nitrogen, Kjeldahl, total [as N]',
       'Temperature, water deg. centigrade',
       'Nitrogen, ammonia total [as N]', 'Phosphate, ortho [as PO4]',
       'Oxygen, dissolved [DO]', 'Conductivity',
       'Coliform, fecal MF, MFC broth, 44.5 C', 'pH',
       'Flow, in conduit or thru treatment plant', 'BOD, 5-day, 20 deg. C',
       'Nitrogen, total [as N]', 'Solids, total suspended',
       'Phosphorus, total [as P]', 'Chlorine, total residual',
       'Chlorophyll A', 'Flow, total',
       'Chv Statre 7Day Chronic Ceriodaphnia',
       'Nitrite + Nitrate total [as N]'], dtype=object)