This is an example of merging precipitation data with the PFAS file. The precipitation data is for Orange County, 2010-2019 and was accessed from the NOAA Climate Data Online Search (https://www.ncdc.noaa.gov/cdo-web/search?datasetid=GHCND)

In [79]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import math

def deg2rad(deg):
    return(deg*(math.pi/180.))
def getDistanceFromLatLonInKm(lat1,lon1,lat2,lon2):
    R=6371.
    dlat=deg2rad(lat2-lat1)
    dlon=deg2rad(lon2-lon1)
    a=math.sin(dlat/2.)*math.sin(dlat/2.) + math.cos(deg2rad(lat1)) *\
    math.cos(deg2rad(lat2)) *math.sin(dlon/2.)*math.sin(dlon/2.)

    b = 2. * math.atan2(math.sqrt(a),math.sqrt(1-a))
    d=R*b
    return(d)

prec=pd.read_csv("precip_2010_2019.csv")[['STATION','LATITUDE','LONGITUDE','ELEVATION','DATE','PRCP','TAVG','TMAX','TMIN']]
dat=pd.read_excel("SAR-Imperial_537Data_AsOf08-08-2019.xlsx",sheet_name='All')
location_data=pd.read_excel("SAR-Imperial_537Data_AsOf08-08-2019.xlsx",sheet_name='SAR-IMPERIAL-01 Location')
dat['latitude']=float(location_data['Latitude_WGS84'])
dat['longitude']=float(location_data['Longitude_WGS84'])

In [85]:
#Find the closest precip station to the PFAS station
prec_stations=prec[['STATION','LATITUDE','LONGITUDE']].drop_duplicates()
pfas_stations=dat[['Station Name','latitude','longitude']].drop_duplicates()
#Make table of distances
comb_dist=pd.DataFrame()
for s in pfas_stations['Station Name']:
    #Get the lat/lon coords for the station
    pfas_sub=pfas_stations[pfas_stations['Station Name']==s]
    pfas_lat=float(pfas_sub['latitude'])
    pfas_lon=float(pfas_sub['longitude'])
    for p in prec_stations['STATION']:
        prec_sub=prec_stations[prec_stations['STATION']==p]
        prec_lat=float(prec_sub['LATITUDE'])
        prec_lon=float(prec_sub['LONGITUDE'])
        dict_store={}
        dict_store['pfas_s']=s
        dict_store['prec_s']=p
        dict_store['dist_betw']=getDistanceFromLatLonInKm(pfas_lat,pfas_lon,prec_lat,prec_lon)
        comb_dist=comb_dist.append(dict_store,ignore_index=True)
#Find the min distance
min_dist=comb_dist.loc[comb_dist['dist_betw'].idxmin()]

In [86]:
#merge precip data and pfas data


dist_betw           0.638575
pfas_s       SAR-IMPERIAL-01
prec_s           US1CAOR0021
Name: 31, dtype: object

In [2]:
#Merge the data by the date?
prec['DATE']=pd.to_datetime(prec['DATE'])
prec_tomerge=prec[prec['']]
dat_merged=pd.merge(prec,dat,left_on='DATE',right_on='Sample Date')

In [3]:
dat_merged.head()

Unnamed: 0,STATION,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,TAVG,TMAX,TMIN,Station Name,...,Parameter Abbreviation,Sample Date,Sample Time,Result Value (10% of RDL for NDs),Result Text,Units,RDL,Detected (Y: Yes; N: No),Laboratory Sample Number,Method
0,US1CAOR0047,33.5692,-117.6362,164.0,2016-06-14,0.0,,,,SAR-IMPERIAL-01,...,PFBS,2016-06-14,12:10:00,9.0,ND,ng/L,90,N,16060368-19,537
1,US1CAOR0047,33.5692,-117.6362,164.0,2016-06-14,0.0,,,,SAR-IMPERIAL-01,...,PFHpA,2016-06-14,12:10:00,1.0,ND,ng/L,10,N,16060368-19,537
2,US1CAOR0047,33.5692,-117.6362,164.0,2016-06-14,0.0,,,,SAR-IMPERIAL-01,...,PFHxS,2016-06-14,12:10:00,3.0,ND,ng/L,30,N,16060368-19,537
3,US1CAOR0047,33.5692,-117.6362,164.0,2016-06-14,0.0,,,,SAR-IMPERIAL-01,...,PFNA,2016-06-14,12:10:00,2.0,ND,ng/L,20,N,16060368-19,537
4,US1CAOR0047,33.5692,-117.6362,164.0,2016-06-14,0.0,,,,SAR-IMPERIAL-01,...,PFOS,2016-06-14,12:10:00,4.0,ND,ng/L,40,N,16060368-19,537


In [47]:
dat_sort=dat_merged.sort_values(by=['DATE'])

In [52]:
dat_sort['STATION'].unique()

array(['US1CAOR0047', 'USC00047888', 'USR0000CBCN', 'US1CAOR0035',
       'US1CAOR0029', 'US1CAOR0027', 'US1CAOR0026', 'US1CAOR0021',
       'USR0000CFRE', 'USW00003166', 'US1CAOR0039', 'USC00044303',
       'US1CAOR0034', 'US1CAOR0031', 'US1CAOR0042', 'US1CAOR0001',
       'US1CAOR0006', 'US1CAOR0043', 'US1CAOR0049', 'USC00040192',
       'USW00093184', 'US1CAOR0019', 'US1CAOR0050', 'USC00046175',
       'US1CAOR0016', 'US1CAOR0051', 'US1CAOR0025', 'US1CAOR0028',
       'US1CAOR0052', 'US1CAOR0005', 'US1CAOR0041', 'US1CAOR0053',
       'US1CAOR0015', 'US1CAOR0037', 'US1CAOR0009', 'US1CAOR0059',
       'US1CAOR0067', 'US1CAOR0061', 'US1CAOR0070', 'US1CAOR0072'],
      dtype=object)

In [42]:
#Facet the data and plot 
dat_sub=dat_sort[['DATE','Sample Time','PRCP','Parameter Abbreviation','Result Value (10% of RDL for NDs)']]


In [43]:
dat_sub.head(20)

Unnamed: 0,DATE,Sample Time,PRCP,Parameter Abbreviation,Result Value (10% of RDL for NDs)
0,2016-06-14,12:10:00,0.0,PFBS,9.0
96,2016-06-14,12:10:00,0.0,PFBS,9.0
97,2016-06-14,12:10:00,0.0,PFHpA,1.0
98,2016-06-14,12:10:00,0.0,PFHxS,3.0
99,2016-06-14,12:10:00,0.0,PFNA,2.0
100,2016-06-14,12:10:00,0.0,PFOS,4.0
101,2016-06-14,12:10:00,0.0,PFOA,2.0
102,2016-06-14,12:10:00,,PFBS,9.0
103,2016-06-14,12:10:00,,PFHpA,1.0
104,2016-06-14,12:10:00,,PFHxS,3.0
