In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [37]:
# Read the cleaned water-quality table, using the parsed Date_Time column as
# the index, and discard the 'Unnamed: 0' column in the same expression.
df = pd.read_csv(
    './data/Water_CLEAN.csv',
    parse_dates=['Date_Time'],
    index_col='Date_Time',
).drop(columns='Unnamed: 0')

In [38]:
# Keep only the rows whose HUCNAME_ contains 'Potomac'.
# na=False counts missing names as non-matches (a NaN inside the boolean mask
# would make .loc raise), and .copy() makes the subset an independent frame so
# that later changes to df_potomac never act on a view of `df`.
df_potomac = df.loc[df['HUCNAME_'].str.contains('Potomac', na=False)].copy()

In [39]:
# Preview the first rows of the Potomac subset (still one row per measurement).
df_potomac.head()

Unnamed: 0_level_0,Agency,Cruise,Database,HUC12,Latitude,Layer,Longitude,MeasureValue,Method,Parameter,...,TideStage,TierLevel,Unit,Point,HUC12_,HUCNAME_,FIPS_,COUNTY_,STATE_,TotalDepth
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-10-06 12:45:00,MDDNR,BAY514,CBP,20700020000.0,39.38935,S,-79.17936,10.0,F01,DO,...,Ebb Tide,T3,MG/L,POINT (-79.17936 39.38935),20700020205,Lostland Run-North Branch Potomac River,24023,Garrett County,MD,0.0
2009-10-06 12:45:00,MDDNR,BAY514,CBP,20700020000.0,39.38935,S,-79.17936,1.222,L01,NH4F,...,Flood Tide,T3,MG/L,POINT (-79.17936 39.38935),20700020205,Lostland Run-North Branch Potomac River,24023,Garrett County,MD,0.0
2009-10-06 12:45:00,MDDNR,BAY514,CBP,20700020000.0,39.38935,S,-79.17936,2.463,D01,NO3F,...,Flood Tide,T3,MG/L,POINT (-79.17936 39.38935),20700020205,Lostland Run-North Branch Potomac River,24023,Garrett County,MD,0.0
2009-10-06 12:45:00,MDDNR,BAY514,CBP,20700020000.0,39.38935,S,-79.17936,7.6,F01,PH,...,Ebb Tide,T3,SU,POINT (-79.17936 39.38935),20700020205,Lostland Run-North Branch Potomac River,24023,Garrett County,MD,0.0
2009-10-06 12:45:00,MDDNR,BAY514,CBP,20700020000.0,39.38935,S,-79.17936,0.006,L01,PO4F,...,Ebb Tide,T3,MG/L,POINT (-79.17936 39.38935),20700020205,Lostland Run-North Branch Potomac River,24023,Garrett County,MD,0.0


In [48]:
# Rebind instead of dropping in place: an in-place edit on a frame that was
# selected out of another frame can raise SettingWithCopyWarning.
# errors='ignore' keeps this cell safe to re-run once the column is gone.
df_potomac = df_potomac.drop(columns='ParameterName_CBP', errors='ignore')

In [49]:
def make_parameters(df):
    """Spread long-format measurements into one column per parameter.

    Every distinct value of ``df['Parameter']`` becomes a ``Parameter_<value>``
    column. On each input row that column holds ``MeasureValue`` if the row
    recorded that parameter and 0 otherwise. The rows are then collapsed to
    one per (index value, SampleId, SampleDepth) by taking the column-wise
    maximum of every remaining column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Parameter', 'MeasureValue', 'SampleId' and
        'SampleDepth'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by the original index values, with 'SampleId' and
        'SampleDepth' as the first two columns, followed by the other input
        columns (minus 'Parameter' and 'MeasureValue') and the generated
        ``Parameter_*`` columns.

    Raises
    ------
    KeyError
        If any of the required columns is missing.

    Notes
    -----
    0 is the placeholder for "not recorded on this row" and groups are reduced
    with ``max``, so a parameter that was never measured in a group comes out
    as 0, and a negative reading is hidden by the 0 placeholders of the other
    rows in its group.
    """
    required = ['Parameter', 'MeasureValue', 'SampleId', 'SampleDepth']
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f'make_parameters: missing required column(s): {missing}')

    # Float indicators (instead of the default bool/uint8) let the measured
    # values be written in without a dtype change part-way through.
    indicators = pd.get_dummies(df['Parameter'], prefix='Parameter', dtype=float)

    # Keep the 0 placeholders and substitute MeasureValue where the indicator
    # is set. This is done on the indicator frame itself, not via an in-place
    # edit of a column pulled out of a larger frame, so it works under
    # Copy-on-Write and only ever touches the generated columns (an unrelated
    # input column whose name starts with 'Parameter' is left alone).
    values = indicators.where(indicators == 0, df['MeasureValue'], axis=0)

    wide = pd.concat(
        [df.drop(columns=['Parameter', 'MeasureValue']), values], axis=1
    )

    # One output row per (index value, SampleId, SampleDepth).
    wide = wide.groupby(by=[wide.index, 'SampleId', 'SampleDepth']).max()
    return wide.reset_index(level=['SampleId', 'SampleDepth'])

In [50]:
# Pivot the Potomac measurements to one column per parameter.
# NOTE(review): this rebinds `df`, replacing the frame loaded from
# Water_CLEAN.csv; re-run the load cell before re-running the filter cell.
df = make_parameters(df_potomac)

In [51]:
# List the columns of the pivoted frame to check the generated Parameter_* columns.
df.columns

Index(['SampleId', 'SampleDepth', 'Agency', 'Cruise', 'Database', 'HUC12',
       'Latitude', 'Layer', 'Longitude', 'Method', 'Program', 'Project',
       'SampleReplicateType', 'SampleType', 'Source', 'Station', 'TideStage',
       'TierLevel', 'Unit', 'Point', 'HUC12_', 'HUCNAME_', 'FIPS_', 'COUNTY_',
       'STATE_', 'TotalDepth', 'Parameter_CHLA', 'Parameter_DO',
       'Parameter_NH4F', 'Parameter_NO3F', 'Parameter_PH', 'Parameter_PO4F',
       'Parameter_SALINITY', 'Parameter_SECCHI', 'Parameter_TALK',
       'Parameter_TDS', 'Parameter_TKNW', 'Parameter_TN', 'Parameter_TP',
       'Parameter_TSS', 'Parameter_TURB_NTU', 'Parameter_WTEMP'],
      dtype='object')

In [52]:
# Preview the first rows of the pivoted frame.
df.head()

Unnamed: 0_level_0,SampleId,SampleDepth,Agency,Cruise,Database,HUC12,Latitude,Layer,Longitude,Method,...,Parameter_SALINITY,Parameter_SECCHI,Parameter_TALK,Parameter_TDS,Parameter_TKNW,Parameter_TN,Parameter_TP,Parameter_TSS,Parameter_TURB_NTU,Parameter_WTEMP
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-07-11 08:30:00,29047.0,0.5,MDDNR,BAY422,CBP,20700110000.0,38.3626,S,-76.99063,L01,...,4.28,0.5,58.0,0.0,0.0,0.99,0.0729,7.9,8.0,27.7
2005-07-11 08:30:00,29047.0,1.0,MDDNR,BAY422,CBP,20700110000.0,38.3626,M,-76.99063,F01,...,4.31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.6
2005-07-11 08:30:00,29047.0,2.0,MDDNR,BAY422,CBP,20700110000.0,38.3626,AP,-76.99063,L01,...,4.76,0.0,58.0,0.0,0.0,1.341,0.0741,8.0,7.6,27.5
2005-07-11 08:30:00,29047.0,3.0,MDDNR,BAY422,CBP,20700110000.0,38.3626,M,-76.99063,F01,...,5.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.2
2005-07-11 08:30:00,29047.0,4.0,MDDNR,BAY422,CBP,20700110000.0,38.3626,M,-76.99063,F01,...,5.88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.1


In [None]:
# Write the index as well: Date_Time is the frame's index (set when the CSV is
# loaded and kept by make_parameters), so index=False would drop every
# timestamp from the saved file.
df.to_csv('./data/WQ_FINAL_with_Parameters.csv')