In [1]:
#Import functions and libraries from scripts
from GP_funcs import *
from file_opening import *
from SCB_conversion import SCB_CONVERSION

#for plotting
import matplotlib.pyplot as plt

# Load USGS site data and calculate SAC

In [3]:
# --- Load the raw USGS tables for every site ---
USGS_site_id = ['05325000','05325300','05330000','05331000']
ADVM_readings = create_USGS_dfs('ADVM', USGS_site_id)   # sensor readings, keyed by site id
Grab_samples = create_USGS_dfs('SSC_Q', USGS_site_id)   # grab samples and flow, keyed by site id
Qall_time = create_USGS_dfs('Qall', USGS_site_id)       # discharge over the full sensor deployment, keyed by site id

# Per-site constants table; Site_ID is passed through toName so it can be
# compared against the string ids above.
dfc = pd.read_csv(r'USGS_data/USGS_site_consts.csv')
dfc['Site_ID'] = dfc['Site_ID'].map(toName)

# One record per site id, bundling its tables and its constants row.
data_dictionary = {}

for site_id in USGS_site_id:
    # First row of the constants table matching this site, without the leading column.
    site_consts = dfc.loc[dfc['Site_ID'] == site_id].iloc[0, 1:]

    site_data = {
        'ADVM': ADVM_readings[site_id],
        'Samples': Grab_samples[site_id],
        'Flow': Qall_time[site_id],
        'Consts': site_consts,  # pandas Series
    }
    data_dictionary[site_id] = site_data

    # Derived parameter: rate of change of sampled flow between consecutive
    # grab samples (change in Q divided by elapsed seconds). The first entry
    # is NaN because diff() has no previous row to compare with.
    samples = site_data['Samples']
    sample_flow = samples['Q']
    sample_datetimes = pd.Series(serialTimeToDatetime(samples['DateTime'], tz_hour_offset = -5))
    elapsed_seconds = sample_datetimes.diff().dt.total_seconds()

    samples['dQdT'] = sample_flow.diff() / elapsed_seconds
    print(sample_flow.shape, samples['dQdT'].shape)

(20,) (20,)
(22,) (22,)
(21,) (21,)
(19,) (19,)


In [4]:

# ---------- Loop that prepares each site's data and computes SCB and SAC ----------
# Positions of the values read from site_data['Consts'] below.
END_CELL_POS = 6  # end cell number used when reading the ADVM table
BEAM_POS = 7      # beam selection; the branches below accept 'Avg', '1' or '2'

for site_id in USGS_site_id:
    site_data = data_dictionary[site_id]
    Constsa = site_data['Consts'].to_numpy()
    beams = Constsa[BEAM_POS]

    print(site_id, beams)

    # a is the start cell and b the end cell for data reading; b is zero-padded
    # to two digits. The value is taken from the positional array because
    # integer-position lookup through Series[...] is deprecated in pandas.
    a = '01'
    b = '%02d' % Constsa[END_CELL_POS]

    # Sensor readings for this site, stored as arrays.
    df_a = site_data['ADVM']
    date_time = df_a['DateTime'].to_numpy()
    Temp = df_a['ADVMTemp'].to_numpy()
    Vbeam = df_a['Vbeam'].to_numpy()

    # 'Avg' uses both beams: SNR and AMP become two-element lists (beam 1, beam 2).
    # '1' or '2' uses that single beam: SNR and AMP are single beam_array results.
    # NOTE(review): the original author did not know why one site uses only
    # beam 1 although the csv holds both beams (possibly beam 2 was unreliable).
    if beams == 'Avg':
        SNR = [beam_array(df_a, ['Cell'+a+'SNR1','Cell'+b+'SNR1']), beam_array(df_a, ['Cell'+a+'SNR2','Cell'+b+'SNR2'])]
        AMP = [beam_array(df_a, ['Cell'+a+'Amp1','Cell'+b+'Amp1']), beam_array(df_a, ['Cell'+a+'Amp2','Cell'+b+'Amp2'])]
    elif beams == '1':
        SNR = beam_array(df_a, ['Cell'+a+'SNR1','Cell'+b+'SNR1'])
        AMP = beam_array(df_a, ['Cell'+a+'Amp1','Cell'+b+'Amp1'])
    elif beams == '2':
        SNR = beam_array(df_a, ['Cell'+a+'SNR2','Cell'+b+'SNR2'])
        AMP = beam_array(df_a, ['Cell'+a+'Amp2','Cell'+b+'Amp2'])
    else:
        # Without this branch SNR/AMP would be undefined on the first site, or
        # silently carried over from the previous site on later iterations.
        raise ValueError(
            f"Site {site_id}: unsupported beam setting {beams!r}; expected 'Avg', '1' or '2'"
        )

    # Measured SSC values and their timestamps from the grab samples.
    df_s = site_data['Samples']
    date_time2 = df_s['DateTime'].to_numpy()
    SSC = df_s['SSC'].to_numpy()

    # Mean SCB and SAC from sensor geometry and data.
    Mean_SCB, SAC = SCB_CONVERSION(SNR, AMP, Constsa, Vbeam, Temp, date_time).convert_SNR_to_Mean_SCB()

    # For every grab sample, pick the sensor reading closest in time.
    ind_match = [(np.abs(date_time - date_time2[i])).argmin() for i in range(len(date_time2))]
    SAC_matched = SAC[ind_match]
    Mean_SCB_matched = Mean_SCB[ind_match]

    # log10 versions. np.log10 emits a RuntimeWarning for values that are not
    # positive; those entries come out as NaN or -inf rather than raising.
    lSAC = np.log10(SAC)
    lSAC_matched = np.log10(SAC_matched)
    lSCB = np.log10(Mean_SCB)
    lSCB_matched = np.log10(Mean_SCB_matched)
    lSSC = np.log10(SSC)

    # Store the arrays back into the dataframes. The new frames reuse the index
    # of the frame they are joined to, so concat pairs rows by position, and any
    # same-named columns left by an earlier run of this cell are dropped first,
    # so re-running the cell does not append duplicate columns.

    # Time series of SAC and SCB go into the ADVM dataframe.
    data_matrix = np.vstack([SAC, Mean_SCB, lSAC, lSCB]).transpose()
    df = pd.DataFrame(data_matrix, columns=['SAC','SCB', 'logSAC','logSCB'], index=df_a.index)
    site_data['ADVM'] = pd.concat([df_a.drop(columns=df.columns, errors='ignore'), df], axis = 1)

    # Values matched to sample times go into the Samples dataframe.
    matched_data_matrix = np.vstack([SAC_matched, Mean_SCB_matched, lSSC, lSAC_matched, lSCB_matched]).transpose()
    matched_df = pd.DataFrame(matched_data_matrix, columns=['SAC', 'SCB','logSSC','logSAC', 'logSCB'], index=df_s.index)
    site_data['Samples'] = pd.concat([df_s.drop(columns=matched_df.columns, errors='ignore'), matched_df], axis = 1)


05325000 Avg
(10, 1) R_invalid, (10,)
05325300 Avg
(10, 1) R_invalid, (10,)


  lSAC = np.log10(SAC)


05330000 Avg
(10, 1) R_invalid, (10,)


  lSAC = np.log10(SAC)


05331000 1
(5, 1) R_invalid, (5,)


In [5]:
# Save the computed tables of one site to csv: the grab-sample table (with the
# matched SAC/SCB columns) and the sensor table (with the SAC/SCB time series).
# NOTE(review): `id` shadows the Python builtin; the name is kept in case cells
# outside this view read it. Consider renaming it, e.g. to `site_index`.
id = 3  # position of the site to export within USGS_site_id
site_data = data_dictionary[USGS_site_id[id]]

# Files are written to the current working directory, named after the site id.
site_data['Samples'].to_csv(f'{USGS_site_id[id]}_samples.csv', index=False)
site_data['ADVM'].to_csv(f'{USGS_site_id[id]}_sensor.csv', index=False)