In [5]:
import sprit
import obspy
import pandas as pd
import numpy as np
import datetime

def _accepted_param_names(func):
    """Return the names of the parameters declared by ``func``, in signature order.

    ``inspect.signature`` is used instead of ``func.__code__.co_varnames`` because ``co_varnames``
    also lists the function's local variables and, for a decorated function, describes the
    wrapper rather than the function being wrapped.
    """
    import inspect  # Local import: inspect is not part of the file's import block

    return tuple(inspect.signature(func).parameters)


def _parse_param_string(param_string):
    """Parse a single-column parameter string ('key1=val1,key2=val2') into a dict of strings."""
    parsed = {}
    if pd.isna(param_string):  # Blank cell: no parameters given for this row
        return parsed
    for item in str(param_string).split(','):
        if not item.strip():
            continue  # Tolerate trailing or doubled commas
        # partition (rather than split) keeps any further '=' characters inside the value
        key, sep, value = item.partition('=')
        key = key.strip()
        if not sep or not key:
            raise ValueError('Cannot parse parameter "{}"; expected key=value pairs separated by commas'.format(item))
        parsed[key] = value.strip()
    return parsed


def _params_from_columns(dataReadInfoDF, known_names, default_dict, verbose):
    """Build one parameter dict per row of the batch table, using one column per parameter."""
    param_cols = [col for col in dataReadInfoDF.columns if col in known_names]
    param_dict_list = []
    # Rows are addressed by position so a non-default index (e.g. index_col passed to read_csv) still works
    for row_pos in range(len(dataReadInfoDF)):
        param_dict = {}
        for col in param_cols:
            currParam = dataReadInfoDF[col].iloc[row_pos]
            if pd.isna(currParam) or currParam == 'nan':
                if col in default_dict:
                    param_dict[col] = default_dict[col]  # Get default value
                    if verbose:
                        print('Replacing blank value for {} from file with default value {}'.format(col, default_dict[col]))
                else:
                    param_dict[col] = None
            else:
                param_dict[col] = currParam
        param_dict_list.append(param_dict)
    return param_dict_list


def _params_from_filelist(input_data, batch_params):
    """Build one parameter dict per file from a list of files and the optional batch_params."""
    # A bare string is a single file, not an iterable of one-character file names
    files = [input_data] if isinstance(input_data, str) else list(input_data)

    if batch_params is None:
        per_file_params = [{} for _ in files]
    elif isinstance(batch_params, dict):
        per_file_params = [batch_params] * len(files)  # Same parameters for every file
    elif isinstance(batch_params, (list, tuple)):
        if len(batch_params) != len(files):
            raise ValueError('batch_params has {} items but input_data has {} files; they must be the same length'.format(len(batch_params), len(files)))
        per_file_params = batch_params
    else:
        raise TypeError('batch_params must be None, a dict, or a list of dicts, not {}'.format(type(batch_params).__name__))

    param_dict_list = []
    for file, file_params in zip(files, per_file_params):
        param_dict = dict(file_params)  # Copy so the caller's dictionaries are never modified
        # NOTE(review): assumes sprit.input_params() takes the file to read as `datapath` -- confirm
        param_dict['datapath'] = file
        param_dict_list.append(param_dict)
    return param_dict_list


def batch_data_read(input_data, batch_type='csv', param_col=None, batch_params=None, verbose=False, **readcsv_getMeta_fetch_kwargs):
    """Function to read in data as a batch of multiple data files

    Each file is passed in turn through sprit.input_params(), sprit.get_metadata(), and sprit.fetch_data().

    Parameters
    ----------
    input_data : filepath or list
        Input data information for how to read in data as batch. For batch_type='csv', anything accepted by pandas.read_csv(); for batch_type='filelist', a list of data files (a single string is treated as a one-item list)
    batch_type : str, optional
        Type of batch read, only 'csv' and 'filelist' accepted. If 'csv', will read batch information from a file read in using pandas.read_csv(), by default 'csv'
    param_col : None or str, optional
        Name of parameter column from batch information file. Only used if batch_type='csv' and a single parameter column is used, rather than one column per parameter (for single parameter column, parameters are formatted with = between keys/values and , between item pairs; values are kept as strings), by default None
    batch_params : dict or list, optional
        Keyword arguments for sprit.input_params(), sprit.get_metadata(), and sprit.fetch_data(). Only used with batch_type='filelist'. If dict, will use same parameters for all files. If list of dicts, needs to be same length as input_data, by default None
    verbose : bool, optional
        Whether to print information to terminal during batch read, by default False
    **readcsv_getMeta_fetch_kwargs
        Extra keyword arguments. Each one is forwarded to whichever of pandas.read_csv(), sprit.input_params(), sprit.get_metadata(), and sprit.fetch_data() declares a parameter of that name. Values given per file (batch table or batch_params) take precedence.

    Returns
    -------
    dict
        Dictionary with each item representing a different file read in, keyed by site name, and which consists of its own parameter dictionary to be used by the rest of the processing steps. Files sharing a site name overwrite one another; files left with the default site name are numbered (e.g., 'HVSR Site_0').

    Raises
    ------
    IndexError
        If param_col is specified but is not a column of the batch information file
    ValueError
        If batch_type is not 'csv' or 'filelist', if a list batch_params differs in length from input_data, or if a param_col entry is not formatted as key=value pairs
    TypeError
        If batch_params is not None, a dict, or a list
    """
    # Defaults used to fill blank cells of the batch table and to detect undesignated site names
    default_dict = {'site':'HVSR Site',
                    'network':'AM',
                    'station':'RAC84',
                    'loc':'00',
                    'channels':['EHZ', 'EHN', 'EHE'],
                    'acq_date':str(datetime.datetime.now().date()),
                    'starttime' : '00:00:00.00',
                    'endtime' : '23:59:59.999',
                    'tzone' : 'UTC',
                    'xcoord' : -88.2290526,
                    'ycoord' :  40.1012122,
                    'elevation' : 755,
                    'input_crs':'EPSG:4326',#4269 is NAD83, defaulting to WGS
                    'output_crs':'EPSG:4326',
                    'elev_unit' : 'feet',
                    'depth' : 0,
                    'instrument' : 'Raspberry Shake',
                    'metapath' : '',
                    'hvsr_band' : [0.4, 40],
                    'write_path':'',
                    'source':'file',
                    'export_format':'mseed',
                    'detrend':'spline',
                    'detrend_order':2,
                    'verbose':False}

    shared_kwargs = readcsv_getMeta_fetch_kwargs

    if batch_type == 'csv':
        #Read csv
        read_csv_names = set(_accepted_param_names(pd.read_csv))
        # input_data is already passed positionally, so it must not be repeated by keyword
        read_csv_kwargs = {k: v for k, v in shared_kwargs.items() if k in read_csv_names and k != 'filepath_or_buffer'}
        # pandas has deprecated read_csv's own `verbose`; only request it when asked for and still supported
        if verbose and 'verbose' in read_csv_names:
            read_csv_kwargs['verbose'] = True
        dataReadInfoDF = pd.read_csv(input_data, **read_csv_kwargs)

        if verbose:
            print(dataReadInfoDF)

        if param_col is None: #Not a single parameter column, each col=parameter
            known_names = set()
            for sprit_func in (sprit.input_params, sprit.get_metadata, sprit.fetch_data):
                known_names.update(_accepted_param_names(sprit_func))
            param_dict_list = _params_from_columns(dataReadInfoDF, known_names, default_dict, verbose)
        else:
            if param_col not in dataReadInfoDF.columns:
                raise IndexError('{} is not a column in {} (columns are: {})'.format(param_col, input_data, dataReadInfoDF.columns))
            param_dict_list = [_parse_param_string(row) for row in dataReadInfoDF[param_col]]
    elif batch_type == 'filelist':
        param_dict_list = _params_from_filelist(input_data, batch_params)
    else:
        raise ValueError("batch_type must be 'csv' or 'filelist', not {!r}".format(batch_type))

    if not param_dict_list:
        return {}  # Nothing to read

    # Parameter names are looked up once, outside the loop.
    # `params` is always passed explicitly below, so it is never also taken from the keyword sources.
    input_names = set(_accepted_param_names(sprit.input_params))
    metadata_names = set(_accepted_param_names(sprit.get_metadata)) - {'params'}
    fetch_names_ordered = _accepted_param_names(sprit.fetch_data)
    fetch_names = set(fetch_names_ordered) - {'params'}
    # Only the first seven fetch_data parameters are taken from per-file values, as before.
    # NOTE(review): this looks intended to keep a per-file `verbose` out of fetch_data -- confirm
    fetch_row_names = set(fetch_names_ordered[:7]) - {'params'}

    hvsr_metaDict = {}
    zfillDigs = len(str(len(param_dict_list))) #Get number of digits of length of param_dict_list
    unnamed_count = 0
    for param_dict in param_dict_list:
        # Per-file values are applied last so they override the shared keyword arguments
        input_params_kwargs = {k: v for k, v in shared_kwargs.items() if k in input_names}
        input_params_kwargs.update({k: v for k, v in param_dict.items() if k in input_names})
        params = sprit.input_params(**input_params_kwargs)

        get_metadata_kwargs = {k: v for k, v in shared_kwargs.items() if k in metadata_names}
        get_metadata_kwargs.update({k: v for k, v in param_dict.items() if k in metadata_names})
        params = sprit.get_metadata(params=params, **get_metadata_kwargs)

        fetch_data_kwargs = {k: v for k, v in shared_kwargs.items() if k in fetch_names}
        fetch_data_kwargs.update({k: v for k, v in param_dict.items() if k in fetch_row_names})
        params = sprit.fetch_data(params=params, **fetch_data_kwargs)

        if params['site'] == default_dict['site']: #If site was not designated
            # Number undesignated sites so they do not overwrite each other in the output
            params['site'] = "{}_{}".format(params['site'], str(unnamed_count).zfill(zfillDigs))
            unnamed_count += 1
        hvsr_metaDict[params['site']] = params

    return hvsr_metaDict

# Example run: read every site listed in the batch CSV, then show the site names the results are keyed by
dataPath = r"C:\Users\riley\OneDrive - University of Illinois - Urbana\CodesandScripts\HVSR_Batch_practice.csv"
hvsr_data_batch = batch_data_read(dataPath, batch_type='csv')
hvsr_data_batch.keys()



dict_keys(['BOM5_23', 'HVSR Site_0', 'BOP2_1'])