## Read .ghg metadata

In [3]:
import os
import re
import time
import zipfile
from datetime import datetime
import configparser
import pandas as pd
import numpy as np
from io import TextIOWrapper

from ipywidgets import FloatProgress
from IPython.display import display

def Parse_Cal(key,string):
    """Extract one calibration entry from co2app calibration text.

    The text following ``key`` up to the first ``))`` is split into a
    ``(Date ...)`` part and a run of ``(Name Value)`` pairs.

    Parameters
    ----------
    key : str
        Entry name to look up (matched literally), e.g. ``'ZeroCO2'``.
    string : str
        Text to search.

    Returns
    -------
    pandas.DataFrame
        Columns ``Attribute`` and ``Value``: one row per ``(Name Value)``
        pair, labelled ``<key>_<Name>``, followed by a ``<key>_Timestamp``
        row holding the parsed date (``NaT`` when the date cannot be parsed).

    Raises
    ------
    AttributeError
        If ``key`` (or a parenthesised pair list after it) is not found,
        because ``re.search`` then returns ``None``.
    ValueError
        If the pair list does not split into an even number of tokens.
    """
    # re.escape keeps the key literal; the raw string avoids the invalid
    # "\)" escape sequences a plain string literal would contain.
    Cal_Info = re.search(re.escape(key) + r'(.*?)\)\)', string).group(1)
    Cal_Date = Cal_Info.split('(Date ')[-1].replace(' at ',' ')
    Cal_Stats = Cal_Info.split('(Date ')[0]
    # "(A 1)(B 2)" -> "A 1 B 2" -> rows of [name, value]
    Cal_Stats = re.search(r'\((.*)\)', Cal_Stats).group(1).replace(')(',' ')
    Cal_Stats = pd.DataFrame(np.array(Cal_Stats.split(' ')).reshape(-1,2))
    Cal_Stats = Cal_Stats.rename(columns={0:'Attribute',1:'Value'})
    Cal_Stats['Attribute']=key+'_'+Cal_Stats['Attribute'].astype(str)
    try:
        timestamp = datetime.strptime(Cal_Date,'%b %d %Y %H:%M:%S')
    except ValueError:
        # Unparseable date: keep the row but mark it missing.
        timestamp = pd.Timestamp('nat')
    Cal_Stats = pd.concat(
        [Cal_Stats,pd.DataFrame(data={'Attribute':[key+'_Timestamp'],'Value':[timestamp]},index=[0])],
        axis=0,
        ignore_index=True
    )
    return Cal_Stats

def Parse_Coef(key,string):
    """Extract one group of calibration coefficients from co2app text.

    Looks for ``key`` immediately followed by ``(`` and reads the
    ``(Name Value)`` pairs up to the first ``))``.

    Parameters
    ----------
    key : str
        Group name, matched literally including any trailing space
        (e.g. ``'CO2 '``).
    string : str
        Text to search.

    Returns
    -------
    pandas.DataFrame
        Columns ``Attribute`` and ``Value``; each attribute is
        ``<key without spaces>_<Name>``.

    Raises
    ------
    AttributeError
        If the group is not found (``re.search`` returns ``None``).
    ValueError
        If the pair list does not split into an even number of tokens.
    """
    # re.escape keeps the key literal; the raw string avoids the invalid
    # "\(" / "\)" escape sequences a plain string literal would contain.
    pattern = re.escape(key) + r'\((.*?)\)\)'
    Coef_Info = re.search(pattern, string).group(1).replace(')(',' ')
    # "A 1 B 2" -> rows of [name, value]
    Coef_Info = pd.DataFrame(np.array(Coef_Info.split(' ')).reshape(-1,2))
    Coef_Info = Coef_Info.rename(columns={0:'Attribute',1:'Value'})
    Coef_Info['Attribute']=key.replace(' ','')+'_'+Coef_Info['Attribute'].astype(str)
    return Coef_Info

def Read_co2app(co2app):
    """Collect calibration settings from the text of system_config\\co2app.conf.

    Only the co2/h2o entries are read for now; the ch4 calibration values
    do not appear to be saved in this file.

    Returns a DataFrame with ``Attribute``/``Value`` columns: the
    calibration rows first, then the coefficient rows (the coefficient
    rows keep their own 0-based index labels).
    """
    # Coefficient section: everything from "Coef" to the first ")))".
    coef_text = re.search(r'Coef(.*?)\)\)\)', co2app).group(0)
    coef_frames = []
    for coef_key in ['CO2 ','H2O ','Pressure ','MaxRef']:
        coef_frames.append(Parse_Coef(coef_key, coef_text))
    coef_table = pd.concat(coef_frames, axis=0, ignore_index=True)

    # Calibration section: everything from "Calibrate" to the first ")))".
    # Span2CO2 & Span2H2O are not needed.
    cal_text = re.search(r'Calibrate(.*?)\)\)\)', co2app).group(0)
    cal_frames = []
    for cal_key in ['ZeroCO2','SpanCO2','ZeroH2O','SpanH2O']:
        cal_frames.append(Parse_Cal(cal_key, cal_text))
    cal_table = pd.concat(cal_frames, axis=0, ignore_index=True)

    return pd.concat([cal_table, coef_table], axis=0)


    
def Parse_Metadata(config,key_list):
    """Flatten the requested config sections into one Attribute/Value table.

    ``config`` is indexed by each name in ``key_list``; the option names of
    that section become ``Attribute`` and the option values become ``Value``.
    Sections are stacked in the order given, with a fresh 0..n-1 index.
    """
    section_frames = []
    for section_name in key_list:
        section = config[section_name]
        section_frames.append(
            pd.DataFrame(data={'Attribute': section.keys(),
                               'Value': section.values()})
        )
    return pd.concat(section_frames, axis=0, ignore_index=True)


def Summarize_Data(Data,means,diagnostics=None):
    """Reduce one file's records to a single Attribute/Value table.

    Parameters
    ----------
    Data : pandas.DataFrame
        Records to summarise. Must contain a ``Nanoseconds`` column when
        ``diagnostics`` is given (it is used for the sample count).
    means : list of str
        Columns whose mean is reported.
    diagnostics : list of str, optional
        Columns whose most frequent value (mode) is reported. When given,
        an ``N_Samples`` row with the count of ``Nanoseconds`` is appended.

    Returns
    -------
    pandas.DataFrame
        Columns ``Attribute`` and ``Value``: the means, then (optionally)
        the diagnostic modes and the ``N_Samples`` row.
    """
    Data_Summary = Data[means].mean().to_frame().reset_index()
    Data_Summary.columns=['Attribute','Value']
    if diagnostics is None:
        return Data_Summary

    Count = pd.DataFrame(data={'Attribute':'N_Samples',
                               'Value':Data['Nanoseconds'].count()},index=[0])
    # Temporary implementation: storing an array of unique values may be
    # a better summary than a single mode.
    # mode() returns one row per tied mode; keep only row 0 (the smallest
    # modal value of each column) so exactly one value per diagnostic
    # remains. reindex() fills NaN when row 0 does not exist.
    first_mode = Data[diagnostics].mode().reindex([0])
    diagnostics_summary = first_mode.T.reset_index()
    diagnostics_summary.columns=['Attribute','Value']
    return pd.concat(
        [Data_Summary,diagnostics_summary,Count],
        axis=0,
        ignore_index=True
    )


In [8]:



# Root folder that is walked for .ghg archives.
Path = '..\\BB\\Data_To_Process\\'

# Columns of the .data file that read_GHG averages.
data_Means = [
    'CO2 Absorptance',
    'H2O Absorptance',
    'CO2 (mmol/m^3)',
    'H2O (mmol/m^3)',
    'Block Temperature (C)',
    'Total Pressure (kPa)',
    'Box Pressure (kPa)',
    'Head Pressure (kPa)',
    'Aux 1 - U (m/s)',
    'Aux 2 - V (m/s)',
    'Aux 3 - W (m/s)',
    'Aux 4 - SOS (m/s)',
    'Cooler Voltage (V)',
    'Chopper Cooler Voltage (V)',
    'Dew Point (C)',
    'Cell Temperature (C)',
    'Temperature In (C)',
    'Temperature Out (C)',
    'Average Signal Strength',
    'Flow Rate (lpm)',
    'Flow Pressure (kPa)',
    'Flow Power (V)',
    'Flow Drive (%)',
    'CH4 (umol/mol)',
    'CH4 Temperature',
    'CH4 Pressure',
    'CH4 Signal Strength',
]

# Columns of the -li7700.status file that read_GHG averages.
status_Means = [
    'OPTICSTEMP',
    'OPTICSRH',
]

# Columns of the .data file summarised by their most frequent value.
data_Diagnostics = [
    'Diagnostic Value',
    'Diagnostic Value 2',
    'CH4 Diagnostic Value',
]

# Sections of the .metadata file copied into the summary.
metadata_Tags = [
    'Site',
    'Station',
    'Timing',
    'Instruments',
]


def read_GHG(root,name):
    """Summarise one .ghg archive without extracting it.

    Parameters
    ----------
    root : str
        Directory containing the archive.
    name : str
        Archive file name without the ``.ghg`` extension. The part before
        the first ``_`` must be a ``%Y-%m-%dT%H%M%S`` timestamp.

    Returns
    -------
    tuple
        ``(config, Summary)``: the ``ConfigParser`` loaded from the
        ``.metadata`` member, and a one-row DataFrame whose columns are the
        metadata, data/status summaries and calibration attributes plus a
        ``TimeStamp`` column parsed from ``name``.
    """
    config = configparser.ConfigParser()
    # os.path.join instead of a hard-coded '\\' so the path is also valid
    # on non-Windows systems.
    archive_path = os.path.join(root, name+'.ghg')
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        # Each member stream is closed as soon as it has been consumed.
        with TextIOWrapper(zip_ref.open(name+'.metadata'), 'utf-8') as metadata_file:
            config.read_file(metadata_file)
        MetaData = Parse_Metadata(config,metadata_Tags)
        with zip_ref.open(name+'.data') as data_file:
            Data = pd.read_csv(data_file,delimiter='\t',skiprows=7)
        Data_Summary = Summarize_Data(Data,data_Means,data_Diagnostics)
        with zip_ref.open(name+'-li7700.status') as status_file:
            Status = pd.read_csv(status_file,delimiter='\t',skiprows=7)
        Status_Summary = Summarize_Data(Status,status_Means)
        with zip_ref.open('system_config/co2app.conf') as co2app_file:
            Calibrations = Read_co2app(co2app_file.read().decode("utf-8"))

    # Stack all Attribute/Value rows, then pivot to one wide row.
    Summary = pd.concat(
        [MetaData,Data_Summary,Status_Summary,Calibrations],
        axis=0,
        ignore_index=True).set_index('Attribute').T
    # Get the file timestamp from the leading part of the file name
    TimeStamp = datetime.strptime(name.split('_')[0],'%Y-%m-%dT%H%M%S')
    Summary['TimeStamp'] = [TimeStamp]
    return (config,Summary)

T1 = time.time()
i = 0
# Only the first MAX_FILES archives are read while inspecting the data.
MAX_FILES = 6

print('Processing ghg files ')
f = FloatProgress(min=0, max=1)
display(f)
record_frames = []
# Walk through the directory to find all "raw" folders
for (root, dirs, files) in sorted(os.walk(Path)):
    # .ghg files are located at the end of each directory tree
    if 'raw' in root and not dirs:
        for file in files:
            # splitext copes with names that contain no dot or several dots
            name, tag = os.path.splitext(file)
            if tag == '.ghg' and i < MAX_FILES:
                # read info in .ghg files and keep it for the final table
                config,New_Record = read_GHG(root,name)
                record_frames.append(New_Record)
                i += 1
            f.value = i/len(files)

if record_frames:
    # One concat at the end instead of growing the frame inside the loop
    Records = pd.concat(record_frames, axis=0, ignore_index=True)
else:
    # No archive found: keep an empty table with the expected index column
    Records = pd.DataFrame(columns=['TimeStamp'])

print('Processing time ', time.time()-T1)
print('To inspect ', i, ' .ghg files')
# The column written by read_GHG is 'TimeStamp' (not 'Timstamp')
Records.set_index('TimeStamp')



Processing ghg files 


FloatProgress(value=0.0, max=1.0)

Processing time  9.639218807220459
To inspect  26  .ghg files


Attribute,site_name,altitude,latitude,longitude,canopy_height,displacement_height,roughness_length,station_name,logger_id,logger_sw_version,...,H2O_XS,H2O_Z,H2O_SD1,H2O_SD2,H2O_SD3,Pressure_A0,Pressure_A1,MaxRef_B,MaxRef_C,TimeStamp
0,BurnsBog2018,-7.4,49.1293869,-122.9848709,0.3,0,0.056,GeogFlux,LI-7200,8.9.0,...,-0.0012,-0.000196,0.0191,1.323,2.385,57.704,15.424,0.05,0.051,2022-06-17 12:09:14
1,BurnsBog2018,-7.4,49.1293869,-122.9848709,0.3,0,0.056,GeogFlux,LI-7200,8.9.0,...,-0.0012,-0.000196,0.0191,1.323,2.385,57.704,15.424,0.05,0.051,2022-06-17 12:30:00
2,BurnsBog2018,-6.7,49.12932968,-122.9848862,0.3,0,0.056,GeogFlux,LI-7200,8.9.0,...,-0.0012,-0.000196,0.0191,1.323,2.385,57.704,15.424,0.05,0.051,2022-06-17 13:00:00
3,BurnsBog2018,0.8,49.12933731,-122.9848938,0.3,0,0.056,GeogFlux,LI-7200,8.9.0,...,-0.0012,-0.000196,0.0191,1.323,2.385,57.704,15.424,0.05,0.051,2022-06-17 13:30:00
4,BurnsBog2018,0.8,49.12933731,-122.9848938,0.3,0,0.056,GeogFlux,LI-7200,8.9.0,...,-0.0012,-0.000196,0.0191,1.323,2.385,57.704,15.424,0.05,0.051,2022-06-17 14:00:00


In [None]:
# Records[]

# Placeholder cell: opens a new figure and calls plot() with no data.
# NOTE(review): `plt` is not imported in the cells shown above — presumably
# `import matplotlib.pyplot as plt` is required before this runs; confirm.
plt.figure()
plt.plot()

These are all the values contained in the .METADATA, .DATA, .STATUS, system_config/co2app.conf files.

## Metadata Values

More info [here](https://www.licor.com/env/support/EddyPro/topics/metadata-file-editor.html)

Site
['site_name', 'altitude', 'latitude', 'longitude', 'canopy_height', 'displacement_height', 'roughness_length']

Station
['station_name', 'logger_id', 'logger_sw_version']

Timing
['acquisition_frequency', 'file_duration']

Instruments
['instr_1_manufacturer', 'instr_1_model', 'instr_1_sn', 'instr_1_sw_version', 'instr_1_id', 'instr_1_height', 'instr_1_wformat', 'instr_1_wref', 'instr_1_north_offset', 'instr_1_head_corr', 'instr_1_northward_separation', 'instr_1_eastward_separation', 'instr_1_vertical_separation', 'instr_2_manufacturer', 'instr_2_model', 'instr_2_sn', 'instr_2_id', 'instr_2_sw_version', 'instr_2_tube_length', 'instr_2_tube_diameter', 'instr_2_tube_flowrate', 'instr_2_northward_separation', 'instr_2_eastward_separation', 'instr_2_vertical_separation', 'instr_3_manufacturer', 'instr_3_model', 'instr_3_sn', 'instr_3_id', 'instr_3_sw_version', 'instr_3_tube_length', 'instr_3_tube_diameter', 'instr_3_tube_flowrate', 'instr_3_northward_separation', 'instr_3_eastward_separation', 'instr_3_vertical_separation']

FileDescription
['separator', 'flag_discards_if_above', 'header_rows', 'data_label', 'col_1_variable', 'col_1_instrument', 'col_1_measure_type', 'col_1_unit_in', 'col_1_conversion', 'col_1_min_value', 'col_1_max_value', 'col_1_unit_out', 'col_1_a_value', 'col_1_b_value', 'col_1_nom_timelag', 'col_1_min_timelag', 'col_1_max_timelag', ... this pattern then repeats for every subsequent column.  I don't think we need this info.]

## Data Values
More info on the data file [here](https://www.licor.com/env/support/LI-7200RS/topics/data-files.html); info on 7200/CSAT diagnostics [here](https://www.licor.com/env/support/LI-7200RS/topics/gas-analyzer-diagnostics.html) and [here](https://www.licor.com/env/support/LI-7200RS/topics/diagnostics-eddypro.html#Gas) and on the 7700 diagnostics [here](https://www.licor.com/env/support/LI-7700/topics/data-files.html).

'Seconds', 'Nanoseconds', 'Sequence Number', 'Diagnostic Value',
'Diagnostic Value 2', 'CO2 Absorptance', 'H2O Absorptance',
'CO2 (mmol/m^3)', 'CO2 (mg/m^3)', 'H2O (mmol/m^3)', 'H2O (g/m^3)',
'Block Temperature (C)', 'Total Pressure (kPa)',
'Box Pressure (kPa)', 'Head Pressure (kPa)', 'Aux 1 - U (m/s)',
'Aux 2 - V (m/s)', 'Aux 3 - W (m/s)', 'Aux 4 - SOS (m/s)',
'Cooler Voltage (V)', 'Chopper Cooler Voltage (V)',
'Vin SmartFlux (V)', 'CO2 (umol/mol)', 'CO2 dry(umol/mol)',
'H2O (mmol/mol)', 'H2O dry(mmol/mol)', 'Dew Point (C)',
'Cell Temperature (C)', 'Temperature In (C)',
'Temperature Out (C)', 'Average Signal Strength',
'CO2 Signal Strength', 'H2O Signal Strength',
'Delta Signal Strength', 'Flow Rate (slpm)', 'Flow Rate (lpm)',
'Flow Pressure (kPa)', 'Flow Power (V)', 'Flow Drive (%)',
'H2O Sample', 'H2O Reference', 'CO2 Sample', 'CO2 Reference',
'HIT Power (W)', 'Vin HIT (V)', 'CH4 Seconds', 'CH4 (umol/mol)',
'CH4 (mmol/m^3)', 'CH4 Temperature', 'CH4 Pressure',
'CH4 Signal Strength', 'CH4 Diagnostic Value', 'CH4 Drop Rate (%)',
'CHK'

## Status Values

More info [here](https://www.licor.com/env/support/LI-7700/topics/data-files.html)

'MSEC', 'SECONDS', 'NANOSECONDS', 'DIAG', 'RSSI', 'REFRSSI',
'LCTSETPT', 'LCTACTUAL', 'BCTSETPT', 'BCTACTUAL', 'CHASSISTEMP',
'OPTICSTEMP', 'OPTICSRH', 'AUXREFTEMP', 'MOTORSETPT',
'MOTORACTUAL', 'USB', 'USBCAPACITY', 'USBFREESPACE', 'REF', 'GND',
'OPTICSTDELTA', 'BOTTOMHEATERW', 'TOPHEATERW', 'CHK'

## co2app.conf

I haven't been able to find any useful documentation for this file, but it contains the timing of the current calibration and the coefficient values.
