## Read .ghg metadata

In [5]:
import os
import time
import pandas as pd
from Parse_GHG import read_GHG
from ipywidgets import FloatProgress
from IPython.display import display

# --- Locations -------------------------------------------------------------
# Folder layout is <Dpath><Site>\raw\ and <Dpath><Site>\metadata\ ; both
# strings keep their trailing separator because later code appends file names
# to them by plain concatenation.
Dpath = 'C:\\highfreq\\'
Site = 'BB2'
raw_dir = f'{Dpath}{Site}\\raw\\'
meta_dir = f'{Dpath}{Site}\\metadata\\'

# --- Metadata columns of interest -------------------------------------------
# Values we expect to change (exported per record)
dynamicMetaData_Headers = ['altitude', 'canopy_height']

# Values we want to fix (collapsed to a single value per site)
staticMetaData_FloatHeaders = ['altitude', 'latitude', 'longitude']
staticMetaData_TextHeaders = ['site_name']

# Limit used by the loop below to stop reading further .ghg files.
# The previous setting was 48*5 (presumably 48 half-hourly files x 5 days
# -- TODO confirm).
N_recs = 10

# --- Run bookkeeping ---------------------------------------------------------
T1 = time.time()   # start of the wall-clock timer reported after the loop
i = 0              # number of .ghg files read so far
print('Processing ghg files ')
f = FloatProgress(min=0, max=1)   # progress widget, value is a 0..1 fraction
display(f)

# Step 1: locate every .ghg file under raw_dir (cheap, no file is opened yet).
# Knowing the full list up front gives the progress bar a real denominator and
# lets us stop after exactly N_recs files.
ghg_files = []
for (root, dirs, files) in sorted(os.walk(raw_dir)):
    # .ghg files are located at the end of each directory tree
    # Avoids reading any that might be misplaced elsewhere
    if len(dirs) > 0:
        continue
    # Sort so that the subset picked by N_recs is the same on every run
    for file in sorted(files):
        # splitext copes with names that contain no dot or several dots,
        # where `name, tag = file.split('.')` would raise ValueError
        name, ext = os.path.splitext(file)
        if ext == '.ghg':
            ghg_files.append((root, name))

if not ghg_files:
    raise FileNotFoundError('No .ghg files found under ' + raw_dir)
if N_recs < 1:
    raise ValueError('N_recs must be at least 1')

# Step 2: read at most N_recs files (the old `i <= N_recs` test read one extra)
selected = ghg_files[:N_recs]

# Summaries are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
summaries = []
for i, (root, name) in enumerate(selected, start=1):
    # read info in .ghg files
    GHG_out = read_GHG(root, name)
    if i == 1:
        # configuration of the first file is kept as the template
        config_template = GHG_out.config
    summaries.append(GHG_out.Summary)
    # fraction of the selected files processed so far (widget range is 0..1)
    f.value = i / len(selected)

# One row block per file, re-indexed 0..n-1. `i` now holds the number of files
# read and `GHG_out` the last file read; both are used further down.
Records = pd.concat(summaries, axis=0, ignore_index=True)

# Report how long the read loop took and how many files it covered
elapsed = time.time() - T1
print('Processing time ', elapsed)
print('To inspect ', i, ' .ghg files')

# Static metadata: one value per header for the whole site.
#   - float headers -> mean over all records
#   - text headers  -> most frequent value (first row of .mode())
float_means = Records[staticMetaData_FloatHeaders].astype(float).mean()
text_modes = Records[staticMetaData_TextHeaders].astype(str).mode().loc[0]
staticMetaData = pd.concat([text_modes, float_means])
print(staticMetaData)

# Dynamic metadata: the per-record values, indexed by TimeStamp, with the
# timestamp additionally split into separate date and time columns.
dynamicMetaData = (
    Records
    .set_index('TimeStamp')[dynamicMetaData_Headers]
    .assign(
        date=lambda frame: frame.index.date,
        time=lambda frame: frame.index.time,
    )
)
# index=False: the TimeStamp index is not written, only the date/time columns
dynamicMetaData.to_csv(f'{meta_dir}dynamicMetaData.csv', index=False)
dynamicMetaData


Processing ghg files 


FloatProgress(value=0.0, max=1.0)

Processing time  65.87000775337219
To inspect  241  .ghg files
Attribute
site_name     Burns Bog
altitude       0.639004
latitude       49.11906
longitude   -122.995199
dtype: object


Attribute,altitude,canopy_height,date,time
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-19 11:35:30,4.6,0.65,2022-05-19,11:35:30
2022-05-19 12:00:00,4.6,0.65,2022-05-19,12:00:00
2022-05-19 12:30:00,8.3,0.65,2022-05-19,12:30:00
2022-05-19 13:00:00,8.3,0.65,2022-05-19,13:00:00
2022-05-19 13:30:00,17.6,0.65,2022-05-19,13:30:00
...,...,...,...,...
2022-05-24 09:30:00,-11,0.65,2022-05-24,09:30:00
2022-05-24 10:00:00,0,0.65,2022-05-24,10:00:00
2022-05-24 10:30:00,0,0.65,2022-05-24,10:30:00
2022-05-24 11:00:00,2.2,0.65,2022-05-24,11:00:00


In [7]:
# Static = []
# Non_Static = []



# List every metadata column whose value is not constant across the records
# that were read, together with its number of distinct values.
for column in Records.columns:
    n_distinct = len(Records[column].unique())
    if n_distinct > 1 and column in GHG_out.MetaData_Tags:
        print(column, n_distinct)

altitude 101
latitude 62
longitude 53
instr_2_tube_flowrate 134
col_10_nom_timelag 239
col_10_min_timelag 238
col_10_max_timelag 239
col_12_nom_timelag 239
col_12_min_timelag 239
col_12_max_timelag 239
col_25_nom_timelag 239
col_25_min_timelag 238
col_25_max_timelag 239
col_26_nom_timelag 239
col_26_min_timelag 238
col_26_max_timelag 239
col_27_nom_timelag 239
col_27_min_timelag 239
col_27_max_timelag 239
col_28_nom_timelag 239
col_28_min_timelag 239
col_28_max_timelag 239
