## Read .ghg metadata

In [1]:
import os
import time
import pandas as pd
from Parse_GHG import read_GHG
from ipywidgets import FloatProgress
from IPython.display import display

# Site = 'BB2'

# Values we expect to change
dynamicMetaData_Headers = ['altitude','canopy_height']

# Values we want to fix
staticMetaData_FloatHeaders = ['altitude', 'latitude', 'longitude']
staticMetaData_TextHeaders = ['site_name']

T1 = time.time()
print('Processing ghg files ')
f = FloatProgress(min=0, max=1) 
display(f) 
# Walk through the directory to find all "raw" folders

N_recs = 4#8*31
def find_new_GHG(Site,make_new=False):

    i = 0
    Dpath = 'C:\\highfreq\\'
    raw_dir = Dpath+Site+'\\raw\\'
    meta_dir = Dpath+Site+'\\metadata\\'
    meta_file = meta_dir+'GHGMetaData.csv'
    channel_file = meta_dir+'EP_Channels.csv'

    if os.path.isfile(meta_file) and make_new is False:
        Records = pd.read_csv(meta_file,parse_dates=['TimeStamp'])
        Channels = pd.read_csv(channel_file)#,parse_dates=['TimeStamp'])
        FileNames = Records['filename'].tolist()
    else:
        Records = pd.DataFrame()
        Channels = pd.DataFrame()
        FileNames = []
        if not os.path.exists(meta_dir):
            os.mkdir(meta_dir)
    

    for (root, dirs, files) in sorted(os.walk(raw_dir)):
        # if root.__contains__('raw') :
        for file in files:
            name, tag = file.split('.')
            # .ghg files are located at the end of each directory tree
            # Avoids reading any that might be misplaced elsewhere
            if tag == 'ghg' and len(dirs)==0 and file not in FileNames:  
                
                # if i <= 48*5:
                if i <= 2:
                    GHG_out = read_GHG(root,name)
                    Records = pd.concat(
                        [Records,GHG_out.Summary],
                        axis=0,
                        ignore_index=True
                    )
                    Channels = pd.concat(
                        [Channels,GHG_out.Channels],
                        axis=0,
                        ignore_index=True
                    )
                    i += 1
            f.value = i/len(files)
    return(Records,Channels,meta_dir,i)

Records,Channels,meta_dir,i = find_new_GHG('BB',make_new=True)

Records.to_csv(meta_dir+'GHGMetaData.csv',index=False)
Channels.to_csv(meta_dir+'EP_Channels.csv',index=False)

elapsed = time.time()-T1
print('Processing time ', elapsed) 
print('To inspect ', i, ' .ghg files')
print('Seconds/file ', elapsed/(i+1)) 

staticMetaData = Records[staticMetaData_FloatHeaders].astype(float).mean()
staticMetaData = pd.concat([Records[staticMetaData_TextHeaders].astype(str).mode().T[0],staticMetaData])
print(staticMetaData)

dynamicMetaData = Records.set_index('TimeStamp')[dynamicMetaData_Headers]
dynamicMetaData['date']=dynamicMetaData.index.date
dynamicMetaData['time']=dynamicMetaData.index.time
dynamicMetaData.to_csv(meta_dir+'dynamicMetaData.csv',index=False)
dynamicMetaData

# Processing time  65.87000775337219
# To inspect  241  .ghg files

Records['altitude'].astype(float).describe()


Processing ghg files 


FloatProgress(value=0.0, max=1.0)

Processing time  1.131380558013916
To inspect  3  .ghg files
Seconds/file  0.282845139503479
Attribute
site_name    BurnsBog2018
altitude        -7.166667
latitude        49.129368
longitude     -122.984876
dtype: object


count    3.000000
mean    -7.166667
std      0.404145
min     -7.400000
25%     -7.400000
50%     -7.400000
75%     -7.050000
max     -6.700000
Name: altitude, dtype: float64

In [2]:
Channels = Channels.set_index('filename')
Channels[Channels.diff()!=0]

Unnamed: 0_level_0,CO2 dry(umol/mol),H2O dry(mmol/mol),CH4 (mmol/m^3),N2O (mmol/m^3),Temperature In (C),Temperature Out (C),Total Pressure (kPa),CH4 Temperature,CH4 Pressure,Cell Temperature (C),Diagnostic Value,CH4 Diagnostic Value,Anemometer Diagnostics,Diagnostic Value 2
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-06-17T120914_LI-7200.ghg,26.0,28.0,50.0,0.0,31.0,32.0,15.0,51.0,52.0,30.0,4.0,54.0,0.0,5.0
2022-06-17T123000_LI-7200.ghg,,,,,,,,,,,,,,
2022-06-17T130000_LI-7200.ghg,,,,,,,,,,,,,,


In [10]:
s = pd.Series([0, 1, 1, 1, 1, 2, 2, 2, 3, 4, 3, 4, 3, 4, 3, 4, 5, 5, 5])

s[s.diff()[s.diff() != 0].index.values]

0     0
1     1
5     2
8     3
9     4
10    3
11    4
12    3
13    4
14    3
15    4
16    5
dtype: int64