This notebook creates a csv with the Rrs data from all the HydroLight printout files in a given folder.

In [1]:
import numpy as np
import pandas as pd
import os
import re
import xarray as xr

`HEfilepath` is the full path to the folder where all the hydrolight printout files are saved - **change this to where the files are saved on your computer**

In [2]:
HEfilepath = '/mnt/storage/labs/mitchell/cmitchell/projects/git-repos/lab-guides-examples-etc/HydroLight/data/HEoutputfiles/'
HEfiles = os.listdir(HEfilepath)

In [3]:
HEfiles

['P23.txt',
 'P0.txt',
 'P1.txt',
 'P2.txt',
 'P3.txt',
 'P4.txt',
 'P5.txt',
 'P6.txt',
 'P7.txt',
 'P8.txt',
 'P9.txt',
 'P10.txt',
 'P11.txt',
 'P12.txt',
 'P13.txt',
 'P14.txt',
 'P15.txt',
 'P16.txt',
 'P17.txt',
 'P18.txt',
 'P19.txt',
 'P20.txt',
 'P21.txt',
 'P22.txt']

# Setup data frame rows and columns

Row and column names - **the wavelengths will probably need to be changed to match what is in your HydroLight printout files.**

In [8]:
wavelengths = np.linspace(422.5,742.5,65)

In [9]:
wavelengths

array([422.5, 427.5, 432.5, 437.5, 442.5, 447.5, 452.5, 457.5, 462.5,
       467.5, 472.5, 477.5, 482.5, 487.5, 492.5, 497.5, 502.5, 507.5,
       512.5, 517.5, 522.5, 527.5, 532.5, 537.5, 542.5, 547.5, 552.5,
       557.5, 562.5, 567.5, 572.5, 577.5, 582.5, 587.5, 592.5, 597.5,
       602.5, 607.5, 612.5, 617.5, 622.5, 627.5, 632.5, 637.5, 642.5,
       647.5, 652.5, 657.5, 662.5, 667.5, 672.5, 677.5, 682.5, 687.5,
       692.5, 697.5, 702.5, 707.5, 712.5, 717.5, 722.5, 727.5, 732.5,
       737.5, 742.5])

In [11]:
wavelengths = [412.5, 429, 443, 453.5, 471, 488, 509.5, 531, 540.5, 547, 551, 555, 563.5, 577.5, 592.5, 607.5,
               622.5, 637.5, 653.5, 667, 676.5, 692, 713, 733, 748]

run = [ss[:-4] for ss in HEfiles]

# Reading the HE output files

The printout files have a weird format - very human readable, not very computer readable.

Approach: read files line by line until it reaches a specific string (e.g. "Rrs = Lw/Ed"), records line number, and then saves the data from a certain line after the line with the string (this line to save the data from changes depending on the string - have a look at a printout file along with the code)

In [12]:
Rrsdf = pd.DataFrame()
for HEf in HEfiles:
    Rrslist = []
    linenum = -1
    with open(HEfilepath+HEf,'r') as f:
        for lineno,line in enumerate(f):
            if 'Rrs = Lw/Ed' in line:
                linenum = lineno+3
            elif lineno == linenum:
                Rrslist += [float(line.split()[-1])]

    Rrsdf[HEf[1:9]] = Rrslist         
    
Rrsdf.columns = run
Rrsdf.index = wavelengths
Rrsdf.index.name = 'Wavelength'

In [25]:
Rrsdf

Unnamed: 0_level_0,P23,P0,P1,P2,P3,P4,P5,P6,P7,P8,...,P13,P14,P15,P16,P17,P18,P19,P20,P21,P22
Wavelength,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
412.5,0.005365,0.00774,0.002732,0.001656,0.001188,0.01,0.00652,0.004814,0.003809,0.01039,...,0.00342,0.00228,0.001709,0.0091,0.006395,0.004907,0.003974,0.009762,0.007691,0.006327
429.0,0.006109,0.007898,0.003034,0.001873,0.001355,0.01123,0.007498,0.005597,0.004455,0.01184,...,0.003524,0.002448,0.001874,0.009557,0.006972,0.005464,0.004484,0.01064,0.008556,0.007136
443.0,0.006793,0.007923,0.003294,0.002073,0.001512,0.0123,0.008397,0.006337,0.005076,0.01317,...,0.003598,0.002586,0.002017,0.009925,0.007463,0.005955,0.004944,0.01139,0.009321,0.007866
453.5,0.007538,0.008381,0.003582,0.00227,0.001661,0.01338,0.009248,0.00702,0.005641,0.0144,...,0.003929,0.002838,0.002219,0.01084,0.008219,0.00659,0.005487,0.01247,0.01027,0.008704
471.0,0.008712,0.008802,0.004041,0.00261,0.001927,0.01515,0.01073,0.00825,0.006679,0.01653,...,0.004213,0.003133,0.00249,0.01177,0.009173,0.007482,0.006302,0.01388,0.01162,0.00997
488.0,0.01033,0.009646,0.004636,0.003033,0.002252,0.0173,0.0125,0.009722,0.007922,0.01901,...,0.004908,0.003674,0.002931,0.01364,0.01076,0.008846,0.007488,0.01611,0.01362,0.01176
509.5,0.0127,0.009406,0.005106,0.003484,0.00264,0.01931,0.0145,0.01153,0.009535,0.02184,...,0.005877,0.004457,0.00358,0.0162,0.01302,0.01083,0.009239,0.01921,0.01645,0.01435
531.0,0.01544,0.009291,0.005592,0.003977,0.003082,0.02142,0.01667,0.01357,0.0114,0.02491,...,0.006905,0.005332,0.004329,0.01889,0.01549,0.01307,0.01127,0.02257,0.01961,0.0173
540.5,0.01685,0.009322,0.00583,0.004218,0.003299,0.02244,0.01772,0.01456,0.01232,0.02638,...,0.007457,0.005799,0.004728,0.02027,0.01676,0.01423,0.01232,0.02427,0.02121,0.0188
547.0,0.01778,0.009043,0.005862,0.004316,0.00341,0.02279,0.01822,0.01511,0.01286,0.02714,...,0.007757,0.006078,0.004978,0.02107,0.01754,0.01497,0.01301,0.02532,0.02224,0.01978


**CHANGE the below to where you'd like the data to be saved, currently it is set to save in a file called 'Rrs_data.csv'**

In [27]:
Rrsdf.to_csv('Rrs_data.csv')