In [33]:
import pandas as pd
import os

def process_group(group, dir_name, mapping_keys_file, variable_name):
    """Combine the CSV files of one group into a single per-date profile CSV.

    Every file in ``dir_name`` whose name contains ``'Data'`` and at least one
    of the fragments in ``group`` is read. The rows are summed per ``Date``
    (``Depth`` and ``QC`` are averaged), ``Data`` is multiplied by the ``Conv``
    factor listed for ``variable_name`` in the mapping-keys file, the result is
    printed, and it is written to ``dir_name`` as
    ``WoodsLakeMiddle_<Key Value without spaces>_profile_Data.csv``.

    Parameters
    ----------
    group : str or iterable of str
        Filename fragment(s) selecting the input files. A single string is
        treated as one fragment.
    dir_name : str
        Directory that is scanned for inputs and receives the output file.
    mapping_keys_file : str
        CSV with at least the columns ``Params.Name``, ``Conv`` and
        ``Key Value``.
    variable_name : str
        Value written to the ``Variable`` column and used to select the row of
        the mapping-keys file.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If ``variable_name`` has no row in the mapping-keys file.
    ValueError
        If no non-empty input file matches ``group``.
    """
    # Resolve the mapping first so a bad variable name fails before any data
    # file is read, and so the output filename is known while scanning.
    mapping_keys_df = pd.read_csv(mapping_keys_file)
    mapping_row = mapping_keys_df.loc[mapping_keys_df['Params.Name'] == variable_name]
    if mapping_row.empty:
        # IndexError is what the bare ``.iloc[0]`` lookup used to raise.
        raise IndexError(
            f"No row with Params.Name == {variable_name!r} in {mapping_keys_file!r}"
        )
    conv_factor = mapping_row['Conv'].iloc[0]
    name = mapping_row['Key Value'].iloc[0]
    output_filename = f'WoodsLakeMiddle_{name.replace(" ","")}_profile_Data.csv'

    # A bare string would otherwise be iterated character by character.
    patterns = [group] if isinstance(group, str) else list(group)

    # os.listdir returns entries in arbitrary order; sort once for a
    # reproducible read order and to avoid re-listing per fragment.
    listing = sorted(os.listdir(dir_name))

    df_list = []
    seen = set()
    for group_name in patterns:
        for file in listing:
            # Each file contributes once, even if several fragments match it,
            # and a previous output of this same call is never re-ingested.
            if file in seen or file == output_filename:
                continue
            if 'Data' in file and group_name in file:
                seen.add(file)
                df = pd.read_csv(os.path.join(dir_name, file))
                if df.empty:
                    # Header-only file: nothing to add and no first row to print.
                    continue
                print(df['Variable'].iloc[0])
                df_list.append(df)

    if not df_list:
        raise ValueError(
            f"No non-empty 'Data' files in {dir_name!r} match any of {patterns!r}"
        )

    # Concatenate all dataframes in the list
    combined = pd.concat(df_list, ignore_index=True)
    combined['Variable'] = variable_name
    # Coerce before aggregating so a stray non-numeric cell becomes NaN
    # (and is skipped by the sum) instead of turning the column into objects.
    combined['Data'] = pd.to_numeric(combined['Data'], errors='coerce')

    # groupby sorts by 'Date', so no separate sort is needed.
    aggregated = combined.groupby('Date').agg({
        'Variable': 'first',
        'Depth': 'mean',
        'Data': 'sum',
        'QC': 'mean'
    }).reset_index()

    # Convert value of different units
    aggregated['Data'] = aggregated['Data'] * conv_factor

    # Reorder columns to match the original order
    df = aggregated[['Variable', 'Date', 'Depth', 'Data', 'QC']]
    print(df)

    # Write the aggregated DataFrame to a CSV file in the specified directory
    df.to_csv(os.path.join(dir_name, output_filename), index=False)

# Directory that process_group scans for input CSVs and writes its combined
# output into. NOTE: relative path, so it is resolved against the kernel's
# current working directory.
dir_name = "../../../data-warehouse/csv/ht/wlwq"
# CSV read by process_group; it uses the 'Params.Name', 'Conv' and
# 'Key Value' columns.
mapping_keys_file = "mapping_keys.csv"

In [34]:
## Dolichospermum

# Filename fragment(s) that select the input files for this variable
groups = ["Cyanobacteria(Dolichospermumsp.)"]

# Written to the 'Variable' column and used to look up the mapping-keys row
variable_name = 'Dolichospermum'

# Aggregate the matching files and write the combined profile CSV
process_group(
    group=groups,
    dir_name=dir_name,
    mapping_keys_file=mapping_keys_file,
    variable_name=variable_name,
)

Cyanobacteria (Dolichospermum sp.)
          Variable                 Date  Depth          Data   QC
0   Dolichospermum  2022-04-29 12:00:00    0.2  1.202500e-05  4.0
1   Dolichospermum  2022-05-27 13:15:00    0.2  3.575000e-05  4.0
2   Dolichospermum  2022-08-30 12:40:00    0.2  3.055000e-06  4.0
3   Dolichospermum  2023-02-24 11:40:00    0.2  8.125000e-05  4.0
4   Dolichospermum  2023-03-09 11:50:00    0.2  1.072500e-04  4.0
5   Dolichospermum  2023-03-20 12:15:00    0.2  6.175000e-05  4.0
6   Dolichospermum  2023-03-24 11:25:00    0.2  2.470000e-04  4.0
7   Dolichospermum  2023-03-31 11:15:00    0.2  1.690000e-04  4.0
8   Dolichospermum  2023-04-05 12:25:00    0.2  2.112500e-04  4.0
9   Dolichospermum  2023-04-14 11:20:00    0.2  2.535000e-04  4.0
10  Dolichospermum  2023-04-20 13:25:00    0.2  2.340000e-04  4.0
11  Dolichospermum  2023-04-28 11:30:00    0.2  2.730000e-04  4.0
12  Dolichospermum  2023-05-12 12:15:00    0.2  4.225000e-04  4.0
13  Dolichospermum  2023-05-24 15:25:00  

In [35]:
## Chlorophyte

# Filename fragment(s) that select the input files for this variable
groups = ["Unidentifiedchlorophyte", "Chlorophyta("]

# Written to the 'Variable' column and used to look up the mapping-keys row
variable_name = 'Chlorophyte'

# Aggregate the matching files and write the combined profile CSV
process_group(
    group=groups,
    dir_name=dir_name,
    mapping_keys_file=mapping_keys_file,
    variable_name=variable_name,
)

Unidentified chlorophyte
Chlorophyta (Sphaerocystis sp.)
Chlorophyta (Golenkiniopsis sp.)
Chlorophyta (Ulothrix sp.)
Chlorophyta (Chodatella sp.)
Chlorophyta (Staurastrum sp.)
Chlorophyta (Chlorogonium spp.)
Chlorophyta (Chlamydomonas sp.)
Chlorophyta (Botryococcus sp.)
Chlorophyta (Dictyosphaerium sp.)
Chlorophyta (Comasiella sp.)
Chlorophyta (Closterium sp.)
Chlorophyta (Closteriopsis sp.)
Chlorophyta (Staurodesmus sp.)
Chlorophyta (Oocystis sp.)
Chlorophyta (Nephrocytium sp.)
Chlorophyta (Sphaerellopsis sp.)
Chlorophyta (Paulschulzia sp.)
Chlorophyta (Mougeotia sp.)
Chlorophyta (Gonium sp.)
Chlorophyta (Monoraphidium sp.)
Chlorophyta (Scenedesmus sp.)
Chlorophyta (Pandorina sp.)
Chlorophyta (Desmodesmus sp.)
Chlorophyta (Planktosphaeria)
Chlorophyta (Micractinium sp.)
Chlorophyta (Pyramimonas sp.)
Chlorophyta (Microspora sp.)
Chlorophyta (Sphaerozosma vertebratum)
Chlorophyta (Kirchneriella sp.)
Chlorophyta (Golenkinia sp.)
Chlorophyta (Tetraedron sp.)
Chlorophyta (Ankistrodesmus sp

In [36]:
## Picoplankton

# Filename fragment(s) that select the input files for this variable
groups = [
    "Unidentifiedpicocyanobacteria", "Cyanobacteria(Aphanothece",
    "Cyanobacteria(Rhabdoderma", "Cyanobacteria(Synechococcus",
]

# Written to the 'Variable' column and used to look up the mapping-keys row
variable_name = 'Picoplankton'

# Aggregate the matching files and write the combined profile CSV
process_group(
    group=groups,
    dir_name=dir_name,
    mapping_keys_file=mapping_keys_file,
    variable_name=variable_name,
)

Unidentified pico cyanobacteria
Cyanobacteria (Aphanothece sp. 1)
Cyanobacteria (Aphanothece sp.)
Cyanobacteria (Rhabdoderma sp.)
Cyanobacteria (Synechococcus sp.)
        Variable                 Date  Depth          Data   QC
0   Picoplankton  2014-01-08 15:00:00    0.2  2.475000e-06  4.0
1   Picoplankton  2014-05-12 13:45:00    0.2  7.700000e-07  4.0
2   Picoplankton  2014-07-23 09:40:00    0.2  3.575000e-07  4.0
3   Picoplankton  2014-09-18 09:50:00    0.2  1.650000e-07  4.0
4   Picoplankton  2014-11-21 11:30:00    0.2  2.145000e-07  4.0
..           ...                  ...    ...           ...  ...
72  Picoplankton  2023-04-28 11:30:00    0.2  5.775000e-06  4.0
73  Picoplankton  2023-05-12 12:15:00    0.2  4.400000e-07  4.0
74  Picoplankton  2023-05-24 15:25:00    0.2  3.025000e-08  4.0
75  Picoplankton  2023-08-10 13:05:00    0.2  1.925000e-09  4.0
76  Picoplankton  2023-10-17 16:00:00    0.2  1.237500e-07  4.0

[77 rows x 5 columns]


In [37]:
## Diatom (Bacillariophyta)

# Filename fragment(s) that select the input files for this variable
groups = ["Bacillariophyta(", "BacillariophytaUnid"]

# Written to the 'Variable' column and used to look up the mapping-keys row
variable_name = 'Diatom (Bacillariophyta)'

# Aggregate the matching files and write the combined profile CSV
process_group(
    group=groups,
    dir_name=dir_name,
    mapping_keys_file=mapping_keys_file,
    variable_name=variable_name,
)

Bacillariophyta (Ulnaria sp.)
Bacillariophyta (Achnanthes sp.)
Bacillariophyta (Aulacoseira sp.)
Bacillariophyta (Gomphonema sp.)
Bacillariophyta (Cyclotella sp.)
Bacillariophyta (Diatoma sp.)
Bacillariophyta (Urosolenia sp.)
Bacillariophyta (Synedra sp.)
Bacillariophyta (Nitzschia sp.)
Bacillariophyta (Asterionella sp.)
Bacillariophyta (Fragilaria sp.)
Bacillariophyta (Navicula sp.)
Bacillariophyta (Cymbella hauckii)
Bacillariophyta Unid Diatom-Bacillariale
                    Variable                 Date  Depth          Data   QC
0   Diatom (Bacillariophyta)  2014-01-08 15:00:00    0.2  2.000000e-07  4.0
1   Diatom (Bacillariophyta)  2014-05-12 13:45:00    0.2  2.020000e-06  4.0
2   Diatom (Bacillariophyta)  2014-07-23 09:40:00    0.2  1.140400e-04  4.0
3   Diatom (Bacillariophyta)  2014-09-18 09:50:00    0.2  2.400000e-04  4.0
4   Diatom (Bacillariophyta)  2014-11-21 11:30:00    0.2  4.000000e-08  4.0
..                       ...                  ...    ...           ...  ...
91  D

In [38]:
## Cyanobacteria

# Filename fragment(s) that select the input files for this variable.
# NOTE(review): these look like output files written by the Dolichospermum and
# Picoplankton cells (the files read report those Variable names) - if so,
# those cells must be run first; confirm.
groups = ["PhytoplanktonBiomass(doli)", "PhytoplanktonBiomass(pico)"]

# Written to the 'Variable' column and used to look up the mapping-keys row
variable_name = 'Cyanobacteria'

# Aggregate the matching files and write the combined profile CSV
process_group(
    group=groups,
    dir_name=dir_name,
    mapping_keys_file=mapping_keys_file,
    variable_name=variable_name,
)

Dolichospermum
Picoplankton
          Variable                 Date  Depth          Data   QC
0    Cyanobacteria  2014-01-08 15:00:00    0.2  2.475000e-06  4.0
1    Cyanobacteria  2014-05-12 13:45:00    0.2  7.700000e-07  4.0
2    Cyanobacteria  2014-07-23 09:40:00    0.2  3.575000e-07  4.0
3    Cyanobacteria  2014-09-18 09:50:00    0.2  1.650000e-07  4.0
4    Cyanobacteria  2014-11-21 11:30:00    0.2  2.145000e-07  4.0
..             ...                  ...    ...           ...  ...
100  Cyanobacteria  2024-01-08 14:55:00    0.2  1.787500e-04  4.0
101  Cyanobacteria  2024-01-18 15:30:00    0.2  2.860000e-04  4.0
102  Cyanobacteria  2024-01-25 11:20:00    0.2  3.575000e-04  4.0
103  Cyanobacteria  2024-02-08 15:05:00    0.2  3.900000e-04  4.0
104  Cyanobacteria  2024-02-16 10:20:00    0.2  3.575000e-04  4.0

[105 rows x 5 columns]
