In [1]:
# Read csv into a pandas dataframe

import pandas as pd

#df = pd.read_csv('Woods Lake Middle AllData.csv', encoding = 'unicode_escape')

# Remote location of the samples file (WQSamples.csv) inside the
# AquaticEcoDynamics/tassielakes-data GitHub repository.
url = "https://github.com/AquaticEcoDynamics/tassielakes-data/raw/main/data-lake/HT/Hydrology/Arthurs%20Lake%20Spillway%20(418.1)/WQ%20at%20Morass%20Bay%20(418.24)/Samples/WQSamples.csv"

# Download the CSV and give the resulting frame a fresh 0..n-1 index in one step.
df = pd.read_csv(url, encoding = 'unicode_escape').reset_index(drop=True)

In [2]:
# Coerce the sample values to numbers; entries that cannot be parsed become NaN.
df['SampleValues.Val'] = df['SampleValues.Val'].pipe(pd.to_numeric, errors='coerce')

# # Filter rows where Params.Name is "Cobalt - Total/NonFilt as Co" and Params.Units is "ug/l"
# condition = (df['Params.Name'] == "Cobalt - Total/NonFilt as Co") & (df['Params.Units'] == "ug/l")

# # Check if there are any rows to convert
# if condition.any():
#     # Convert the values from ug/l to mg/l using .loc to modify the original DataFrame
#     df.loc[condition, 'SampleValues.Val'] *= 0.001
#     df.loc[condition, 'Params.Units'] = "mg/l"

In [3]:
## processed_data
# Create unique variable datafiles

import os
import numpy as np

# Lookup table loaded from the current working directory (relative path).
# This notebook reads three of its columns:
#   'Params.Name' - variable name matched against the sample data,
#   'Key Value'   - text inserted into each output file name,
#   'Conv'        - multiplier applied to the data values of that variable.
mapping_keys_df = pd.read_csv("mapping_keys.csv")

def filter_and_save_data(df, variable_name, output_filename, mapping_keys=None, directory='processed_data'):
    """Write the rows of a single variable to ``<directory>/<output_filename>`` as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns "Variable", "Date", "Depth",
        "Data" and "QC". It is never modified; all work happens on a copy.
    variable_name : str
        Value of the "Variable" column selecting the rows to export.
    output_filename : str
        Name of the CSV file created inside ``directory``.
    mapping_keys : pandas.DataFrame, optional
        Table with "Params.Name" and "Conv" columns. When omitted, the
        notebook-level ``mapping_keys_df`` is used.
    directory : str, optional
        Output folder; created when missing. Defaults to 'processed_data'.

    Returns
    -------
    None
        The result is the file written to disk.

    Notes
    -----
    * Empty-string cells are replaced with NaN.
    * "Data" is set to NaN on every row whose "QC" equals 140. The comparison
      is numeric, so it works whether the QC column holds numbers or text.
    * When ``variable_name`` appears in ``mapping_keys['Params.Name']``, "Data"
      is coerced to numeric (unparseable entries become NaN) and multiplied by
      the first matching "Conv" factor.
    * A file (header only) is still written when no row matches.
    """
    if mapping_keys is None:
        # Fall back to the table loaded at notebook level.
        mapping_keys = mapping_keys_df

    wanted_columns = ["Variable", "Date", "Depth", "Data", "QC"]

    # .copy() detaches the selection from the caller's frame, so the edits below
    # can neither leak back into it nor raise SettingWithCopyWarning.
    filtered_data = df.loc[df['Variable'] == variable_name, wanted_columns].copy()

    # Replace empty cells with NaN
    filtered_data = filtered_data.replace("", np.nan)

    # Blank the Data value where the QC code is 140. Comparing numerically means
    # "140", 140 and 140.0 all match; a string comparison would silently match
    # nothing when pandas has parsed the QC column as numbers.
    qc_codes = pd.to_numeric(filtered_data["QC"], errors='coerce')
    filtered_data["Data"] = filtered_data["Data"].mask(qc_codes == 140)

    # Convert value of different units
    conv_factors = mapping_keys.loc[mapping_keys['Params.Name'] == variable_name, 'Conv']
    if not conv_factors.empty:
        numeric_data = pd.to_numeric(filtered_data['Data'], errors='coerce')  # non-numeric values -> NaN
        filtered_data['Data'] = numeric_data * conv_factors.iloc[0]

    # Create the output directory if needed (no error when it already exists)
    os.makedirs(directory, exist_ok=True)

    # Write the filtered DataFrame to a CSV file in the specified directory
    filtered_data.to_csv(os.path.join(directory, output_filename), index=False)

# Create unique variable datafiles
# Pull the five columns needed from the raw samples table and give them short names.
# .copy() makes this an independent frame, so the edits below do not act on a slice of df.
# NOTE(review): the slice starts at index label 1, so the row labelled 0 is left
# out of every output file - confirm this is intentional.
required_columns = df.loc[1:, ["Params.Name", "Samples.DateAndTime", "SampleDepths.Depth", "SampleValues.Val", "SampleValues.Quality"]].copy()
required_columns.columns = ["Variable", "Date", "Depth", "Data", "QC"]

# Convert the 'Date' column to datetime format.
# errors='coerce' turns any value that does not match day/month/year hour:minute into NaT.
required_columns['Date'] = pd.to_datetime(required_columns['Date'], format='%d/%m/%Y %H:%M', errors='coerce')

# Convert date to desired format (text, year first)
required_columns['Date'] = required_columns['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Sort on the formatted text; the year-first layout makes text order follow time order.
required_columns = required_columns.sort_values(by='Date')

# Replace empty cells with NaN
required_columns = required_columns.replace("", np.nan)

# Specify the variables needed: the first column of the mapping table
first_column = mapping_keys_df.iloc[:, 0]
print(first_column)

# Iterate over variables, find the key value, and save filtered data
for variable in first_column:
    # Filter mapping_keys_df to find the row corresponding to the variable
    key_row = mapping_keys_df[mapping_keys_df['Params.Name'] == variable]

    # A variable without a usable key cannot be given a file name. Skip it instead
    # of failing with AttributeError on None/NaN and aborting the remaining variables.
    if key_row.empty or pd.isna(key_row['Key Value'].iloc[0]):
        print(f"Skipping {variable!r}: no 'Key Value' found in mapping_keys_df")
        continue

    # Extract the key value from the row (str() guards against non-text keys)
    key_value = str(key_row['Key Value'].iloc[0])

    # Construct the output filename (spaces removed from the key)
    output_filename = f'ArthursLakeSpillway_{key_value.replace(" ", "")}_profile_Data.csv'

    # Filter and save data
    filter_and_save_data(required_columns, variable, output_filename)

0                           Water Temperature
1                       Nitrogen - Total as N
2                     Phosphorus as P - Total
3              Suspended Solids - Total (TSS)
4                                   Turbidity
                       ...                   
81    Bacillariophyta (Tabellaria flocculosa)
82          Cyanobacteria (Aphanizomenon sp.)
83            Ochrophyta (Ophiocytiaceae sp.)
84               PAR uni-directional in water
85                             Sample bottles
Name: Params.Name, Length: 86, dtype: object


In [18]:
#test = pd.read_csv('processed_data/WoodsLakeMiddle_Water Temperature_profile_Data.csv', encoding = 'unicode_escape')