In [47]:
import os
import pandas as pd
from pyproj import Proj, transform

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency',
        'Agency Code': 'Agency_Code',
        'Program': 'Program_Name',
        'Project': 'Program_Code',
        'Date': 'Date',
        'Data': 'Data',
        'Depth': 'Depth',
        'Variable ID': 'Variable_ID',
        'Variable': 'Variable_Name',
        'Data File Name': 'Data_File_Name',
        'Location': 'Data_File_Location',
        'Station Status': 'Status',
        'Lat': 'Lat',
        'Long': 'Lon',
        'Time Zone': 'Time_Zone',
        'Vertical Datum': 'Vertical_Datum',
        'National Station ID': 'Station_ID',
        'Site Description': 'Site_Description',
        'Deployment': 'Deployment',
        'Deployment Position': 'Deployment_Postion',
        'Vertical Reference': 'Vertical_Reference'
    }, inplace=True)
    
    # Add new columns with default values
    combined_df['calc_SMD'] = 0.0
    combined_df['mAHD'] = 0.0
    
    # Convert Lat and Lon to UTM coordinates (XUTM, YUTM)
    in_proj = Proj(init='epsg:4326')  # WGS84 coordinates (Lat, Lon)
    out_proj = Proj(init='epsg:32755')  # UTM Zone 55S
    
    def convert_to_utm(lat, lon):
        x, y = transform(in_proj, out_proj, lon, lat)
        return x, y
    
    combined_df['XUTM'], combined_df['YUTM'] = zip(*combined_df.apply(lambda row: convert_to_utm(row['Lat'], row['Lon']), axis=1))

    # Load variable key data
    variable_key_df = pd.read_csv('../../data-governance/variable_key.csv')
    
    # Merge with variable key to get Units
    combined_df = pd.merge(combined_df, variable_key_df[['Variable ID', 'Units']], left_on='Variable_ID', right_on='Variable ID', how='left')
    
    # Rearrange columns as requested
    column_order = ['Agency', 'Agency_Code', 'Program_Name', 'Program_Code', 'Date', 'Data', 'Depth', 'Variable_ID', 'Variable_Name', 'Units', 'Data_File_Name', 'Data_File_Location', 'Status', 'Lat', 'Lon', 'XUTM', 'YUTM', 'Time_Zone', 'Vertical_Datum', 'Station_ID', 'Site_Description', 'Deployment', 'Deployment_Postion', 'Vertical_Reference', 'calc_SMD', 'mAHD']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

combined_df = pd.DataFrame([])
csv_file_path = "../../data-warehouse/parquet/agency/test_csiem_HT_public.csv"

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        print( f"start processing: {header_file} and {data_file}")
        combined_df = merge_header_and_data(header_file, data_file)

        if os.path.exists(csv_file_path):
            combined_df.to_csv(csv_file_path, mode='a', header=None, index=False)
        else:
            combined_df.to_csv(csv_file_path, mode='a', index=False)

        print( f"{header_file} and {data_file} processed")
    else:
        print(f"Data file not found for {header_file}")

# Export combined_df as Parquet file
final_df = pd.read_csv(csv_file_path)
parquet_file_path = "../../data-warehouse/parquet/agency/test_csiem_HT_public.parq"
print(f"Combined DataFrame exported to {parquet_file_path}")
final_df.to_parquet(parquet_file_path, index=False)

start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedfilamentouscyanobacteria_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedfilamentouscyanobacteria_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedfilamentouscyanobacteria_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedfilamentouscyanobacteria_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_DissolvedOxygen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_DissolvedOxygen_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_DissolvedOxygen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_DissolvedOxygen_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Charophyta(Elakatothrixsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Charophyta(Elakatothrixsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Charophyta(Elakatothrixsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Charophyta(Elakatothrixsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Mougeotiasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Mougeotiasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Mougeotiasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophy

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Dinophyta(Peridiniumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Dinophyta(Peridiniumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_pH_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_pH_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_pH_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_pH_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Uroglenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Uroglenasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Uroglenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Uroglenasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Kephyrionsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Kephyrionsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Kephyrionsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Kephyrionsp.)_profile_Data.csv processed
st

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindSpeedEstimate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindSpeedEstimate_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindSpeedEstimate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindSpeedEstimate_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrate_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrate_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Dinobryonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Dinobryonsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Dinobryonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Dinobryonsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanizomenonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanizomenonsp.)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanizomenonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanizomenonsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Botryococcussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Botryococcussp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Botryococcussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Botryococcussp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Dictyosphaeriumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Dictyosphaeriumsp.)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Dictyosphaeriumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Dictyosphaeriumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanothecesp.1)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanothecesp.1)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanothecesp.1)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Aphanothecesp.1)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chromista(Unidentifieddinoflagellate)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chromista(Unidentifieddinoflagellate)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chromista(Unidentifieddinoflagellate)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chromista(Unidentifieddinoflagellate)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(Inflow)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(Inflow)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(Inflow)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(Inflow)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalKjeldahiNitrogen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalKjeldahiNitrogen_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalKjeldahiNitrogen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalKjeldahiNitrogen_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Samplebottles_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Samplebottles_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Samplebottles_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Samplebottles_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedchlorophyte_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedchlorophyte_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedchlorophyte_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedchlorophyte_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Micractiniumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Micractiniumsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Micractiniumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Micractiniumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceHeight_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceHeight_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceHeight_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceHeight_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Cryptomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Cryptomonassp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Cryptomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Cryptomonassp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Euglenophyta(Euglenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Euglenophyta(Euglenasp.)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Euglenophyta(Euglenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Euglenophyta(Euglenasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceCondition_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceCondition_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceCondition_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WaterSurfaceCondition_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Achnanthessp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Achnanthessp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Achnanthessp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Dinophyta(Gymnodiniumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Dinophyta(Gymnodiniumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Desmodesmussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Desmodesmussp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Desmodesmussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Desmodesmussp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARuni-directionalinwater_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARuni-directionalinwater_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARuni-directionalinwater_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARuni-directionalinwater_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_O2Saturation_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_O2Saturation_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_O2Saturation_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_O2Saturation_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Crucigeniasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Crucigeniasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Crucigeniasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Crucigeniasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Spiniferomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Spiniferomonassp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Spiniferomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochroph

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Temperature_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Temperature_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(SpillwayDischarge)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(SpillwayDischarge)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(SpillwayDischarge)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Discharge(SpillwayDischarge)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(TotalBlueGreenAlgae)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(TotalBlueGreenAlgae)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(TotalBlueGreenAlgae)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(TotalBlueGreenAlgae)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Monoraphidiumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Monoraphidiumsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Monoraphidiumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Monoraphidiumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Naviculasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Naviculasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Naviculasp.)_profile_Header.csv and ../../d

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Oocystissp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Oocystissp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Scenedesmussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Scenedesmussp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Scenedesmussp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Scenedesmussp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Salinity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Salinity_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Salinity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Salinity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Cosmariumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Cosmariumsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Cosmariumsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Cosmariumsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalPhosphorus_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalPhosphorus_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalPhosphorus_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalPhosphorus_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Turbidity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Turbidity_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Turbidity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Turbidity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Nitzschiasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Nitzschiasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Nitzschiasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Nitzschiasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedpicocyanobacteria_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedpicocyanobacteria_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Unidentifiedpicocyanobacteria_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/A

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Turbidity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Turbidity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredReactivePhosphorus_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredReactivePhosphorus_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredReactivePhosphorus_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredReactivePhosphorus_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Cyanodictyonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Cyanodictyonsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Cyanodictyonsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Cyanodictyonsp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARomni-directional_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARomni-directional_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARomni-directional_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_PARomni-directional_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Tabellariaflocculosa)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Tabellariaflocculosa)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Tabellariaflocculosa)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Tabellariaflocculosa)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalOxidisedNitriteandNitrate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalOxidisedNitriteandNitrate_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalOxidisedNitriteandNitrate_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalOxidisedNitriteandNitrate_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredSampleVolume_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredSampleVolume_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredSampleVolume_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredSampleVolume_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Sphaerocystissp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Sphaerocystissp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Sphaerocystissp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpil

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_BacillariophytaUnidDiatom-Bacillariale_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_BacillariophytaUnidDiatom-Bacillariale_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrite_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrite_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrite_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Nitrite_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SecchiDepth_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SecchiDepth_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SecchiDepth_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SecchiDepth_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_RedoxUSStndOxid_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_RedoxUSStndOxid_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_RedoxUSStndOxid_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_RedoxUSStndOxid_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Temperature_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Temperature_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Temperature_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Temperature_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Anabaenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Anabaenasp.)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Anabaenasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Anabaenasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_UnidentifiedChrysophyte_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_UnidentifiedChrysophyte_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_UnidentifiedChrysophyte_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_UnidentifiedChrysophyte_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Paulschulziasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Paulschulziasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Paulschulziasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Paulschulziasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SpecificConductivity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SpecificConductivity_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SpecificConductivity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_SpecificConductivity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Synurasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Synurasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Synurasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Synurasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ammonium_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ammonium_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ammonium_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ammonium_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_SpecificConductivity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_SpecificConductivity_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_SpecificConductivity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_SpecificConductivity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Chrysosphaerellasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Chrysosphaerellasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Chrysosphaerellasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Ochrophyta(Chrysosphaerellasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Tetraedronsp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Tetraedronsp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Tetraedronsp.)_profile_Header.csv and ../../data-wa

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Globulifera)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chrysophyta(Globulifera)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_O2Saturation_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_O2Saturation_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_O2Saturation_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_O2Saturation_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Urosoleniasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Urosoleniasp.)_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Urosoleniasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Urosoleniasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Rhabdogloeasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Rhabdogloeasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Rhabdogloeasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Rhabdogloeasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindDirection_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_WindDirection_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a(mgperfilter)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a(mgperfilter)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a(mgperfilter)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyll-a(mgperfilter)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Chlorogoniumspp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Chlorogoniumspp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlorophyta(Chlorogoniumspp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Chlor

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalSuspendedSolids_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalSuspendedSolids_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredTurbidity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredTurbidity_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredTurbidity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_FilteredTurbidity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Phyco_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Phyco_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Phyco_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Phyco_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Chroomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Chroomonassp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Chroomonassp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cryptophyta(Chroomonassp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Amphorasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Amphorasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacillariophyta(Amphorasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Bacilla

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Chlorophyll-a_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillwayContinuous_Chlorophyll-a_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_CloudCover_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_CloudCover_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_CloudCover_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_CloudCover_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Geitlerinemasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Geitlerinemasp.)_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Geitlerinemasp.)_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Cyanobacteria(Geitlerinemasp.)_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalNitrogen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalNitrogen_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalNitrogen_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalNitrogen_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Equipmentvoltage_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Equipmentvoltage_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Equipmentvoltage_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_Equipmentvoltage_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalAlkalinity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalAlkalinity_profile_Data.csv
../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalAlkalinity_profile_Header.csv and ../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_TotalAlkalinity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Data.csv processed
start processing: ../../data-warehouse/csv/ht/met/BOM_WindSpeed_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindSpeed_profile_Data.csv


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


KeyboardInterrupt: 

In [28]:
import os
import pandas as pd
from pyproj import Proj, transform

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency-str',
        'Agency Code': 'Agency_Code-str',
        'Program': 'Program_Name-str',
        'Project': 'Program_Code-str',
        'Date': 'Date-str',
        'Data': 'Data-f64',
        'Depth': 'Depth-f64',
        'Variable ID': 'Variable_ID-str',
        'Variable': 'Variable_Name-str',
        'Data File Name': 'Data_File_Name-str',
        'Location': 'Data_File_Location-str',
        'Station Status': 'Status-str',
        'Lat': 'Lat-f64',
        'Long': 'Lon-f64',
        'Time Zone': 'Time_Zone-str',
        'Vertical Datum': 'Vertical_Datum-str',
        'National Station ID': 'Station_ID-str',
        'Site Description': 'Site_Description-str',
        'Deployment': 'Deployment-str',
        'Deployment Position': 'Deployment_Postion-str',
        'Vertical Reference': 'Vertical_Reference-str'
    }, inplace=True)
    
    # Add new columns with default values
    combined_df['calc_SMD-f64'] = 0.0
    combined_df['mAHD-f64'] = 0.0
    
    # Convert Lat-f64 and Lon-f64 to UTM coordinates (XUTM-f64, YUTM-f64)
    in_proj = Proj(init='epsg:4326')  # WGS84 coordinates (Lat, Lon)
    out_proj = Proj(init='epsg:32755')  # UTM Zone 55S
    
    def convert_to_utm(lat, lon):
        x, y = transform(in_proj, out_proj, lon, lat)
        return x, y
    
    combined_df['XUTM-f64'], combined_df['YUTM-f64'] = zip(*combined_df.apply(lambda row: convert_to_utm(row['Lat-f64'], row['Lon-f64']), axis=1))

    # Load variable key data
    variable_key_df = pd.read_csv('../../data-governance/variable_key.csv')
    
    # Merge with variable key to get Units
    combined_df = pd.merge(combined_df, variable_key_df[['Variable ID', 'Units']], left_on='Variable_ID-str', right_on='Variable ID', how='left')
    
    # Rename Units column to 'Units-str'
    combined_df.rename(columns={'Units': 'Units-str'}, inplace=True)
    
    # Rearrange columns as requested
    column_order = ['Agency-str', 'Agency_Code-str', 'Program_Name-str', 'Program_Code-str', 'Date-str', 'Data-f64', 'Depth-f64', 'Variable_ID-str', 'Variable_Name-str', 'Units-str', 'Data_File_Name-str', 'Data_File_Location-str', 'Status-str', 'Lat-f64', 'Lon-f64', 'XUTM-f64', 'YUTM-f64', 'Time_Zone-str', 'Vertical_Datum-str', 'Station_ID-str', 'Site_Description-str', 'Deployment-str', 'Deployment_Postion-str', 'Vertical_Reference-str', 'calc_SMD-f64', 'mAHD-f64']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2000-01-01 00:00:00       NaN          0        var00152        IDCJAC0009   
1  2000-01-02 00:00:00       NaN          0        var00152        IDCJAC0009   
2  2000-01-03 00:00:00       NaN          0        var00152        IDCJAC0009   
3  2000-01-04 00:00:00       NaN          0        var00152        IDCJAC0009   
4  2000-01-05 00:00:00      

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


KeyboardInterrupt: 

In [26]:
import os
import pandas as pd
from pyproj import Proj, transform

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency-str',
        'Agency Code': 'Agency_Code-str',
        'Program': 'Program_Name-str',
        'Project': 'Program_Code-str',
        'Date': 'Date-str',
        'Data': 'Data-f64',
        'Depth': 'Depth-f64',
        'Variable ID': 'Variable_ID-str',
        'Variable': 'Variable_Name-str',
        'Data File Name': 'Data_File_Name-str',
        'Location': 'Data_File_Location-str',
        'Station Status': 'Status-str',
        'Lat': 'Lat-f64',
        'Long': 'Lon-f64',
        'Time Zone': 'Time_Zone-str',
        'Vertical Datum': 'Vertical_Datum-str',
        'National Station ID': 'Station_ID-str',
        'Site Description': 'Site_Description-str',
        'Deployment': 'Deployment-str',
        'Deployment Position': 'Deployment_Postion-str',
        'Vertical Reference': 'Vertical_Reference-str'
    }, inplace=True)
    
    # Add new columns with default values
    combined_df['calc_SMD-f64'] = 0.0
    combined_df['mAHD-f64'] = 0.0
    
    # Convert Lat-f64 and Lon-f64 to UTM coordinates (XUTM-f64, YUTM-f64)
    in_proj = Proj(init='epsg:4326')  # WGS84 coordinates (Lat, Lon)
    out_proj = Proj(init='epsg:32755')  # UTM Zone 55S
    
    def convert_to_utm(lat, lon):
        x, y = transform(in_proj, out_proj, lon, lat)
        return x, y
    
    combined_df['XUTM-f64'], combined_df['YUTM-f64'] = zip(*combined_df.apply(lambda row: convert_to_utm(row['Lat-f64'], row['Lon-f64']), axis=1))

    # Rearrange columns as requested
    column_order = ['Agency-str', 'Agency_Code-str', 'Program_Name-str', 'Program_Code-str', 'Date-str', 'Data-f64', 'Depth-f64', 'Variable_ID-str', 'Variable_Name-str', 'Data_File_Name-str', 'Data_File_Location-str', 'Status-str', 'Lat-f64', 'Lon-f64', 'XUTM-f64', 'YUTM-f64', 'Time_Zone-str', 'Vertical_Datum-str', 'Station_ID-str', 'Site_Description-str', 'Deployment-str', 'Deployment_Postion-str', 'Vertical_Reference-str', 'calc_SMD-f64', 'mAHD-f64']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2000-01-01 00:00:00       NaN          0        var00152        IDCJAC0009   
1  2000-01-02 00:00:00       NaN          0        var00152        IDCJAC0009   
2  2000-01-03 00:00:00       NaN          0        var00152        IDCJAC0009   
3  2000-01-04 00:00:00       NaN          0        var00152        IDCJAC0009   
4  2000-01-05 00:00:00      

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  x, y = transform(in_proj, out_proj, lon, lat)


KeyboardInterrupt: 

In [25]:
import os
import pandas as pd
from pyproj import Proj, transform

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency-str',
        'Agency Code': 'Agency_Code-str',
        'Program': 'Program_Name-str',
        'Project': 'Program_Code-str',
        'Date': 'Date-str',
        'Data': 'Data-f64',
        'Depth': 'Depth-f64',
        'Variable ID': 'Variable_ID-str',
        'Variable': 'Variable_Name-str',
        'Data File Name': 'Data_File_Name-str',
        'Location': 'Data_File_Location-str',
        'Station Status': 'Status-str',
        'Lat': 'Lat-f64',
        'Long': 'Lon-f64',
        'Time Zone': 'Time_Zone-str',
        'Vertical Datum': 'Vertical_Datum-str',
        'National Station ID': 'Station_ID-str',
        'Site Description': 'Site_Description-str',
        'Deployment': 'Deployment-str',
        'Deployment Position': 'Deployment_Postion-str',
        'Vertical Reference': 'Vertical_Reference-str'
    }, inplace=True)
    
    # Add new columns with default values
    combined_df['calc_SMD-f64'] = 0.0
    combined_df['mAHD-f64'] = 0.0
    
    # Convert Lat-f64 and Lon-f64 to UTM coordinates (XUTM-f64, YUTM-f64)
    in_proj = Proj('epsg:4326')  # WGS84 coordinates (Lat, Lon)
    out_proj = Proj('epsg:32732')  # UTM Zone 32N
    
    def convert_to_utm(lat, lon):
        x, y = transform(in_proj, out_proj, lon, lat)
        return x, y
    
    combined_df['XUTM-f64'], combined_df['YUTM-f64'] = zip(*combined_df.apply(lambda row: convert_to_utm(row['Lat-f64'], row['Lon-f64']), axis=1))

    # Rearrange columns as requested
    column_order = ['Agency-str', 'Agency_Code-str', 'Program_Name-str', 'Program_Code-str', 'Date-str', 'Data-f64', 'Depth-f64', 'Variable_ID-str', 'Variable_Name-str', 'Data_File_Name-str', 'Data_File_Location-str', 'Status-str', 'Lat-f64', 'Lon-f64', 'XUTM-f64', 'YUTM-f64', 'Time_Zone-str', 'Vertical_Datum-str', 'Station_ID-str', 'Site_Description-str', 'Deployment-str', 'Deployment_Postion-str', 'Vertical_Reference-str', 'calc_SMD-f64', 'mAHD-f64']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


  x, y = transform(in_proj, out_proj, lon, lat)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2000-01-01 00:00:00       NaN          0        var00152        IDCJAC0009   
1  2000-01-02 00:00:00       NaN          0        var00152        IDCJAC0009   
2  2000-01-03 00:00:00       NaN          0        var00152        IDCJAC0009   
3  2000-01-04 00:00:00       NaN          0        var00152        IDCJAC0009   
4  2000-01-05 00:00:00      

  x, y = transform(in_proj, out_proj, lon, lat)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-14 14:00:00       NaN          0        var00129                WD   
1  2011-07-14 15:00:00    225.00          0        var00129                WD   
2  2011-07-14 16:00:00    236.25          0        var00129                WD   
3  2011-07-14 17:00:00    236.25          0        var00129                WD   
4  2011-07-14 18:00:00    236.25  

  x, y = transform(in_proj, out_proj, lon, lat)


KeyboardInterrupt: 

In [24]:
import os
import pandas as pd
import pyproj

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency-str',
        'Agency Code': 'Agency_Code-str',
        'Program': 'Program_Name-str',
        'Project': 'Program_Code-str',
        'Date': 'Date-str',
        'Data': 'Data-f64',
        'Depth': 'Depth-f64',
        'Variable ID': 'Variable_ID-str',
        'Variable': 'Variable_Name-str',
        'Data File Name': 'Data_File_Name-str',
        'Location': 'Data_File_Location-str',
        'Station Status': 'Status-str',
        'Lat': 'Lat-f64',
        'Long': 'Lon-f64',
        'Time Zone': 'Time_Zone-str',
        'Vertical Datum': 'Vertical_Datum-str',
        'National Station ID': 'Station_ID-str',
        'Site Description': 'Site_Description-str',
        'Deployment': 'Deployment-str',
        'Deployment Position': 'Deployment_Postion-str',
        'Vertical Reference': 'Vertical_Reference-str'
    }, inplace=True)
    
    # Add new columns with default values
    combined_df['calc_SMD-f64'] = 0.0
    combined_df['mAHD-f64'] = 0.0
    
    # Convert Lat-f64 and Lon-f64 to UTM coordinates (XUTM-f64, YUTM-f64)
    from pyproj import Proj, transform
    in_proj = Proj('epsg:4326')  # WGS84 coordinates (Lat, Lon)
    out_proj = Proj('epsg:32732')  # UTM Zone 32N
    
    combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)

    # Rearrange columns as requested
    column_order = ['Agency-str', 'Agency_Code-str', 'Program_Name-str', 'Program_Code-str', 'Date-str', 'Data-f64', 'Depth-f64', 'Variable_ID-str', 'Variable_Name-str', 'Data_File_Name-str', 'Data_File_Location-str', 'Status-str', 'Lat-f64', 'Lon-f64', 'XUTM-f64', 'YUTM-f64', 'Time_Zone-str', 'Vertical_Datum-str', 'Station_ID-str', 'Site_Description-str', 'Deployment-str', 'Deployment_Postion-str', 'Vertical_Reference-str', 'calc_SMD-f64', 'mAHD-f64']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2000-01-01 00:00:00       NaN          0        var00152        IDCJAC0009   
1  2000-01-02 00:00:00       NaN          0        var00152        IDCJAC0009   
2  2000-01-03 00:00:00       NaN          0        var00152        IDCJAC0009   
3  2000-01-04 00:00:00       NaN          0        var00152        IDCJAC0009   
4  2000-01-05 00:00:00      

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindDirection_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-14 14:00:00       NaN          0        var00129                WD   
1  2011-07-14 15:00:00    225.00          0        var00129                WD   
2  2011-07-14 16:00:00    236.25          0        var00129                WD   
3  2011-07-14 17:00:00    236.25          0        var00129                WD   
4  2011-07-14 18:00:00    236.25  

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_AirTemperature_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-14 14:00:00       NaN          0        var00153           AirTemp   
1  2011-07-14 15:00:00     1.550          0        var00153           AirTemp   
2  2011-07-14 16:00:00     1.050          0        var00153           AirTemp   
3  2011-07-14 17:00:00     0.475          0        var00153           AirTemp   
4  2011-07-14 18:00:00     0.125

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_RelativeHumidity_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-14 14:00:00       NaN          0        var00156          Humidity   
1  2011-07-14 15:00:00       NaN          0        var00156          Humidity   
2  2011-07-14 16:00:00       NaN          0        var00156          Humidity   
3  2011-07-14 17:00:00       NaN          0        var00156          Humidity   
4  2011-07-14 18:00:00      

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-15 10:00:00       NaN          0        var00152          Rainfall   
1  2011-07-15 11:00:00       0.0          0        var00152          Rainfall   
2  2011-07-15 12:00:00       0.0          0        var00152          Rainfall   
3  2011-07-15 13:00:00       0.0          0        var00152          Rainfall   
4  2011-07-15 14:00:00       0.0  

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOM_WindSpeed_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOM_WindSpeed_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2011-07-14 14:00:00       NaN          0        var00130                WS   
1  2011-07-14 15:00:00      6.02          0        var00130                WS   
2  2011-07-14 16:00:00      5.67          0        var00130                WS   
3  2011-07-14 17:00:00      4.06          0        var00130                WS   
4  2011-07-14 18:00:00      4.62          

  combined_df['XUTM-f64'], combined_df['YUTM-f64'] = transform(in_proj, out_proj, combined_df['Lon-f64'].values, combined_df['Lat-f64'].values)


In [19]:
import os
import pandas as pd

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)

    # Rearrange columns as requested
    column_order = ['Tag', 'Agency Code', 'Program', 'Project', 'Date', 'Data', 'Depth', 'Variable ID', 'Variable', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference']
    combined_df = combined_df[column_order]
    
    # Rename columns as requested
    combined_df.rename(columns={
        'Tag': 'Agency-str',
        'Agency Code': 'Agency_Code-str',
        'Program': 'Program_Name-str',
        'Project': 'Program_Code-str',
        'Date': 'Date-str',
        'Data': 'Data-f64',
        'Depth': 'Depth-f64',
        'Variable ID': 'Variable_ID-str',
        'Variable': 'Variable_Name-str',
        'Data File Name': 'Data_File_Name-str',
        'Location': 'Data_File_Location-str',
        'Station Status': 'Status-str',
        'Lat': 'Lat-f64',
        'Long': 'Lon-f64',
        'Time Zone': 'Time_Zone-str',
        'Vertical Datum': 'Vertical_Datum-str',
        'National Station ID': 'Station_ID-str',
        'Site Description': 'Site_Description-str',
        'Deployment': 'Deployment-str',
        'Deployment Position': 'Deployment_Postion-str',
        'Vertical Reference': 'Vertical_Reference-str'
    }, inplace=True)
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
  Agency-str Agency_Code-str Program_Name-str Program_Code-str  \
0     HT-MET              HT              BOM              MET   
1     HT-MET              HT              BOM              MET   
2     HT-MET              HT              BOM              MET   
3     HT-MET              HT              BOM              MET   
4     HT-MET              HT              BOM              MET   

              Date-str  Data-f64  Depth-f64 Variable_ID-str Variable_Name-str  \
0  2000-01-01 00:00:00       NaN          0        var00152        IDCJAC0009   
1  2000-01-02 00:00:00       NaN          0        var00152        IDCJAC0009   
2  2000-01-03 00:00:00       NaN          0        var00152        IDCJAC0009   
3  2000-01-04 00:00:00       NaN          0        var00152        IDCJAC0009   
4  2000-01-05 00:00:00      

In [17]:
import os
import pandas as pd

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference', 'Variable ID']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    # Rearrange columns as requested
    column_order = ['Tag', 'Agency Code', 'Program', 'Project', 'Date', 'Data', 'Depth', 'Variable ID', 'Variable', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference']
    combined_df = combined_df[column_order]
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        print()
    else:
        print(f"Data file not found for {header_file}")


Combined DataFrame for ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Header.csv and ../../data-warehouse/csv/ht/met/BOMIDC_Precipitation_profile_Data.csv:
      Tag Agency Code Program Project                 Date  Data  Depth  \
0  HT-MET          HT     BOM     MET  2000-01-01 00:00:00   NaN      0   
1  HT-MET          HT     BOM     MET  2000-01-02 00:00:00   NaN      0   
2  HT-MET          HT     BOM     MET  2000-01-03 00:00:00   NaN      0   
3  HT-MET          HT     BOM     MET  2000-01-04 00:00:00   NaN      0   
4  HT-MET          HT     BOM     MET  2000-01-05 00:00:00   NaN      0   

  Variable ID    Variable  Data File Name  ... Station Status     Lat    Long  \
0    var00152  IDCJAC0009  IDCJAC0009.csv  ...         Active  -41.90  146.67   
1    var00152  IDCJAC0009  IDCJAC0009.csv  ...         Active  -41.90  146.67   
2    var00152  IDCJAC0009  IDCJAC0009.csv  ...         Active  -41.90  146.67   
3    var00152  IDCJAC0009  IDCJAC0009.csv  ...         

In [15]:
import os
import pandas as pd

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference']
    header_df = header_df[selected_columns]
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# Match header files with corresponding data files
for header_file in header_files:
    # Construct the corresponding data file path
    data_file = header_file.replace("_Header.csv", "_Data.csv")
    
    # Ensure the data file exists before processing
    if data_file in data_files:
        combined_df = merge_header_and_data(header_file, data_file)
        # # Ensure column selection is correct, adjust as per your requirement
        # selected_columns = ['Agency Code', 'Program', 'Project', 'Tag', 'Data File Name', 'Location', 'Station Status', 'Lat', 'Long', 'Time Zone', 'Vertical Datum', 'National Station ID', 'Site Description', 'Deployment', 'Deployment Position', 'Vertical Reference']
        # combined_df = combined_df[selected_columns + combined_df.columns[-5:-2].tolist()]

        # Print or process combined_df as needed
        # print(f"Combined DataFrame for {header_file} and {data_file}:")
        print(combined_df.head())
        # print(combined_df.columns.values.tolist())
        print()
    else:
        print(f"Data file not found for {header_file}")


  Agency Code Program Project     Tag  Data File Name            Location  \
0          HT     BOM     MET  HT-MET  IDCJAC0009.csv  data-lake/BOM/IDC/   
1          HT     BOM     MET  HT-MET  IDCJAC0009.csv  data-lake/BOM/IDC/   
2          HT     BOM     MET  HT-MET  IDCJAC0009.csv  data-lake/BOM/IDC/   
3          HT     BOM     MET  HT-MET  IDCJAC0009.csv  data-lake/BOM/IDC/   
4          HT     BOM     MET  HT-MET  IDCJAC0009.csv  data-lake/BOM/IDC/   

  Station Status     Lat    Long Time Zone  ... National Station ID  \
0         Active  -41.90  146.67    GMT +6  ...               96033   
1         Active  -41.90  146.67    GMT +6  ...               96033   
2         Active  -41.90  146.67    GMT +6  ...               96033   
3         Active  -41.90  146.67    GMT +6  ...               96033   
4         Active  -41.90  146.67    GMT +6  ...               96033   

  Site Description Deployment Deployment Position Vertical Reference  \
0            96033    Profile      m f

In [None]:
import os
import pandas as pd

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    return combined_df

def find_csv_header_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def find_csv_data_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Data.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

directory = "../../data-warehouse/csv/ht/met"

header_files = find_csv_header_files(directory)
data_files = find_csv_data_files(directory)

# # Example usage with the provided files
# header_file = "../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Header.csv"
# data_file = "../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Data.csv"

combined_df = merge_header_and_data(header_file, data_file)

# Print the combined DataFrame
print(combined_df)


In [4]:
import os
import pandas as pd

def find_csv_file_pairs(directory):
    csv_file_pairs = []
    
    for root, _, files in os.walk(directory):
        header_files = [file for file in files if file.endswith("Header.csv")]
        
        for header_file in header_files:
            data_file = header_file.replace("Header.csv", "Data.csv")
            if data_file in files:
                csv_file_pairs.append((os.path.join(root, header_file), os.path.join(root, data_file)))
    
    return csv_file_pairs

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    return combined_df

def convert_and_rename_columns(df):
    # Convert all columns to string, except for "Lat" and "Long"
    df = df.astype({'Tag': str, 'Agency Code': str, 'Program': str, 'Project': str, 'Variable ID': str, 'Data File Name': str, 'Location': str, 'Station Status': str, 'Time Zone': str, 'Vertical Datum': str, 'National Station ID': str, 'Site Description': str, 'Deployment': str, 'Deployment Position': str, 'Vertical Reference': str})
    df['Lat'] = pd.to_numeric(df['Lat'])
    df['Long'] = pd.to_numeric(df['Long'])
    
    # Determine the data type of each column and create new column names
    new_column_names = [f"{col}-{df[col].dtype}" for col in df.columns]

    # Create a mapping from original column names to new column names
    column_name_mapping = dict(zip(df.columns, new_column_names))

    # Rename columns in the DataFrame
    df.rename(columns=column_name_mapping, inplace=True)
    
    return df

def process_files(directory):
    csv_file_pairs = find_csv_file_pairs(directory)

    combined_df_list = []

    for header_file, data_file in csv_file_pairs:
        combined_df = merge_header_and_data(header_file, data_file)
        combined_df_list.append(combined_df)
    
    if combined_df_list:
        final_df = pd.concat(combined_df_list, ignore_index=True)
        final_df = convert_and_rename_columns(final_df)
    else:
        final_df = pd.DataFrame()

    return final_df

# Define the desired column order
column_order = [
    "Tag", "Agency Code", "Program", "Project", "Variable ID", "Data File Name",
    "Location", "Station Status", "Lat", "Long", "Time Zone", "Vertical Datum",
    "National Station ID", "Site Description", "Deployment", "Deployment Position",
    "Vertical Reference"
]

directory = "../../data-warehouse/csv/ht/met"
# output_path = "../../data-warehouse/parquet/agency/csiem_HT_public.csv"

final_df = process_files(directory)

# Reorder columns based on the specified order
if not final_df.empty:
    final_df = final_df[column_order]
    # Save to CSV if needed
    # final_df.to_csv(output_path, index=False)

print(final_df)


KeyboardInterrupt: 

In [3]:
import os
import pandas as pd

def find_csv_files(directory, suffix):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(suffix):
                csv_files.append(os.path.join(root, file))
    return csv_files

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    return combined_df

def convert_and_rename_columns(df):
    # Convert all columns to string, except for "Lat" and "Long"
    df = df.astype({'Tag': str, 'Agency Code': str, 'Program': str, 'Project': str, 'Variable ID': str, 'Data File Name': str, 'Location': str, 'Station Status': str, 'Time Zone': str, 'Vertical Datum': str, 'National Station ID': str, 'Site Description': str, 'Deployment': str, 'Deployment Position': str, 'Vertical Reference': str})
    df['Lat'] = pd.to_numeric(df['Lat'])
    df['Long'] = pd.to_numeric(df['Long'])
    
    # Determine the data type of each column and create new column names
    new_column_names = []
    for col in df.columns:
        data_type = df[col].dtype
        new_col_name = f"{col}-{data_type}"
        new_column_names.append(new_col_name)

    # Create a mapping from original column names to new column names
    column_name_mapping = dict(zip(df.columns, new_column_names))

    # Rename columns in the DataFrame
    df.rename(columns=column_name_mapping, inplace=True)
    
    return df

def process_files(directory):
    header_files = find_csv_files(directory, "Header.csv")
    data_files = find_csv_files(directory, "Data.csv")

    combined_df_list = []

    for header_file in header_files:
        # Find the corresponding data file
        data_file = header_file.replace("Header.csv", "Data.csv")
        
        if data_file in data_files:
            combined_df = merge_header_and_data(header_file, data_file)
            combined_df_list.append(combined_df)
    
    if combined_df_list:
        final_df = pd.concat(combined_df_list, ignore_index=True)
        final_df = convert_and_rename_columns(final_df)
        return final_df
    else:
        return pd.DataFrame()

# Define the desired column order
column_order = [
    "Tag", "Agency Code", "Program", "Project", "Date", "Data", "Depth", "Variable ID", "Variable_Name", "Data File Name", "Location", "Station Status", "Lat", "Long", "Time Zone", "Vertical Datum", "National Station ID", "Site Description" "Deployment", "Deployment Position", "Vertical Reference"
]

directory = "../../data-warehouse/csv/ht"
# output_path = "../../data-warehouse/parquet/agency/csiem_HT_public.csv"

final_df = process_files(directory)

# Reorder columns based on the specified order
if not final_df.empty:
    final_df = final_df[column_order]
    # Save to CSV if needed
    # final_df.to_csv(output_path, index=False)

print(final_df)


KeyboardInterrupt: 

In [13]:
import os
import pandas as pd

def find_csv_files(directory):
    csv_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith("Header.csv"):
                csv_files.append(os.path.join(root, file))
    return csv_files

def csv_to_single_row_dataframe(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Use the first column as headers and the second column as row values
    single_row_df = pd.DataFrame([df.iloc[:, 1].values], columns=df.iloc[:, 0].values)
    
    return single_row_df

def merge_csv_files(directory, column_order):
    csv_files = find_csv_files(directory)
    merged_df = pd.DataFrame()
    for file in csv_files:
        single_row_df = csv_to_single_row_dataframe(file)
        merged_df = pd.concat([merged_df, single_row_df], ignore_index=True)
    
    # Reorder columns based on the specified order
    merged_df = merged_df[column_order]
    
    return merged_df

def convert_and_rename_columns(df):
    # Convert all columns to string, except for "Lat" and "Long"
    df = df.astype({'Tag': str, 'Agency Code': str, 'Program': str, 'Project': str, 'Variable ID': str, 'Data File Name': str, 'Location': str, 'Station Status': str, 'Time Zone': str, 'Vertical Datum': str, 'National Station ID': str, 'Site Description': str, 'Deployment': str, 'Deployment Position': str, 'Vertical Reference': str})
    df['Lat'] = pd.to_numeric(df['Lat'])
    df['Long'] = pd.to_numeric(df['Long'])
    
    # Determine the data type of each column and create new column names
    new_column_names = []
    for col in df.columns:
        data_type = df[col].dtype
        new_col_name = f"{col}-{data_type}"
        new_column_names.append(new_col_name)

    # Create a mapping from original column names to new column names
    column_name_mapping = dict(zip(df.columns, new_column_names))

    # Rename columns in the DataFrame
    df.rename(columns=column_name_mapping, inplace=True)
    
    return df

# Define the desired column order
column_order = [
    "Tag", "Agency Code", "Program", "Project", "Variable ID", "Data File Name",
    "Location", "Station Status", "Lat", "Long", "Time Zone", "Vertical Datum",
    "National Station ID", "Site Description", "Deployment", "Deployment Position",
    "Vertical Reference"
]

directory = "../../data-warehouse/csv/ht"
# output_path = "../../data-warehouse/parquet/agency/csiem_HT_public.csv"

merged_df = merge_csv_files(directory, column_order)

# Convert data types and rename columns
merged_df = convert_and_rename_columns(merged_df)

# save_merged_csv(merged_df, output_path)

print(merged_df)


    Tag-object Agency Code-object   Program-object Project-object  \
0      HT-ALWQ                 HT  Arthurs Lake WQ           ALWQ   
1      HT-ALWQ                 HT  Arthurs Lake WQ           ALWQ   
2      HT-ALWQ                 HT  Arthurs Lake WQ           ALWQ   
3      HT-ALWQ                 HT  Arthurs Lake WQ           ALWQ   
4      HT-ALWQ                 HT  Arthurs Lake WQ           ALWQ   
..         ...                ...              ...            ...   
300    HT-WLWQ                 HT    Woods Lake WQ           WLWQ   
301    HT-WLWQ                 HT    Woods Lake WQ           WLWQ   
302    HT-WLWQ                 HT    Woods Lake WQ           WLWQ   
303    HT-WLWQ                 HT    Woods Lake WQ           WLWQ   
304    HT-WLWQ                 HT    Woods Lake WQ           WLWQ   

    Variable ID-object              Data File Name-object  \
0             var00755                      WQSamples.csv   
1             var00014                      WQSam

In [2]:
import pandas as pd

def merge_header_and_data(header_path, data_path):
    header_df = csv_to_single_row_dataframe(header_path)
    data_df = pd.read_csv(data_path)
    
    # Repeat the header for each row in the data DataFrame
    header_repeated = pd.concat([header_df] * len(data_df), ignore_index=True)
    
    # Combine the header and data DataFrames
    combined_df = pd.concat([header_repeated, data_df], axis=1)
    
    return combined_df

# Example usage with the provided files
header_file = "../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Header.csv"
data_file = "../../data-warehouse/csv/ht/alwq/ArthursLakeSpillway_AirTemperature_profile_Data.csv"

combined_df = merge_header_and_data(header_file, data_file)

# Print the combined DataFrame
print(combined_df)


       Agency Name Agency Code          Program Project      Tag  \
0   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
1   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
2   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
3   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
4   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
5   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
6   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
7   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
8   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
9   Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
10  Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
11  Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
12  Hydro Tasmania          HT  Arthurs Lake WQ    ALWQ  HT-ALWQ   
13  Hydro Tasmania          HT  Arthurs Lake WQ 

In [None]:
import os
import glob
import pandas as pd
from datetime import datetime
import pyarrow.parquet as pq
import pyarrow as pa

def csv_2_parquet_by_agency():
    # Load variable key
    import importlib.util
    varkey_spec = importlib.util.spec_from_file_location("varkey", "../functions/varkey.py")
    varkey = importlib.util.module_from_spec(varkey_spec)
    varkey_spec.loader.exec_module(varkey)
    
    # Load data paths
    import td_data_paths
    datapath = td_data_paths.datapath
    outfilepath = os.path.join(datapath, 'data-warehouse/parquet/agency/')
    filepath = os.path.join(datapath, 'data-warehouse/csv/')
    
    os.makedirs(outfilepath, exist_ok=True)
    
    filelist = glob.glob(os.path.join(filepath, '**/*Header.csv'), recursive=True)
    
    agency = []
    data = []
    
    for file in filelist:
        print(file)
        header = import_header(file)
        data.append({'header': header})
        agency.append(header['Agency_Code'])
    
    unique_agency = list(set(agency))
    
    for ag in unique_agency:
        find_agency = [i for i, x in enumerate(agency) if x == ag]
        
        tab = create_blank_table()
        tablefield = list(tab.keys())
        
        for ff in find_agency:
            print(filelist[ff])
            
            headerfile = filelist[ff]
            datafile = headerfile.replace('Header', 'Data')
            
            header = import_header(headerfile)
            header['calc_SMD'] = 0
            header['mAHD'] = 0
            
            tt2 = import_datafile_raw(datafile)
            print('Finished Import')
            
            dt = (tt2['Date'].diff().mean()).total_seconds() / 86400
            
            tab11 = pd.DataFrame(tt2)
            tab11['Date'] = pd.to_datetime(tab11['Date'])
            
            if dt < 1/(60*24):
                if header['Deployment'] != 'Profile':
                    print('Starting Downsample')
                    tt = tab11.resample('T').nearest()
                else:
                    tt = tab11
                print('Finished Downsample')
            else:
                tt = tab11
            
            csiem = {'Date': tt['Date'], 'Data': tt['Data'].astype(float), 'Depth': []}
            
            if header['Deployment'] == 'Integrated':
                csiem['Depth'] = tt['Depth']
            elif header['Deployment'] == 'Fixed':
                if 'Height' in tt.columns:
                    csiem['Depth'] = (float(header['calc_SMD']) - tt['Height'].astype(float)) * -1
                else:
                    csiem['Depth'] = tt['Depth'].astype(float) * -1
            elif header['Deployment'] == 'Floating':
                if 'Height' in tt.columns:
                    csiem['Depth'] = tt['Height'].astype(float) * -1
                else:
                    csiem['Depth'] = tt['Depth'].astype(float) * -1
            elif header['Deployment'] in ['Profile', 'Satellite']:
                csiem['Depth'] = tt['Depth'].astype(float) * -1
            else:
                print('Not recognised Deployment')
            
            for k in header.keys():
                csiem[k] = header[k]
            
            csiem['X'] = header['Lon']
            csiem['Y'] = header['Lat']
            csiem['XUTM'] = header['X']
            csiem['YUTM'] = header['Y']
            csiem['Agency'] = header['Tag']
            csiem['Units'] = varkey.varkey[header['Variable_ID']]['tfvUnits']
            
            for k in tablefield:
                if k == 'Date':
                    tab['Date'].extend(csiem['Date'].dt.strftime('%Y-%m-%d %H:%M:%S'))
                elif k == 'Data':
                    tab['Data'].extend(csiem['Data'])
                elif k == 'Depth':
                    if isinstance(csiem['Depth'], list):
                        tab['Depth'].extend(csiem['Depth'])
                    else:
                        tab['Depth'].extend([0] * len(csiem['Date']))
                else:
                    tab[k].extend(appenddata(csiem['Date'], csiem[k]))
        
        newtable = pd.DataFrame.from_dict(tab)
        outfile = os.path.join(outfilepath, f'csiem_{ag}_public.parq')
        table = pa.Table.from_pandas(newtable)
        pq.write_table(table, outfile)
        print(f'Written {outfile}')

def import_header(file):
    # Placeholder for actual import_header function
    return {}

def import_datafile_raw(file):
    # Placeholder for actual import_datafile_raw function
    return {}

def create_blank_table():
    # Placeholder for actual create_blank_table function
    return {'Date': [], 'Data': [], 'Depth': []}

def appenddata(date, data):
    # Placeholder for actual appenddata function
    return data

if __name__ == "__main__":
    csv_2_parquet_by_agency()
