In [None]:
# netatmo2lp_auto_naming_single.ipynb

In [8]:
import pandas as pd
import os
from datetime import datetime
import pytz

# Function to convert a row to InfluxDB line protocol
def to_influxdb_line_protocol(row, measurement, tags):
    timestamp = row['Timestamp']
    fields = f"Temperature={row['Temperature']},Humidity={row['Humidity']}"
    tag_set = ",".join([f"{key}={value}" for key, value in tags.items()])
    return f"{measurement},{tag_set} {fields} {timestamp}"

# Define the path to the single CSV file
csv_file_path = 'Outdoor_June_2024.csv'
#print(csv_file_path)

with open(csv_file_path, 'r') as file:
    first_line = file.readline().strip()

# Check if the first line contains the required header
if first_line != "Name;Long;Lat;ModuleName;ModuleType":
    print(f"File {csv_file_path} does not have the required header. Skipping.")
else:
    # Read the metadata (first two rows)
    metadata = pd.read_csv(csv_file_path, sep=';', nrows=2)

    # Remove double quotes from metadata columns
    for col in metadata.columns:
        if metadata[col].dtype == 'object':
            metadata[col] = metadata[col].map(lambda x: x.strip('"') if isinstance(x, str) else x)

    # Extract latitude, longitude, ModuleType, and Timezone from metadata
    lat = metadata['Lat'][0]
    long = metadata['Long'][0]
    module_type = metadata['ModuleType'][0]

    # Timezone is in the second row, second column
    timezone_str = metadata.iloc[1, 1].split(':')[1].strip()
    timezone = pytz.timezone(timezone_str)

    # Read the time-series data starting from the correct row
    time_series_data = pd.read_csv(csv_file_path, sep=';', skiprows=2)

    # Remove double quotes from time-series data columns
    for col in time_series_data.columns:
        if time_series_data[col].dtype == 'object':
            time_series_data[col] = time_series_data[col].map(lambda x: x.strip('"') if isinstance(x, str) else x)

    # Strip any leading/trailing spaces from column names
    time_series_data.columns = time_series_data.columns.str.strip()

    # Extract the date from the first measurement
    first_timestamp = int(time_series_data['Timestamp'].iloc[0])
    first_date_utc = datetime.utcfromtimestamp(first_timestamp).replace(tzinfo=pytz.utc)
    first_date_local = first_date_utc.astimezone(timezone)
    year_month = first_date_local.strftime('%Y_%m')  # Format to "YYYY_MM"

    # Print debug information
    print(f"File: {csv_file_path}")
    print(f"First timestamp: {first_timestamp}")
    print(f"First date (UTC): {first_date_utc}")
    print(f"First date (Local): {first_date_local}")
    print(f"Year and month: {year_month}")

    # Convert time-series data to InfluxDB line protocol
    measurement = "Rosu"
    tags = {"lat": lat, "long": long, "ModuleType": module_type, "Timezone": timezone_str}

    lines = time_series_data.apply(lambda row: to_influxdb_line_protocol(row, measurement, tags), axis=1)

    # Create new filename based on the date of the first measurement
    output_file = f"{measurement}_{year_month}.lp"

    # Save the result to a new CSV file without quotes
    with open(output_file, 'w') as f:
        for line in lines:
            f.write(line + '\n')

    print(f"InfluxDB line protocol data has been saved to {output_file}")


File: Outdoor_June_2024.csv
First timestamp: 1717189436
First date (UTC): 2024-05-31 21:03:56+00:00
First date (Local): 2024-06-01 00:03:56+03:00
Year and month: 2024_06
InfluxDB line protocol data has been saved to Rosu_2024_06.lp
