In [None]:
import pandas as pd
from datetime import datetime

# --- User settings ---
# Modify the individual components of the start and end dates as needed.
start_year = 2025
start_month = 8
start_day = 7

end_year = 2025
end_month = 8
end_day = 8

# Name of the input log file. The output file name is derived from the date
# range below, once the date strings have been built.
input_filename = 'datalog.txt'
# ---------------------


# Build real date objects for the range comparison. Constructing them here
# also validates the components: an impossible date (e.g. month 13) raises
# ValueError immediately instead of silently matching nothing.
start_date = datetime(start_year, start_month, start_day).date()
end_date = datetime(end_year, end_month, end_day).date()

# Reconstruct the date strings from the components with proper formatting.
start_date_str = f"{start_year}-{start_month:02d}-{start_day:02d}"
end_date_str = f"{end_year}-{end_month:02d}-{end_day:02d}"

# The output name depends on the date strings, so it must be built after them
# (building it earlier raises NameError on a fresh kernel).
output_filename = f"Datalog_{start_date_str.replace('-', '')}_{end_date_str.replace('-', '')}.csv"

# This list will hold all the rows that match the date criteria
processed_rows = []

# File-level problems (missing file, unreadable file, CSV write failure) are
# reported by the outer try/except; per-line parse problems are handled inside
# the loop so one bad line does not discard the rest of the file.
try:
    # Open and read the input file line by line
    with open(input_filename, 'r') as f:
        for line in f:
            stripped = line.strip()

            # Skip empty lines
            if not stripped:
                continue

            # Split the line into its comma-separated components, ignoring
            # any leading/trailing commas.
            data_list = stripped.strip(',').split(',')

            try:
                # First field is the date; parse it so it can be compared.
                line_date_str = data_list[0]
                line_date = datetime.strptime(line_date_str, '%Y-%m-%d').date()

                # Keep only lines whose date is within the range (inclusive).
                if not (start_date <= line_date <= end_date):
                    continue

                time_str = data_list[1]
                # Third field is "<label>:<unit>"; keep the part after the colon.
                unit = data_list[2].split(':')[1].strip()

                # Create a dictionary to hold the data for this row
                row_dict = {
                    'Date': line_date_str,
                    'Time': time_str,
                    'Unit': unit
                }

                # Remaining fields are "<sensor>:<value>" pairs (S1, S2, ...).
                for item in data_list[3:]:
                    key, value = item.split(':')
                    row_dict[key.strip()] = float(value.strip())
            except (IndexError, ValueError):
                # Bad date, missing field, or non-numeric reading: skip this
                # line only and keep processing the rest of the file.
                print(f"Skipping malformed line: {stripped}")
                continue

            # Append the processed dictionary to our list of rows
            processed_rows.append(row_dict)

    # After checking all lines, see if we found any matching rows
    if processed_rows:
        # Create the final DataFrame from the list of dictionaries
        final_df = pd.DataFrame(processed_rows)

        # Save the final DataFrame to a CSV file, without the pandas index
        final_df.to_csv(output_filename, index=False)
        print(f"Processing complete. Data between {start_date_str} and {end_date_str} has been saved to '{output_filename}'.")
    else:
        print("No data was found within the specified date range.")

except FileNotFoundError:
    print(f"Error: The input file '{input_filename}' was not found. Please make sure the file is in the same directory as the script.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

In [11]:
import pandas as pd
import re

# --- User settings ---
# File names used by this cell: the raw log to read, a name reserved for a
# cleaned copy (not written by this cell), and the CSV to produce.
input_filename = '20250808_1745-1750.TXT'
cleaned_filename = 'cleaned_20250808_1745-1750.TXT'
output_filename = 'Datalog_20250808_1745-1750.csv'
# ---------------------
# One dictionary per data line; each becomes a row of the final CSV.
processed_rows = []

try:
    with open(input_filename, 'r') as f:
        for line in f:
            stripped = line.strip()

            # Blank lines and the 'Inicio de registro' header carry no data.
            if stripped == '' or 'Inicio de registro' in line:
                continue

            fields = stripped.split(',')

            # A usable line needs at least a date and a time field;
            # anything shorter is reported and ignored.
            if len(fields) < 2:
                print(f"Skipping malformed line: {line.strip()}")
                continue

            date_str, time_str, *sensor_fields = fields

            # Fixed columns first, so they lead the row's key order.
            row_dict = {'Date': date_str, 'Time': time_str, 'Unit': 'C°'}

            # Re-join everything after the time field and pull out every
            # "S<number>: <value>" pair, whatever separators surround it.
            sensor_text = ','.join(sensor_fields)
            row_dict.update(
                (name, float(reading))
                for name, reading in re.findall(r'(S\d+):\s*(-?\d+\.?\d*)', sensor_text)
            )

            processed_rows.append(row_dict)

    # --- DataFrame creation and export ---
    if not processed_rows:
        print(f"No processable data was found in '{input_filename}'.")
    else:
        final_df = pd.DataFrame(processed_rows)

        # Order sensor columns by their numeric suffix (S2 before S10).
        sensor_columns = [col for col in final_df.columns if col.startswith('S')]
        sensor_columns.sort(key=lambda name: int(name[1:]))

        # Fixed columns first, then the sensors in numeric order.
        column_order = ['Date', 'Time', 'Unit', *sensor_columns]

        # Apply the column order; readings missing from a row become ''.
        final_df = final_df.reindex(columns=column_order).fillna('')

        # Write the CSV without the pandas index column.
        final_df.to_csv(output_filename, index=False)
        print(f"Processing complete. Data from '{input_filename}' has been saved to '{output_filename}'.")

except FileNotFoundError:
    print(f"Error: The input file '{input_filename}' was not found. Please make sure it is in the same directory.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Skipping malformed line: Inicio: 2025-08-08 17:45:00;
Processing complete. Data from '20250808_1745-1750.TXT' has been saved to 'Datalog_20250808_1745-1750.csv'.
