In [None]:
import os
import numpy as np
import pandas as pd


In [None]:
# Folder holding the raw CSV files (produced by code like the examples in the
# data_processing subfolder) and the folder that receives the cleaned copies.
# Re-run with different paths for each gas at each pressure altitude.
input_folder = "file_path"
output_folder = "file_path_cleaned"


# Create the output folder if needed. exist_ok=True replaces the separate
# exists() check, so there is no window between checking and creating, and a
# non-directory already sitting at this path raises immediately instead of
# surfacing later as a per-file write error.
os.makedirs(output_folder, exist_ok=True)

def clean_csv_file(file_path, output_path, min_rows=12):
    """Rewrite a possibly ragged CSV file as a rectangular, all-numeric CSV.

    The input is read as raw text: each line is stripped of surrounding
    whitespace and split on commas (no CSV quoting rules are applied), so
    rows may have differing numbers of fields. Rows shorter than the widest
    row are padded with NaN, every field that cannot be parsed as a number
    becomes NaN, and NaN-only rows are appended until the table has at least
    ``min_rows`` rows. The result is written without header or index column.

    Any exception raised while processing is caught and reported on stdout,
    so a single bad file does not abort a batch run.

    Args:
        file_path: Path of the CSV file to read.
        output_path: Path the cleaned CSV is written to.
        min_rows: Minimum number of rows in the output; shorter tables are
            padded with NaN rows. Defaults to 12.

    Returns:
        None.
    """
    try:
        # Read raw text rather than pd.read_csv so rows with differing field
        # counts are accepted.
        with open(file_path, 'r') as file:
            rows = [line.strip().split(',') for line in file]

        # The DataFrame constructor pads short rows with missing values up to
        # the widest row, so no manual per-row padding is required.
        df = pd.DataFrame(rows)

        # Column-wise numeric conversion; unparseable and missing fields
        # become NaN.
        df = df.apply(pd.to_numeric, errors='coerce')

        # Extend the default 0..n-1 index up to min_rows; the new rows are
        # filled with NaN.
        if len(df) < min_rows:
            df = df.reindex(range(min_rows))

        # Save the cleaned file to the output location
        df.to_csv(output_path, header=False, index=False)
        print(f"Cleaned file saved: {output_path}")

    except Exception as e:
        # Best-effort batch behaviour: report the failure and carry on.
        print(f"Error processing file {file_path}: {e}")

# Clean every CSV file found in the input folder, writing each result under
# the same file name in the output folder.
for filename in os.listdir(input_folder):
    # Anything whose name does not end in ".csv" is left alone.
    if not filename.endswith(".csv"):
        continue
    file_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)
    clean_csv_file(file_path=file_path, output_path=output_path)

print("All files cleaned and saved.")
