In [1]:
import os
import pandas as pd
import sys
import shutil

# Validate the per-channel SPE fit results of phase0: every CSV in the source
# directory is either copied unchanged to the destination directory (all
# spe_mean values inside the accepted window) or written there as
# "<name>_adjusted.csv" with the out-of-window spe_mean values overwritten.

# Set the source and destination directories
source_directory = '/media/disk_k/30t-DATA/csv/phase0/non-validated/'
destination_directory = '/media/disk_k/30t-DATA/csv/phase0/validated/'

# Accepted spe_mean window (both bounds inclusive) and the value written over
# anything that falls outside of it.
SPE_MEAN_MIN = 0.5
SPE_MEAN_MAX = 2
SPE_MEAN_FALLBACK = 1

# Columns a file must provide to be validated at all
REQUIRED_COLUMNS = ('spe_mean', 'ch_name')

# Ensure the destination directory exists
os.makedirs(destination_directory, exist_ok=True)

# (file_name, ch_name, original spe_mean) for every out-of-range row;
# printed as a report once all files have been processed.
out_of_range_data = []

# os.listdir returns entries in arbitrary order; sorting keeps the progress
# display and the final report reproducible from run to run.
for file_name in sorted(os.listdir(source_directory)):
    if not file_name.endswith(".csv"):  # Process only CSV files
        continue

    source_file_path = os.path.join(source_directory, file_name)
    destination_file_path = os.path.join(destination_directory, file_name)

    # Display the current file being processed (overwrite previous output)
    sys.stdout.write(f"\rProcessing: {file_name}                    ")
    sys.stdout.flush()

    # Best effort per file: a failure is reported and the loop moves on.
    try:
        df = pd.read_csv(source_file_path)

        # A file without the required columns cannot be validated. It is not
        # written to the destination directory, and this is reported
        # instead of being dropped silently.
        missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
        if missing_columns:
            print(f"\nSkipped {file_name}: missing required column(s) {missing_columns}")
            continue

        # Rows whose spe_mean lies outside [SPE_MEAN_MIN, SPE_MEAN_MAX].
        # NOTE(review): NaN compares False against both bounds, so rows with a
        # missing spe_mean are NOT flagged here - confirm that is intended.
        out_of_range_mask = (df['spe_mean'] < SPE_MEAN_MIN) | (df['spe_mean'] > SPE_MEAN_MAX)

        if out_of_range_mask.any():
            # Record the original values for the report before overwriting them.
            filtered_df = df[out_of_range_mask]
            out_of_range_data.extend([
                (file_name, channel, value)
                for channel, value in zip(
                    filtered_df['ch_name'].tolist(),
                    filtered_df['spe_mean'].tolist(),
                )
            ])

            df.loc[out_of_range_mask, 'spe_mean'] = SPE_MEAN_FALLBACK

            # Insert "_adjusted" before the extension only; str.replace would
            # also rewrite a ".csv" appearing in the middle of the name.
            stem, extension = os.path.splitext(file_name)
            adjusted_file_name = f"{stem}_adjusted{extension}"
            adjusted_file_path = os.path.join(destination_directory, adjusted_file_name)

            # Save modified data to destination directory
            df.to_csv(adjusted_file_path, index=False)
        else:
            # Copy original file if no adjustments are needed
            shutil.copy(source_file_path, destination_file_path)

    except Exception as e:
        print(f"\nError occurred while processing {file_name}: {e}")

# Add a line break to separate progress from final output
print("\nProcessing completed!\n")

# Print results for out-of-range values
for file_name, ch_name, spe_mean in out_of_range_data:
    print(f"File: {file_name}, Channel: {ch_name}, spe_mean: {spe_mean}")


Processing: bnl30t_spe_fit_results_250223.csv                    
Processing completed!

File: bnl30t_spe_fit_results_250225.csv, Channel: adc_b1_ch4, spe_mean: 3.8726960233675016
File: bnl30t_spe_fit_results_250228.csv, Channel: adc_b2_ch3, spe_mean: 2.086778461199779
File: bnl30t_spe_fit_results_250226.csv, Channel: adc_b1_ch3, spe_mean: 0.4951178843111806


In [1]:
import os
import pandas as pd
import sys
import re
import shutil

# Adjust the per-channel SPE fit results of phase8: every CSV named
# "<base>_<6 digits>.csv" whose 6-digit suffix is FIRST_DATE or later is
# rewritten to the destination directory as "<base>_<6 digits>_adjusted.csv",
# with spe_mean overwritten for one fixed channel and for out-of-window values.

# Set the source and destination directories
source_directory = "/media/disk_k/WbLS-DATA/csv/non-validated/phase8/"
destination_directory = "/media/disk_k/WbLS-DATA/csv/validated/phase8/"

# Earliest 6-digit filename suffix to process, compared as an integer.
# NOTE(review): the values (e.g. 250115) look like YYMMDD - confirm.
FIRST_DATE = 250115

# Channel whose spe_mean is always overwritten, regardless of its value
FORCED_CHANNEL = 'adc_b1_ch1'

# Accepted spe_mean window (both bounds inclusive) and the value written over
# anything outside of it (and over FORCED_CHANNEL).
SPE_MEAN_MIN = 0.5
SPE_MEAN_MAX = 2
SPE_MEAN_FALLBACK = 1

# Ensure the destination directory exists
os.makedirs(destination_directory, exist_ok=True)

# Matches filenames ending in an underscore plus six digits, e.g. "name_250115.csv";
# group 1 is the base name, group 2 the six-digit suffix.
pattern = re.compile(r"^(.*)_(\d{6})\.csv$")  # Matches 'filename_250115.csv' or similar

# os.listdir returns entries in arbitrary order; sorting keeps the progress
# display reproducible from run to run.
for file_name in sorted(os.listdir(source_directory)):
    match = pattern.match(file_name)
    if not match:
        continue

    base_name, date_str = match.groups()
    if int(date_str) < FIRST_DATE:  # Process only FIRST_DATE or later
        continue

    source_file_path = os.path.join(source_directory, file_name)

    # Display the current file being processed (overwrite previous output)
    sys.stdout.write(f"\rProcessing: {file_name}                    ")
    sys.stdout.flush()

    # Best effort per file: a failure is reported and the loop moves on.
    try:
        df = pd.read_csv(source_file_path)

        if 'spe_mean' in df.columns and 'ch_name' in df.columns:
            # One mask for both rules: the forced channel, or a spe_mean outside
            # the window. Equivalent to applying the two overwrites in sequence,
            # because the fallback value itself lies inside the window.
            # NOTE(review): NaN compares False against both bounds, so rows with
            # a missing spe_mean are left untouched - confirm that is intended.
            reset_mask = (
                (df['ch_name'] == FORCED_CHANNEL)
                | (df['spe_mean'] < SPE_MEAN_MIN)
                | (df['spe_mean'] > SPE_MEAN_MAX)
            )
            df.loc[reset_mask, 'spe_mean'] = SPE_MEAN_FALLBACK

            # Generate new file name with "_adjusted" while keeping the same date
            adjusted_file_name = f"{base_name}_{date_str}_adjusted.csv"
            adjusted_file_path = os.path.join(destination_directory, adjusted_file_name)

            # Save modified data to the destination directory
            df.to_csv(adjusted_file_path, index=False)
        else:
            # Required columns are missing, so nothing can be adjusted: the file
            # is copied unchanged under its original name, and that is reported
            # so the unadjusted copy does not go unnoticed.
            print(f"\nNote: {file_name} lacks 'spe_mean' and/or 'ch_name'; copied unchanged")
            shutil.copy(source_file_path, os.path.join(destination_directory, file_name))

    except Exception as e:
        print(f"\nError occurred while processing {file_name}: {e}")

# Add a line break to separate progress from final output
print("\nProcessing completed! (_adjusted.csv files created in the validated directory)")


Processing: bnl1t_spe_fit_results_250222.csv                    
Processing completed! (_adjusted.csv files created in the validated directory)
