In [11]:
import pandas as pd
import re

def format_cell(cell_str):
    # Extract numbers using regex, ignore any encoding characters
    numbers = re.findall(r'-?\d+\.?\d*', cell_str)
    if len(numbers) == 4:  # mean ± std [min - max] format
        mean = float(numbers[0])
        std = float(numbers[1])
        min_val = float(numbers[2])
        max_val = float(numbers[3])
        
        # Round all numbers to 2 decimal places
        formatted = f"{mean:.2f} ± {std:.2f} [{min_val:.2f} - {max_val:.2f}]"
        return formatted
    return cell_str

def process_csv(filepath):
    try:
        # Read the file as text first
        with open(filepath, 'r', encoding='utf-8-sig') as file:
            content = file.read()
        
        # Split the content into individual cells
        cells = content.split('] ')
        formatted_cells = []
        
        for cell in cells:
            if not cell.strip():  # Skip empty cells
                continue
            # Add back the ']' if it's not the last cell
            if not cell.endswith(']'):
                cell = cell + ']'
            formatted = format_cell(cell)
            formatted_cells.append(formatted)
        
        # Save the formatted data
        output_path = filepath.replace('.csv', '_formatted.csv')
        with open(output_path, 'w', encoding='utf-8') as file:
            file.write('\n'.join(formatted_cells))
        
        print(f"Formatted file saved as: {output_path}")
        print("\nFirst few formatted cells:")
        for i, cell in enumerate(formatted_cells[:5]):
            print(cell)
            
    except Exception as e:
        print(f"Error processing file: {e}")

# Use the function
filepath = "/home/aricept094/mydata/ANOVA/radius3/numbers.csv"
process_csv(filepath)

Formatted file saved as: /home/aricept094/mydata/ANOVA/radius3/numbers_formatted.csv

First few formatted cells:
532.6100 ± 36.0181 [452.2389 - 644.9004],530.6917 ± 30.4814 [452.2389 - 603.3146],534.5624 ± 33.1792 [458.7017 - 604.6640],530.0628 ± 33.1467 [444.9528 - 598.0630]
7.4363 ± 2.4329 [2.2034 - 13.0730],7.3459 ± 2.3594 [2.4011 - 14.0641],8.1614 ± 2.5812 [2.0418 - 14.7735],9.6942 ± 4.1396 [3.2471 - 21.8777]
1.5033 ± 0.6321 [0.2606 - 3.2048],1.4861 ± 0.5846 [0.1764 - 3.3137],1.6192 ± 0.6509 [0.1392 - 3.3914],1.8768 ± 0.9277 [0.2017 - 4.2840]
1.0993 ± 0.4208 [0.2133 - 2.4192],1.0613 ± 0.3981 [0.2731 - 2.1272],1.1265 ± 0.4144 [0.1745 - 2.1546],1.3605 ± 0.6669 [0.3063 - 3.2304]
552.2986 ± 35.5791 [466.8492 - 657.4684],549.5998 ± 31.6127 [466.8492 - 622.6218],554.9794 ± 34.1381 [478.7061 - 622.6218],546.9991 ± 32.4933 [478.4159 - 619.2081]
15.0996 ± 4.7033 [5.4726 - 25.3325],14.8485 ± 4.7230 [5.6567 - 27.7566],16.8363 ± 5.2074 [5.4716 - 32.6823],18.9594 ± 7.2991 [4.3342 - 37.1716]
3.9

In [17]:
import pandas as pd
import re
import os

def process_cell(cell):
    """Processes a single cell string to extract and format numbers."""
    if not isinstance(cell, str):
        return cell  # Return non-string values as is
    
    match = re.match(r'([\d.]+) ± ([\d.]+) \[([\d.]+) - ([\d.]+)\]', cell)
    if match:
        mean, std, min_val, max_val = map(float, match.groups())
        return f"{mean:.2f} ± {std:.2f} [{min_val:.2f} - {max_val:.2f}]"
    else:
        return cell # Return original cell if formatting doesn't match

def process_excel_data(file_path):
    """Reads an Excel file, processes each cell, and returns the processed DataFrame."""
    try:
        df = pd.read_excel(file_path, header=None)  # Reads without any predefined header
    except FileNotFoundError:
        print(f"Error: File not found at '{file_path}'. Please check the file path.")
        return None
    except Exception as e:
         print(f"An error occurred while reading the Excel file: {e}")
         return None
    
    processed_df = df.map(process_cell)  # Changed applymap to map
    return processed_df


if __name__ == "__main__":
    file_path = "/home/aricept094/mydata/ANOVA/radius3/numbers.xlsx"
    processed_data = process_excel_data(file_path)

    if processed_data is not None:
        # Construct the output CSV file path
        base_name = os.path.splitext(file_path)[0]
        output_csv_path = f"{base_name}.csv"
        
        try:
            processed_data.to_csv(output_csv_path, index=False, header=False)
            print(f"Processed data saved to '{output_csv_path}'")
        except Exception as e:
            print(f"Error saving CSV file: {e}")

Processed data saved to '/home/aricept094/mydata/ANOVA/radius3/numbers.csv'


In [2]:
import os
import pandas as pd

def count_cases_in_csvs(directory_path):
  """
  Lists the number of cases (rows) in each CSV file within a given directory.

  Args:
    directory_path: The path to the directory containing the CSV files.
  """

  for filename in os.listdir(directory_path):
      if filename.endswith(".csv"):
          file_path = os.path.join(directory_path, filename)
          try:
              df = pd.read_csv(file_path)
              num_cases = len(df)
              print(f"File: {filename}, Cases: {num_cases}")
          except pd.errors.EmptyDataError:
                print(f"File: {filename}, is Empty")
          except Exception as e:
              print(f"Error reading {filename}: {e}")



if __name__ == "__main__":
    directory_path = "/home/aricept094/mydata/ANOVA/radius3" # Note: Used raw string 'r'
    count_cases_in_csvs(directory_path)

File: analysis_results_radial_16_casia_less_than_1.csv, Cases: 182
File: analysis_results_radial_4_casia_less_than_1.csv, Cases: 180
File: analysis_results_radial_20_casia1-2.csv, Cases: 193
File: analysis_results_radial_16_casia_more_than_4.csv, Cases: 40
File: analysis_results_radial_16_casia1-2.csv, Cases: 194
File: analysis_results_radial_16_casia2-4.csv, Cases: 101
File: analysis_results_radial_24_casia_more_than_4.csv, Cases: 42
File: analysis_results_radial_4_casia2-4.csv, Cases: 105
File: analysis_results_radial_20_casia_more_than_4.csv, Cases: 39
File: analysis_results_radial_8_casia1-2.csv, Cases: 193
File: analysis_results_radial_12_casia2-4.csv, Cases: 101
File: analysis_results_radial_12_casia_less_than_1.csv, Cases: 178
File: analysis_results_radial_8_casia_less_than_1.csv, Cases: 180
File: analysis_results_radial_4_casia_more_than_4.csv, Cases: 40
File: analysis_results_radial_12_casia1-2.csv, Cases: 180
File: analysis_results_radial_24_casia1-2.csv, Cases: 201
File: ana