# Change File Names and File Formats of Aiforia Output Files

## Purpose
This code processes the raw output files from Aiforia (typically Excel, CSV, or ZIP files) and converts them into the required format (Excel, CSV, or Feather, depending on the case). It also renames each file based on the value in cell A2 (the first cell in the first non-header row). The resulting files are then used in other notebooks for further analysis.

* Part 1: The code **renames** all Aiforia output Excel or CSV files in the specified folder using the value in cell A2 (e.g. “131342-1_Bl6_GFP_NeuN_S1”). The file format (CSV or Excel) remains unchanged.

* Part 2: The code **removes the prefix “IA_details_”** from the names of all Aiforia output Excel or CSV files in the specified folder. The file format (CSV or Excel) remains unchanged.

* Part 3: The code iterates over all Excel files in the specified folder and **converts them into tab-separated CSV files**. Each CSV file is named according to the value in cell A2.

* Part 4: The code iterates over all Excel files in the specified folder and **converts them into Feather files using pandas**. Each Feather file is named according to the value in cell A2.

* Part 5: Depending on the Aiforia download procedure, the output may be provided as **ZIP files containing Excel files**. For each ZIP file in the specified folder, the code extracts the desired Excel file (IA_details.xlsx), renames it using the value in cell A2, and moves it to the folder that originally contained the ZIP files.

* Part 6: Depending on the Aiforia download procedure, the output may be provided as **ZIP files containing CSV files**. For each ZIP file in the specified folder, the code extracts the desired CSV file (IA_details.csv), renames it using the value in cell A2, and moves it to the folder that originally contained the ZIP files.

## Part 1 - Rename Excel/CSV Files Based on Cell A2 (Keep Original Format)

In [None]:
# Specify the folder where all Excel or CSV files are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_old_names = r'C:\Users\...\Raw_data'


In [None]:
import os
import glob
import pandas as pd

# Rename every CSV/Excel file in `folder_old_names` after the first value of its
# 'Image' column (cell A2), keeping the original extension.
# The file list is built up front, so files renamed inside the loop are not
# picked up a second time.
file_patterns = [os.path.join(folder_old_names, "*.csv"),
                 os.path.join(folder_old_names, "*.xlsx")]
all_files = [f for pattern in file_patterns for f in glob.glob(pattern)]

for file_path in all_files:
    ext = os.path.splitext(file_path)[1].lower()

    # Read only the first data row of the 'Image' column
    try:
        if ext == ".csv":
            df = pd.read_csv(file_path, sep='\t', usecols=['Image'], dtype=str, nrows=1)
        elif ext == ".xlsx":
            df = pd.read_excel(file_path, usecols=['Image'], dtype=str, nrows=1)
        else:
            print(f"Skipping unsupported file type: {file_path}")
            continue
    except ValueError as e:
        # pandas raises ValueError when the requested column is not in the file
        print(f"⚠️ Could not read 'Image' column in {file_path}: {e}")
        continue

    # An empty cell is returned by pandas as NaN (not None), even with dtype=str,
    # so pd.isna() is required here; `is None` would let NaN through and
    # the .strip() below would fail on a float.
    if df.empty or pd.isna(df['Image'].iloc[0]):
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    # Extract new name; a whitespace-only cell would give an empty file name
    new_file_name = df['Image'].iloc[0].strip()
    if not new_file_name:
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    new_file_path = os.path.join(folder_old_names, f"{new_file_name}{ext}")

    # Handle potential naming conflicts (never overwrite an existing file)
    if os.path.exists(new_file_path):
        print(f"⚠️ Skipping: {new_file_name}{ext} already exists.")
        continue

    # Rename the file
    os.rename(file_path, new_file_path)
    print(f"✅ Renamed: {os.path.basename(file_path)} → {new_file_name}{ext}")


## Part 2 - Remove 'IA_details_' Prefix from Excel/CSV File Names (Keep Original Format)

In [None]:
# Specify the folder where all Excel or CSV files are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_old_names = r'C:\Users\...\Raw_data'

In [None]:
%%time
import os
import glob

# Strip the leading 'IA_details_' from the names of all CSV and Excel files
# in `folder_old_names`; the file contents and extension are left untouched.
file_patterns = [os.path.join(folder_old_names, "*.csv"),
                 os.path.join(folder_old_names, "*.xlsx")]
all_files = [f for pattern in file_patterns for f in glob.glob(pattern)]

prefix = 'IA_details_'

for file_path in all_files:
    dir_name, base_name = os.path.split(file_path)

    # Only rename if the name really STARTS with the prefix; a substring test
    # plus str.replace() would also cut the token out of the middle of a name
    # and remove every occurrence of it.
    if not base_name.startswith(prefix):
        print(f"Skipped (no 'IA_details_' prefix): {base_name}")
        continue

    new_base_name = base_name[len(prefix):]
    new_file_path = os.path.join(dir_name, new_base_name)

    # Never overwrite an existing file (os.rename silently replaces it on POSIX
    # and raises FileExistsError on Windows)
    if os.path.exists(new_file_path):
        print(f"⚠️ Skipping: {new_base_name} already exists.")
        continue

    os.rename(file_path, new_file_path)
    print(f"Renamed: {base_name} → {new_base_name}")


## Part 3 - Convert Excel Files to CSV and Rename Based on Cell A2

In [None]:
# Specify the folder where all Excel (.xlsx) files to convert are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_old_xslx_names = r'C:\Users\...\Raw_data'

In [None]:
%%time
import pandas as pd
import os
import glob

# Convert every Excel file in `folder_old_xslx_names` into a tab-separated CSV
# named after the first value of its 'Image' column (cell A2).
# The original Excel files are kept.
all_xlsx_files = glob.glob(os.path.join(folder_old_xslx_names, "*.xlsx"))

for file_path in all_xlsx_files:
    try:
        # Cheap first read: only one row of the 'Image' column, as text,
        # to determine the output file name
        df = pd.read_excel(file_path, usecols=['Image'], dtype=str, nrows=1)
    except ValueError as e:
        # pandas raises ValueError when the requested column is not in the file
        print(f"⚠️ Could not read 'Image' column in {file_path}: {e}")
        continue

    # An empty cell is returned by pandas as NaN (not None), even with dtype=str,
    # so pd.isna() is required here; `is None` would let NaN through and
    # the .strip() below would fail on a float.
    if df.empty or pd.isna(df['Image'].iloc[0]):
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    # Extract new file name from cell A2; a whitespace-only cell would give an
    # empty file name
    new_file_name = df['Image'].iloc[0].strip()
    if not new_file_name:
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    new_file_path = os.path.join(folder_old_xslx_names, f"{new_file_name}.csv")

    # Convert Excel to tab-separated CSV
    full_df = pd.read_excel(file_path)  # Read full Excel for conversion
    full_df.to_csv(new_file_path, sep='\t', index=False)

    print(f"✅ Converted: {os.path.basename(file_path)} → {new_file_name}.csv")


## Part 4 - Convert Excel Files to Feather Format and Rename Based on Cell A2

In [None]:
# Specify the folder where all Excel (.xlsx) files to convert are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_old_xslx_names = r'C:\Users\...\Raw_data'

In [None]:
%%time
import pandas as pd
import os
import glob

# Convert every Excel file in `folder_old_xslx_names` into a Feather file named
# after the first value of its 'Image' column (cell A2). All sheets of a
# workbook are concatenated; only the columns listed below are kept.
all_xlsx_files = glob.glob(os.path.join(folder_old_xslx_names, "*.xlsx"))

# NOTE(review): the two unit headers appear to use different "mu" characters
# (Greek mu vs. micro sign). They must match the Excel column headers exactly,
# otherwise the read below fails - confirm against a real export.
columns = ['Image', 'Parent area name', 'Area/object name', 'Class label',
           'Class confidence (%)', 'Area (μm²)', 'Circumference (µm)']

for file_path in all_xlsx_files:
    try:
        # Read all sheets (sheet_name=None returns a dict of DataFrames)
        # and concatenate them
        sheet_dfs = pd.read_excel(file_path, sheet_name=None, usecols=columns)
        df = pd.concat(sheet_dfs.values(), ignore_index=True)
    except ValueError as e:
        print(f"⚠️ Could not read Excel file {file_path}: {e}")
        continue

    # An empty cell is returned by pandas as NaN (not None); without pd.isna()
    # the str() below would turn it into the file name "nan".
    if df.empty or pd.isna(df['Image'].iloc[0]):
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    # Extract new file name from cell A2; str() because the column is read
    # without dtype=str and may hold non-text values
    new_file_name = str(df['Image'].iloc[0]).strip()
    if not new_file_name:
        print(f"⚠️ No value found in 'Image' column for {file_path}")
        continue

    new_file_path = os.path.join(folder_old_xslx_names, f"{new_file_name}.feather")

    # Convert Excel to Feather
    df.to_feather(new_file_path)
    print(f"✅ Converted: {os.path.basename(file_path)} → {new_file_name}.feather")


## Part 5 - Extract Excel Files from ZIPs and Rename Based on Cell A2

In [None]:
# Specify the folder where all ZIP files are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_zip_files = r'C:\Users\...\Raw_data'

In [None]:
%%time
import pandas as pd
import os
import glob
from zipfile import ZipFile

# For every ZIP file in `folder_zip_files`: extract each '.xlsx' member whose
# name contains 'IA_details', then move it into `folder_zip_files` under the
# name given by the first value of its 'Image' column (cell A2).
all_zip_files = glob.glob(os.path.join(folder_zip_files, "*.zip"))

for zip_path in all_zip_files:
    with ZipFile(zip_path, 'r') as zip_file:
        # Loop over all files in the ZIP
        for member in zip_file.namelist():
            if not (member.endswith('.xlsx') and 'IA_details' in member):
                continue

            # extract() keeps any sub-folders stored in the archive and returns
            # the path it actually wrote; rebuilding the path from the member's
            # basename would miss files nested inside the ZIP.
            extracted_path = zip_file.extract(member, path=folder_zip_files)

            # Read Excel to get new name from cell A2
            try:
                df = pd.read_excel(extracted_path, usecols=['Image'], dtype=str, nrows=1)
            except ValueError as e:
                print(f"⚠️ Could not read 'Image' column in {extracted_path}: {e}")
                continue

            # An empty cell is returned by pandas as NaN (not None), even with
            # dtype=str, so pd.isna() is required before calling .strip().
            if df.empty or pd.isna(df['Image'].iloc[0]):
                print(f"⚠️ No value found in 'Image' column for {extracted_path}")
                continue

            # Construct new file path; a whitespace-only cell would give an
            # empty file name
            new_file_name = df['Image'].iloc[0].strip()
            if not new_file_name:
                print(f"⚠️ No value found in 'Image' column for {extracted_path}")
                continue

            new_file_path = os.path.join(folder_zip_files, f"{new_file_name}.xlsx")

            # Never overwrite an existing file (os.rename silently replaces it
            # on POSIX and raises FileExistsError on Windows). The extracted
            # file is left where it is so nothing is lost.
            if os.path.exists(new_file_path):
                print(f"⚠️ Skipping: {new_file_name}.xlsx already exists "
                      f"(extracted file left at {extracted_path}).")
                continue

            # Rename (and, for nested members, move) the extracted file
            os.rename(extracted_path, new_file_path)
            print(f"✅ Extracted and renamed: {os.path.basename(zip_path)} → {new_file_name}.xlsx")


## Part 6 - Extract CSV Files from ZIPs and Rename Based on Cell A2

In [None]:
# Specify the folder where all ZIP files are located.
# Raw string (r'...') so the backslashes of the Windows path are kept literally.
# NOTE: the '...' segment looks like a placeholder - replace it with the real path before running.
folder_zip_files = r'C:\Users\...\Raw_data'

In [None]:
%%time
import pandas as pd
import os
import glob
from zipfile import ZipFile

# For every ZIP file in `folder_zip_files`: extract each '.csv' member whose
# name contains 'IA_details', then move it into `folder_zip_files` under the
# name given by the first value of its 'Image' column (cell A2).
all_zip_files = glob.glob(os.path.join(folder_zip_files, "*.zip"))

for zip_path in all_zip_files:
    with ZipFile(zip_path, 'r') as zip_file:
        # Loop over all files in the ZIP
        for member in zip_file.namelist():
            if not (member.endswith('.csv') and 'IA_details' in member):
                continue

            # extract() keeps any sub-folders stored in the archive and returns
            # the path it actually wrote; rebuilding the path from the member's
            # basename would miss files nested inside the ZIP.
            extracted_path = zip_file.extract(member, path=folder_zip_files)

            # Read CSV (tab-separated) to get new name from cell A2 ('Image' column)
            try:
                df = pd.read_csv(extracted_path, sep='\t', usecols=['Image'], dtype=str, nrows=1)
            except ValueError as e:
                print(f"⚠️ Could not read 'Image' column in {extracted_path}: {e}")
                continue

            # An empty cell is returned by pandas as NaN (not None), even with
            # dtype=str, so pd.isna() is required before calling .strip().
            if df.empty or pd.isna(df['Image'].iloc[0]):
                print(f"⚠️ No value found in 'Image' column for {extracted_path}")
                continue

            # Construct new file path; a whitespace-only cell would give an
            # empty file name
            new_file_name = df['Image'].iloc[0].strip()
            if not new_file_name:
                print(f"⚠️ No value found in 'Image' column for {extracted_path}")
                continue

            new_file_path = os.path.join(folder_zip_files, f"{new_file_name}.csv")

            # Never overwrite an existing file (os.rename silently replaces it
            # on POSIX and raises FileExistsError on Windows). The extracted
            # file is left where it is so nothing is lost.
            if os.path.exists(new_file_path):
                print(f"⚠️ Skipping: {new_file_name}.csv already exists "
                      f"(extracted file left at {extracted_path}).")
                continue

            # Rename (and, for nested members, move) the extracted file
            os.rename(extracted_path, new_file_path)
            print(f"✅ Extracted and renamed: {os.path.basename(zip_path)} → {new_file_name}.csv")
