In [None]:
# Install pyfiglet if not already installed.
!pip install pyfiglet

import os
import re  # Import regex module to handle pattern matching.
import pandas as pd
from pyfiglet import figlet_format
from google.colab import files
import io

# Image extensions (lower-case, including the dot) that are stripped from filenames.
_IMAGE_EXTENSIONS = frozenset({'.tif', '.tiff', '.png', '.jpg', '.jpeg', '.gif'})

# A trailing copy counter of the form " (<digits>)" at the very end of a name.
_COPY_SUFFIX_RE = re.compile(r'\s*\(\d+\)$')


def remove_image_extension(filename):
    """Return *filename* without a common image extension and copy counter.

    When the extension (compared case-insensitively) is one of
    ``.tif``, ``.tiff``, ``.png``, ``.jpg``, ``.jpeg`` or ``.gif``, the
    extension is dropped, a trailing `` (<digits>)`` group is removed from
    what is left, and surrounding whitespace is stripped.

    Any other value is returned unchanged. That includes names with a
    different or missing extension, and values that are not path-like at all
    (e.g. ``None`` or the NaN pandas produces for an empty CSV cell), so the
    function is safe to use with ``Series.apply`` on columns with gaps.
    """
    try:
        root, ext = os.path.splitext(filename)
    except TypeError:
        # Not a str/bytes/PathLike value (e.g. NaN): nothing to normalise.
        return filename

    if ext.lower() not in _IMAGE_EXTENSIONS:
        return filename

    # Drop the copy counter first, then any whitespace that is left over.
    return _COPY_SUFFIX_RE.sub('', root).strip()

# Name of the helper column holding the normalised filename used for matching.
PROCESSED_COL = 'Filename_processed'

# Step 1: Upload the CSV files.
print("Upload the reference CSV file:")
uploaded_ref = files.upload()  # Blocks until the reference file is uploaded.
if not uploaded_ref:
    # A cancelled upload yields an empty mapping; fail with a clear message.
    raise ValueError("No reference CSV file was uploaded.")
ref_filename = next(iter(uploaded_ref))

print("Upload the second CSV file:")
uploaded_second = files.upload()  # Blocks until the second file is uploaded.
if not uploaded_second:
    raise ValueError("No second CSV file was uploaded.")
sec_filename = next(iter(uploaded_second))

# Step 2: Read the CSV contents into DataFrames.
df_ref = pd.read_csv(io.BytesIO(uploaded_ref[ref_filename]))
df_second = pd.read_csv(io.BytesIO(uploaded_second[sec_filename]))

# Both files are matched on their 'Filename' column, so check for it up front.
for csv_label, frame in (("reference", df_ref), ("second", df_second)):
    if 'Filename' not in frame.columns:
        raise KeyError(f"The {csv_label} CSV file has no 'Filename' column.")

# Add the processed filename (image extension and trailing copy number removed).
df_ref[PROCESSED_COL] = df_ref['Filename'].apply(remove_image_extension)
df_second[PROCESSED_COL] = df_second['Filename'].apply(remove_image_extension)

# Columns of the second file that also exist in the reference file get a
# '_copy' suffix so both values survive the horizontal merge. The mapping is
# the same for every row, so it is computed once instead of per matching row.
ref_columns = set(df_ref.columns)
rename_map = {col: col + "_copy" for col in df_second.columns if col in ref_columns}
df_second_renamed = df_second.rename(columns=rename_map)
# The helper column exists in both frames, so it is always part of the mapping.
second_key = rename_map[PROCESSED_COL]

# Step 3: Process each row of the reference CSV.
# For each reference row, find all rows of the second CSV with the same
# processed filename, print how many there are as bold ASCII art, and append
# one merged (reference + match) row per matching pair.
combined_rows = []

for _, ref_row in df_ref.iterrows():
    is_match = df_second_renamed[second_key] == ref_row[PROCESSED_COL]
    matching_rows = df_second_renamed[is_match]

    # Print the match count in bold, rendered as large ASCII art by pyfiglet.
    print("\033[1m" + figlet_format(str(len(matching_rows))) + "\033[0m")

    # Reference columns first, then the (renamed) columns of the match.
    combined_rows.extend(
        pd.concat([ref_row, match_row])
        for _, match_row in matching_rows.iterrows()
    )

# Step 4: Create the final DataFrame.
if combined_rows:
    final_df = pd.DataFrame(combined_rows)
else:
    # No matches at all: still emit the header row so the CSV is well-formed.
    final_df = pd.DataFrame(columns=[*df_ref.columns, *df_second_renamed.columns])

# Drop both helper columns by exact name. Matching on the "processed" suffix
# would miss the renamed '<helper>_copy' column and could also remove
# unrelated user columns that merely end in "processed".
final_df = final_df.drop(columns=[PROCESSED_COL, second_key], errors='ignore')

# Save the final DataFrame to a new CSV file.
output_file = "merged.csv"
final_df.to_csv(output_file, index=False)
print(f"Combined CSV file has been created as '{output_file}'.")

# Download the output file.
files.download(output_file)


Upload the reference CSV file:


Saving extracted_features_with_prep (32).csv to extracted_features_with_prep (32) (3).csv
Upload the second CSV file:


Saving extracted_features_with_prep (31).csv to extracted_features_with_prep (31) (3).csv
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m ____  
|___ \ 
  __) |
 / __/ 
|_____|
       
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   
[0m
[1m _ 
/ |
| |
| |
|_|
   


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>