# In [None]:
import os
import pandas as pd
from tkinter import Tk, filedialog

# Open a folder selection dialog
root = Tk()
root.withdraw()  # Hide the main tkinter window
folder_path = filedialog.askdirectory(title="archeologyX")
root.destroy()  # Release the hidden window once the dialog has closed

# The dialog yields an empty value when it is cancelled. Stop here with a
# clear message instead of letting os.listdir('') fail further down.
if not folder_path:
    raise SystemExit("No folder selected.")

# Function to check if a Series is binary (contains only 0s and 1s)
def is_binary(series):
    """Return True when every non-missing value in *series* is 0 or 1.

    Missing values (NaN/None) are ignored. Values are coerced to numbers
    before the check, so the strings "0" and "1" count as binary; this
    matters when a text header row has forced a whole CSV column to be read
    as strings. Any non-missing value that cannot be parsed as a number
    (e.g. a label) makes the series non-binary.

    A series with no non-missing values is considered binary.
    """
    present = series.dropna()
    # Unparseable values become NaN here, and NaN is never in [0, 1], so
    # text labels are rejected rather than silently skipped.
    as_numbers = pd.to_numeric(present, errors='coerce')
    return bool(as_numbers.isin([0, 1]).all())

# Process each CSV file in the folder
for filename in os.listdir(folder_path):
    # Cleaned copies are written back into this same folder with a
    # "cleaned_" prefix; skip them so a rerun does not clean its own output
    # and pile up "cleaned_cleaned_..." files.
    if not filename.endswith('.csv') or filename.startswith('cleaned_'):
        continue

    file_path = os.path.join(folder_path, filename)
    try:
        # header=None keeps the first line as data so it can be inspected below
        df = pd.read_csv(file_path, header=None)
    except pd.errors.EmptyDataError:
        # A file with no content cannot be parsed; report it and move on
        # instead of aborting the whole batch.
        print(f"Skipping empty file: {filename}")
        continue

    # Check first row and remove it if not binary (presumably a header row)
    if not is_binary(df.iloc[0]):
        df = df.iloc[1:].reset_index(drop=True)

    # Check first column and remove it if not binary (presumably row labels)
    if not is_binary(df.iloc[:, 0]):
        df = df.iloc[:, 1:]

    # Coerce the remaining cells to numbers; unparseable cells become NaN
    df = df.apply(pd.to_numeric, errors='coerce')

    # Replace NaNs with 0
    df = df.fillna(0)

    # Save the cleaned copy next to the original, without index or header
    cleaned_path = os.path.join(folder_path, f"cleaned_{filename}")
    df.to_csv(cleaned_path, index=False, header=False)

print("Cleaning completed.")


# In [None]:
import os
import pandas as pd
from tkinter import Tk, filedialog

# Hard-coded input folder holding the CSV files to clean
# (no folder selection dialog is opened in this cell)
folder_path = r"C:\Users\lucil\Documents\Stage IJN Nestedness\re-analysis empirical nestedness\Extracted_data_all"

def is_binary(series):
    """Return True when every distinct value in *series* is 0, 1 or "NA".

    NaN is not an accepted value, so a series containing a missing value is
    reported as non-binary. A series with no values is reported as binary.
    """
    accepted = {0, 1, "NA"}
    distinct = pd.unique(series)
    return all(value in accepted for value in distinct)

for filename in os.listdir(folder_path):
    # Cleaned copies are written back into this same folder with a
    # "cleaned_" prefix; skip them so a rerun does not clean its own output
    # and pile up "cleaned_cleaned_..." files.
    if not filename.endswith('.csv') or filename.startswith('cleaned_'):
        continue

    file_path = os.path.join(folder_path, filename)
    try:
        # header=None keeps the first line as data so it can be inspected below
        df = pd.read_csv(file_path, header=None)
    except pd.errors.EmptyDataError:
        # A file with no content cannot be parsed; report it and move on
        # instead of aborting the whole batch.
        print(f"Skipping empty file: {filename}")
        continue

    # Convert everything to numeric first; unparseable cells become NaN
    df = df.apply(pd.to_numeric, errors='coerce')

    # Remove first row if it is not binary (presumably a header row)
    if not is_binary(df.iloc[0]):
        df = df.iloc[1:].reset_index(drop=True)

    # Remove first column if it is not binary (presumably row labels)
    if not is_binary(df.iloc[:, 0]):
        df = df.iloc[:, 1:]

    # Fill NA values with 0, then store the matrix as integers
    df = df.fillna(0)
    df = df.astype(int)

    # Save the cleaned copy next to the original, without index or header
    cleaned_path = os.path.join(folder_path, f"cleaned_{filename}")
    df.to_csv(cleaned_path, index=False, header=False)

print("Finished cleaning all files.")
