In [4]:
import pandas as pd

# Define a function to check and remove rows
def remove_rows_with_T1W(file_path, column_name):
    """Drop rows whose `column_name` value contains 'T1W' and rewrite the CSV in place.

    The match is a case-sensitive substring search. Rows whose cell is empty
    (read back as NaN) or not a string are treated as non-matching and kept.

    Args:
        file_path: Path of the CSV file to read; the same file is overwritten.
        column_name: Name of the column to search for 'T1W'.

    Returns:
        True if the column exists and the filtered table was written back to
        `file_path`. False if the column is missing, or if any error occurred
        while reading, filtering or writing (the error is printed, not raised).
    """
    try:
        df = pd.read_csv(file_path)

        if column_name not in df.columns:
            return False

        # Empty cells come back from read_csv as NaN. Without the cast and
        # na=False, str.contains either fails (non-string column) or yields NaN
        # entries that cannot be inverted with `~`, so the whole call would end
        # up in the except branch and nothing would be filtered.
        has_t1w = df[column_name].astype(str).str.contains('T1W', na=False)

        # Overwrites the input file; the unfiltered data is not kept anywhere.
        df[~has_t1w].to_csv(file_path, index=False)
        return True
    except Exception as e:
        print(f"An error occurred: {e}")
        return False

# Absolute path of the ATLAS split CSV; the call below rewrites this file in place
csv_file_path = '/project/ajoshi_27/akrami/3D_lesion_DF/Data/splits/ATLAS.csv'

# Remove every row whose 'img_path' contains 'T1W' from the CSV file.
# The result is True only when the column exists and the file was saved.
column_exists = remove_rows_with_T1W(csv_file_path, 'img_path')

# Report the outcome. Note: False covers both "column missing" and "an error was
# caught inside the function" (in which case the error message is printed first).
print(f"Column 'img_path' found and file updated: {column_exists}")


Column 'img_path' found and file updated: True


In [6]:
# Now that we've confirmed the presence of 'T1W' in the 'img_path' column,
# let's remove the rows where 'T1W' is found and then save the updated DataFrame back to the same CSV file.

def remove_rows_with_T1W_and_save(file_path, column_name):
    """Drop rows whose `column_name` value contains 'T1W' (any case) and rewrite the CSV.

    The match is a case-insensitive substring search. Rows whose cell is empty
    (read back as NaN) or not a string are treated as non-matching and kept.

    Args:
        file_path: Path of the CSV file to read; the same file is overwritten.
        column_name: Name of the column to search for 'T1W'.

    Returns:
        True if the column exists and the filtered table was written back to
        `file_path` (this does not imply that any row was actually removed).
        False if the column is missing, or if any error occurred while reading,
        filtering or writing (the error is printed, not raised).
    """
    try:
        df = pd.read_csv(file_path)

        if column_name not in df.columns:
            return False

        # Empty cells come back from read_csv as NaN. Without the cast and
        # na=False, str.contains either fails (non-string column) or yields NaN
        # entries that cannot be inverted with `~`, so the whole call would end
        # up in the except branch and nothing would be filtered.
        has_t1w = df[column_name].astype(str).str.contains('T1W', case=False, na=False)
        updated_df = df[~has_t1w]

        # Overwrites the input file; the unfiltered data is not kept anywhere.
        updated_df.to_csv(file_path, index=False)
        return True
    except Exception as e:
        print(f"An error occurred: {e}")
        return False

# Absolute path of the ATLAS split CSV; the call below rewrites this file in place
csv_file_path = '/project/ajoshi_27/akrami/3D_lesion_DF/Data/splits/ATLAS.csv'

# Remove every row whose 'img_path' contains 'T1W' (case-insensitive) and save the file
row_removed = remove_rows_with_T1W_and_save(csv_file_path, 'img_path')

# Display the result as the cell output. True means the column exists and the file
# was written; it does not by itself confirm that any row was actually removed.
row_removed


True

In [2]:
import pandas as pd
import nibabel as nib

# Split listing to validate. Values of its 'img_path' / 'seg_path' columns are
# appended to base_dir by plain string concatenation.
df = pd.read_csv('/project/ajoshi_27/akrami/3D_lesion_DF/Data/splits/ATLAS.csv')
base_dir = '/scratch1/akrami/Data_train'


def _shapes_differ(row):
    """Return True when the row's image and segmentation report different data shapes.

    Both files are opened with nibabel and the shapes are taken from their
    headers via get_data_shape().
    """
    img_file = nib.load(base_dir + row['img_path'])
    seg_file = nib.load(base_dir + row['seg_path'])
    return img_file.header.get_data_shape() != seg_file.header.get_data_shape()


print(f"Number of rows before cleaning: {len(df)}")

# Index labels of every row whose image and segmentation shapes disagree
rows_to_drop = [index for index, row in df.iterrows() if _shapes_differ(row)]

# Keep only the consistent rows and write them to a new CSV in the working
# directory (the source listing itself is not modified here)
df_cleaned = df.drop(rows_to_drop)
print(f"Number of rows after cleaning: {len(df_cleaned)}")
df_cleaned.to_csv('cleaned_ATLAS.csv', index=False)


Number of rows before cleaning: 655
Number of rows after cleaning: 654
