In [1]:
import os
import pandas as pd

def find_min_shape(csv_dir):
    """
    Scan a directory tree and report the smallest CSV dimensions found in it.

    Every file under csv_dir (subdirectories included) whose name ends with
    '.csv' (case-sensitive match) is loaded with pd.read_csv using default
    options, and its DataFrame.shape is compared against the running minima.
    Rows and columns are minimised independently, so the two values may come
    from different files.

    Parameters:
    csv_dir (str): Directory containing the CSV files and subdirectories.

    Returns:
    tuple: (min_rows, min_cols), the smallest row count and the smallest
        column count seen. If no CSV file could be read at all, both values
        remain float('inf').
    """
    # Sentinels: any real shape is smaller than infinity.
    smallest_rows = smallest_cols = float('inf')

    for folder, _subdirs, filenames in os.walk(csv_dir):
        for name in filenames:
            if not name.endswith('.csv'):
                continue

            csv_path = os.path.join(folder, name)
            try:
                n_rows, n_cols = pd.read_csv(csv_path).shape
            except Exception as e:
                # Best effort: report the unreadable file and keep scanning.
                print(f"Failed to process {csv_path}: {e}")
                continue

            smallest_rows = min(smallest_rows, n_rows)
            smallest_cols = min(smallest_cols, n_cols)

    return smallest_rows, smallest_cols

def normalize_csv_data(csv_dir, min_rows, min_cols):
    """
    Truncate every CSV under csv_dir to at most min_rows rows and min_cols columns.

    Each file whose name ends with '.csv' (case-sensitive match), in csv_dir
    or any subdirectory, is read with pd.read_csv, sliced positionally to its
    first min_rows rows and first min_cols columns, and then written back to
    the SAME path without the index column. The original file contents are
    overwritten. A file that is already smaller than the requested size is
    not padded; it is simply rewritten.

    Parameters:
    csv_dir (str): Directory containing the CSV files and subdirectories.
    min_rows (int): Number of leading rows to keep.
    min_cols (int): Number of leading columns to keep.

    Returns:
    None. One status line is printed per CSV file (success or failure).
    """
    for folder, _subdirs, filenames in os.walk(csv_dir):
        for name in filenames:
            if not name.endswith('.csv'):
                continue

            csv_path = os.path.join(folder, name)
            try:
                # Positional slice: keep the leading rows and columns only.
                trimmed = pd.read_csv(csv_path).iloc[:min_rows, :min_cols]

                # Overwrites the source file in place.
                trimmed.to_csv(csv_path, index=False)
                print(f"Normalized {csv_path} to {min_rows} rows and {min_cols} columns")
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(f"Failed to process {csv_path}: {e}")




In [2]:
# Example usage:
# Two passes over the same directory tree: first measure the smallest CSV
# shape, then truncate every CSV to that shape.
# WARNING: normalize_csv_data writes each result back to the file it was read
# from, so the originals under csv_directory are overwritten. Keep a backup of
# the raw data if it may be needed again.
csv_directory = "Datacsv"
# If no CSV could be read, find_min_shape returns float('inf') for both values.
min_rows, min_cols = find_min_shape(csv_directory)
normalize_csv_data(csv_directory, min_rows, min_cols)

Normalized Datacsv\ADHD\v10p_v10p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v12p_v12p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v14p_v14p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v15p_v15p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v173_v173.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v177_v177.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v179_v179.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v181_v181.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v183_v183.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v18p_v18p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v190_v190.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v196_v196.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v198_v198.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v19p_v19p.csv to 7983 rows and 19 columns
Normalized Datacsv\ADHD\v1p_v1p.csv to 7983 rows and 19 columns
Normalized D