In [1]:
import os
import pandas as pd

In [2]:
def read_csv_from_deep_nested_subfolders(main_folder, subfolders):
    """Load every CSV found two directory levels below ``main_folder``.

    The layout walked is ``main_folder/<subfolder>/<nested_folder>/<file>.csv``.
    Only files directly inside a second-level folder are considered; files
    sitting directly in a first-level subfolder, or any deeper, are ignored.

    Each CSV is read with its first two rows skipped, and the first three
    columns of the resulting frame are dropped.

    Parameters
    ----------
    main_folder : str
        Root directory containing the first-level subfolders.
    subfolders : iterable of str
        Names of the first-level subfolders to scan, in the order given.

    Returns
    -------
    list of (str, pandas.DataFrame)
        ``(file_path, frame)`` pairs for every CSV that parsed. Within each
        directory, entries are visited in sorted name order so the result is
        reproducible across runs and platforms.

    Notes
    -----
    Missing subfolders, subfolder paths that are not directories, empty CSVs
    and unparseable CSVs are reported with ``print`` and skipped rather than
    raised.
    """
    csv_files = []

    for subfolder in subfolders:
        subfolder_path = os.path.join(main_folder, subfolder)
        print(f"Checking path: {subfolder_path}")

        if not os.path.exists(subfolder_path):
            print(f"Subfolder path does not exist: {subfolder_path}")
            continue
        # A regular file passes the exists() check, but os.listdir() on it
        # would raise NotADirectoryError, so reject it explicitly.
        if not os.path.isdir(subfolder_path):
            print(f"Subfolder path is not a directory: {subfolder_path}")
            continue

        # os.listdir() returns entries in arbitrary order; sort for determinism.
        for nested_folder in sorted(os.listdir(subfolder_path)):
            nested_folder_path = os.path.join(subfolder_path, nested_folder)
            if not os.path.isdir(nested_folder_path):
                continue  # only second-level directories are scanned

            for file in sorted(os.listdir(nested_folder_path)):
                if not file.endswith('.csv'):
                    continue

                file_path = os.path.join(nested_folder_path, file)
                try:
                    # skiprows=2: the first two rows are discarded before the
                    # header row is read.
                    df = pd.read_csv(file_path, low_memory=False, skiprows=2)
                except pd.errors.EmptyDataError:
                    print(f"EmptyDataError: No data found in file: {file_path}")
                    continue
                except pd.errors.ParserError:
                    print(f"ParserError: Could not parse file: {file_path}")
                    continue

                # Keep everything from the fourth column onwards.
                csv_files.append((file_path, df.iloc[:, 3:]))

    return csv_files

In [3]:
# Root directory containing the first-level subfolders
main_folder = r"C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder"  # adjust to your own root folder

# Names of the first-level subfolders to scan under the root
subfolders = [
    '03MAY24',
    '04MAY24',
]  # adjust to your own subfolder names

# Collect a (file_path, DataFrame) pair for every CSV that could be read
csv_files = read_csv_from_deep_nested_subfolders(
    main_folder=main_folder,
    subfolders=subfolders,
)


Checking path: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24\RECOVERY\info.csv
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24\RESTING\info.csv
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24\TASK\info.csv
Checking path: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\04MAY24
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\04MAY24\RECOVERY\info.csv
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\04MAY24\RESTING\info.csv
EmptyDataError: No data found in file: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\04MAY24\TASK\info.csv


In [4]:
# Persist the loaded (file_path, DataFrame) pairs as a single pickle file
pickle_file = r'C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\csv_files.pkl'  # adjust to your preferred output path
with open(pickle_file, 'wb') as pickle_out:
    pd.to_pickle(csv_files, pickle_out)

# Preview the final rows of every loaded frame, labelled with its source file
for entry in csv_files:
    file_path, df = entry
    print(f"File: {file_path}", df.tail(), sep="\n")

File: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24\RECOVERY\2024-05-03-16-23_stress emg study 1.csv
         LT UPPER TRAP. (uV)  RT UPPER TRAP. (uV)  LT MIDDLE TRAP. (uV)  \
1261995           385.981354           -27.968889              9.446788   
1261996           318.827454           -34.686813              7.311970   
1261997           248.638534           -59.402607              2.731185   
1261998           190.359085           -74.975098            -15.272163   
1261999           146.716721           -58.497902            -38.772453   

         RT MIDDLE TRAP. (uV)  LT LOWER TRAP. (uV)  
1261995             -8.245357             9.712008  
1261996            -10.076577             5.745001  
1261997            -14.656251             2.692392  
1261998            -17.401461            -0.050800  
1261999            -17.096798            -0.969815  
File: C:\Users\khush\OneDrive\Desktop\data preprocessing\main folder\03MAY24\RESTING\2024-05-03-15-48_str