In [3]:
import pandas as pd
from datetime import datetime
import os

# Function to load and preprocess the data
def load_and_preprocess(file_path, date_column='date and time'):
    """Read one CSV file and parse its timestamp column as datetimes.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.
    date_column : str, default 'date and time'
        Name of the column that ``read_csv`` should parse as datetimes.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``date_column`` parsed as datetimes.
    """
    # `infer_datetime_format` is deprecated since pandas 2.0 (format inference
    # is the default behaviour there), so it is intentionally not passed.
    df = pd.read_csv(file_path, parse_dates=[date_column])
    return df

# Folder paths (adjust these paths if necessary).
# Raw strings keep the Windows backslashes literal: in a normal string, sequences
# such as "\S" or "\I" are invalid escapes that newer Python versions warn about.
source_root = r'H:\Shadman\Isabel_2003_FMModels\CloseShore_FM_Wave_2003\BnD_AllForcing'

# One folder per segment, Segment_1 through Segment_8, in segment order.
folders = [rf'{source_root}\Segment_{segment}' for segment in range(1, 9)]

# Base file name
file_base_name = 'Boundary01_'

# Destination folder for combined files (adjust this path as needed).
# NOTE(review): the sources above are under Isabel_2003 / CloseShore_FM_Wave_2003 while
# this destination is under Isabel_2011 / CloseShore_FM_Wave_2011 - confirm this is intended.
destination_folder = r'H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged'
os.makedirs(destination_folder, exist_ok=True)

# Loop through file indices 1-8 (Boundary01_0001.csv to Boundary01_0008.csv)
for i in range(1, 9):
    # The same file name is read from every segment folder and reused for the merged output
    file_name = f'{file_base_name}{str(i).zfill(4)}.csv'

    # Collect the file with this index from every folder, in the order the folders are listed
    dataframes = [load_and_preprocess(os.path.join(folder, file_name)) for folder in folders]

    # Concatenate the dataframes for the current file index (e.g., 0001)
    combined = pd.concat(dataframes)

    # Sort by 'date and time' to get a continuous time series. A stable sort keeps rows
    # with equal timestamps in their concatenation (folder) order; the default quicksort
    # gives no such guarantee, which would make keep='first' below pick an arbitrary row.
    combined = combined.sort_values('date and time', kind='mergesort')

    # Remove overlaps: for each duplicated timestamp keep the first occurrence,
    # i.e. the row coming from the earliest folder in the list
    combined = combined.drop_duplicates(subset=['date and time'], keep='first')

    # Reset the index after sorting and removing duplicates
    combined = combined.reset_index(drop=True)

    # Export the combined DataFrame for the current index to the destination folder
    output_file_path = os.path.join(destination_folder, file_name)
    combined.to_csv(output_file_path, index=False)

    # Print to confirm the export
    print(f'Exported combined file: {output_file_path}')


Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0001.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0002.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0003.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0004.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0005.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0006.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0007.csv
Exported combined file: H:\Shadman\Isabel_2011_FMModels\CloseShore_FM_Wave_2011\BnD\Merged\Boundary01_0008.csv


In [2]:
# Display the configured source folders.
# NOTE(review): the saved output below lists a single folder, unlike the eight-entry
# list assigned to `folders` earlier in this file - it looks stale; re-run to confirm.
folders



['H:\\Shadman\\Isabel_2003_FMModels\\CloseShore_FM_Wave_2003\\BnD_AllForcing']