In [1]:
import pandas as pd
import os

# Directory containing the Excel files
directory = './'


def load_dated_frames(folder):
    """Read every Excel workbook in ``folder`` that has a 'Date' column.

    Files are considered when their name ends with ".xlsx" or ".xls" and does
    not start with "~$". Workbooks that cannot be read, or that lack a 'Date'
    column, are reported with ``print`` and skipped instead of aborting the
    whole batch.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).

    Returns
    -------
    list of pandas.DataFrame
        One frame per accepted workbook, in sorted file-name order.
    """
    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the merge
    # order (and therefore column order and any _x/_y suffixes) reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith((".xlsx", ".xls")):
            continue
        if filename.startswith("~$"):
            # Skip temporary files created by Excel
            continue
        file_path = os.path.join(folder, filename)
        # openpyxl only reads the .xlsx format; for legacy .xls files let
        # pandas pick a suitable reader instead of forcing a failing engine.
        engine = 'openpyxl' if filename.endswith(".xlsx") else None
        try:
            df = pd.read_excel(file_path, engine=engine)
            # Only frames with a 'Date' column can take part in the merge
            if 'Date' in df.columns:
                frames.append(df)
            else:
                print(f"Skipping {file_path}: 'Date' column not found")
        except Exception as e:
            # Deliberate best effort: one broken workbook must not stop the run
            print(f"Error reading {file_path}: {e}")
    return frames


def merge_on_date(frames):
    """Outer-join ``frames`` on 'Date' and return the rows sorted by 'Date'.

    ``frames`` must be a non-empty list of DataFrames that each contain a
    'Date' column.

    NOTE(review): when several workbooks share non-'Date' column names, pandas
    disambiguates them with _x/_y suffixes, and newer pandas versions appear to
    reject merges whose suffixes would create duplicate column names -- confirm
    against the real input files and rename columns per source if needed.
    """
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, on='Date', how='outer')
    return merged.sort_values(by='Date')


def fill_gaps(frame):
    """Return a copy of ``frame`` with gaps forward-filled, then backward-filled.

    The backward fill only affects leading rows that had no earlier value to
    carry forward. Columns that are entirely empty stay empty.
    """
    return frame.ffill().bfill()


# List of dataframes, one per usable Excel file
dfs = load_dated_frames(directory)

if len(dfs) > 0:
    # Merge dataframes on the 'Date' column, sorted by date
    merged_unfilled = merge_on_date(dfs)

    # Record where values are missing BEFORE filling; taking the mask after
    # the fill would report (almost) no gaps and make the NA report useless.
    na_positions = merged_unfilled.isna()
    na_summary = na_positions.sum()

    # Fill missing values using forward fill and backward fill
    merged_df = fill_gaps(merged_unfilled)

    # Save the positions of the NAs (True indicates an NA position)
    na_positions.to_excel('na_positions.xlsx', index=False)

    # Save the summary of NAs (count of NAs per column)
    na_summary.to_excel('na_summary.xlsx', header=["Count"])

    # Save the merged dataframe with filled values to a new Excel file
    merged_df.to_excel('merged_file_filled.xlsx', index=False)

    print("NA positions saved to 'na_positions.xlsx'")
    print("NA summary saved to 'na_summary.xlsx'")
    print("Merged file with filled values saved to 'merged_file_filled.xlsx'")
else:
    print("No valid Excel files found in the directory.")


Skipping /Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Code/Data Preparation/Weather/2.Cleaner/Test/Stasiun Meteorologi Iskandar_31-05-2024.xlsx: 'Date' column not found
Skipping /Users/germankosenkov/Code projects/S&D estimation:forecasting/Indonesian S&D/New/Code/Data Preparation/Weather/2.Cleaner/Test/Stasiun Meteorologi Japura_31-05-2024.xlsx: 'Date' column not found
No valid Excel files found in the directory.
