# In [None]:
import os
import pandas as pd

# Directory scanned for raw input ".csv" files. The path is relative, so it is
# resolved against the current working directory at run time.
RAW_DATA_PATH = "../../data/raw"
# File that the merged, cleaned dataset is written to (also a relative path).
CLEANED_DATA_PATH = "../../data/cleaned/cleaned_data.csv"

def load_and_clean(raw_data_path=None, cleaned_data_path=None, *,
                   essential_columns=None):
    """Merge every raw CSV file into one cleaned CSV file.

    Each ``.csv`` file in the raw-data directory is read, exact duplicate
    rows within that file are dropped, and every column whose name contains
    "time" or "date" (case-insensitive) is parsed with ``pd.to_datetime``;
    values that cannot be parsed become ``NaT``. The per-file frames are then
    stacked row-wise, rows with missing values are removed, and the result is
    written to disk without the index column.

    Args:
        raw_data_path: Directory to scan for ``.csv`` files. Defaults to the
            module-level ``RAW_DATA_PATH``.
        cleaned_data_path: Destination CSV file. Defaults to the module-level
            ``CLEANED_DATA_PATH``. Missing parent directories are created.
        essential_columns: Optional column label(s). When given, only rows
            with a missing value in one of these columns are dropped. When
            ``None`` (default), a row is dropped if *any* column is missing.
            Note that files with differing columns are padded with NaN by the
            concatenation, so with the default such rows are dropped as well.

    Returns:
        The cleaned ``pandas.DataFrame`` that was written to disk.

    Raises:
        ValueError: If the raw-data directory contains no ``.csv`` files.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for callers that pass nothing.
    if raw_data_path is None:
        raw_data_path = RAW_DATA_PATH
    if cleaned_data_path is None:
        cleaned_data_path = CLEANED_DATA_PATH

    # os.listdir() yields entries in arbitrary order; sorting makes the row
    # order of the output reproducible across runs and platforms.
    csv_names = sorted(
        name for name in os.listdir(raw_data_path) if name.endswith(".csv")
    )
    if not csv_names:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No .csv files found in: {raw_data_path}")

    dataframes = []
    for name in csv_names:
        df = pd.read_csv(os.path.join(raw_data_path, name))

        # Duplicates are removed per file; identical rows that appear in two
        # different files are kept.
        df = df.drop_duplicates()

        # NOTE(review): this is a substring match, so a column such as
        # "update_count" (contains "date") is also parsed as a timestamp.
        for col in df.columns:
            lowered = col.lower()
            if "time" in lowered or "date" in lowered:
                df[col] = pd.to_datetime(df[col], errors="coerce")

        dataframes.append(df)

    # Stack all datasets row-wise, then drop incomplete rows. Unparseable
    # timestamps were coerced to NaT above and therefore count as missing.
    cleaned_df = pd.concat(dataframes, ignore_index=True)
    cleaned_df = cleaned_df.dropna(axis=0, how="any", subset=essential_columns)
    cleaned_df = cleaned_df.reset_index(drop=True)

    # to_csv() does not create missing directories.
    output_dir = os.path.dirname(cleaned_data_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    cleaned_df.to_csv(cleaned_data_path, index=False)
    # "\u2705" is the check-mark emoji; the escape keeps the source ASCII-safe
    # (the literal had previously been corrupted into mojibake).
    print(f"\u2705 Cleaned dataset saved to: {cleaned_data_path}")
    return cleaned_df

# Run the cleaning pipeline only when this file is executed directly as a
# script; importing it as a module does not trigger any file I/O.
if __name__ == "__main__":
    load_and_clean()
