In [1]:
pip install pandas openpyxl sqlalchemy python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
from pathlib import Path

# Cleaning function
def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df``; the input frame is left untouched.

    Steps, in order:

    1. Drop fully duplicated rows.
    2. Normalise column names: cast to ``str``, strip surrounding whitespace,
       replace spaces with underscores and lower-case them.
    3. Parse every column whose normalised name contains ``"date"`` with
       ``pd.to_datetime(..., errors="coerce")``; unparseable values become
       ``NaT``.
    4. Fill missing values in numeric columns with ``0``.
    5. Title-case the string values of object-dtype columns; non-string
       values in those columns are kept as they are.

    Args:
        df: Raw table to clean.

    Returns:
        A new DataFrame with the steps above applied.
    """
    # Work on an independent copy so the caller's frame (including its
    # column labels) is never modified.
    cleaned = df.drop_duplicates().copy()

    # astype(str) keeps the .str chain usable when labels are not strings
    # (for example integer column labels).
    cleaned.columns = (
        cleaned.columns.astype(str)
        .str.strip()
        .str.replace(' ', '_', regex=False)
        .str.lower()
    )

    # Substring match, so names such as "order_date" are picked up.
    # NOTE(review): this also matches names like "updated_by" — confirm no
    # such column exists in the input files.
    for col in cleaned.columns:
        if 'date' in col:
            cleaned[col] = pd.to_datetime(cleaned[col], errors='coerce')

    num_cols = cleaned.select_dtypes(include='number').columns
    if len(num_cols):
        cleaned[num_cols] = cleaned[num_cols].fillna(0)

    # Element-wise title-casing: only real strings are changed, so numbers or
    # missing values stored in an object column are not turned into NaN.
    for col in cleaned.select_dtypes(include='object').columns:
        cleaned[col] = cleaned[col].map(
            lambda value: value.title() if isinstance(value, str) else value
        )

    return cleaned

# Paths
# Input directory with the raw tables and output directory for the cleaned ones.
RAW_DIR = Path("Normalized_Data")
CLEAN_DIR = Path("Cleaned_Data")
CLEAN_DIR.mkdir(exist_ok=True)

# Loop over every file in the input directory.
# iterdir() yields entries in arbitrary order; sorting makes each run
# process (and log) the files in the same sequence.
for file in sorted(RAW_DIR.iterdir()):
    suffix = file.suffix.lower()  # accept ".CSV" / ".XLSX" as well

    # Skip sub-directories, unsupported file types and the "~$..." lock
    # files Excel creates next to an open workbook.
    if not file.is_file() or suffix not in (".csv", ".xlsx") or file.name.startswith("~$"):
        continue

    print(f"Processing {file.name}")

    df = pd.read_csv(file) if suffix == ".csv" else pd.read_excel(file)
    df_clean = clean_dataframe(df)

    # Every cleaned table is written as an Excel workbook, whatever the input format.
    output_path = CLEAN_DIR / f"{file.stem}_clean.xlsx"
    df_clean.to_excel(output_path, index=False, engine='openpyxl')
    print(f"Saved cleaned file: {output_path}")

Processing Dim_Customers.csv
Saved cleaned file: Cleaned_Data\Dim_Customers_clean.xlsx
Processing Dim_Order_Date.csv
Saved cleaned file: Cleaned_Data\Dim_Order_Date_clean.xlsx
Processing Dim_Product.csv
Saved cleaned file: Cleaned_Data\Dim_Product_clean.xlsx
Processing Dim_Shipment_Date.csv
Saved cleaned file: Cleaned_Data\Dim_Shipment_Date_clean.xlsx
Processing Dim_Shipment_Mode.csv
Saved cleaned file: Cleaned_Data\Dim_Shipment_Mode_clean.xlsx
Processing Dim_Warehouse.csv
Saved cleaned file: Cleaned_Data\Dim_Warehouse_clean.xlsx
Processing Fact_Orders.csv
Saved cleaned file: Cleaned_Data\Fact_Orders_clean.xlsx
Processing Fact_Shipment.csv
Saved cleaned file: Cleaned_Data\Fact_Shipment_clean.xlsx
Processing Fulfillment.csv
Saved cleaned file: Cleaned_Data\Fulfillment_clean.xlsx
Processing Inventory.csv
Saved cleaned file: Cleaned_Data\Inventory_clean.xlsx
