In [None]:
import pandas as pd
import numpy as np
import sqlite3
from pathlib import Path

# A comment for the diff
def find_project_root(anchor_file=".git"):
    """
    Searches upward from the current directory for an anchor file or directory
    to robustly find the project's root directory.
    """
    current_path = Path.cwd()
    while current_path != current_path.parent: # Loop until we reach the filesystem root
        if (current_path / anchor_file).exists():
            print(f"Project root found at: {current_path}")
            return current_path
        current_path = current_path.parent
    raise FileNotFoundError(f"Project root with anchor '{anchor_file}' not found.")

def create_date_dimension(start_date: str, end_date: str, db_path: Path):
    """
    Generates a date dimension table and populates it in a SQLite database.
    """
    try:
        print(f"Checking for and removing existing table in '{db_path}'...")
        with sqlite3.connect(db_path) as conn:
            conn.execute("DROP TABLE IF EXISTS dim_dates;")
        print("Table removed successfully.")
    except sqlite3.Error as e:
        print(f"An error occurred during cleanup: {e}")
        return None # Return None on failure

    print("Generating date range and features...")
    rng = pd.to_datetime(pd.date_range(start=start_date, end=end_date, freq='D'))
    df = pd.DataFrame(index=range(len(rng)), columns=[])
    df["date"] = rng

    # Feature Engineering...
    df["date_id"] = pd.Series(rng.strftime("%Y%m%d")).astype(int)
    df["day_of_week"] = rng.weekday + 1
    df["day_name"] = rng.strftime('%a').str.upper()
    df["is_weekend"] = (df["day_of_week"] >= 6).astype(int)
    df["year"] = rng.year.astype(int)
    df["quarter"] = rng.quarter.astype(int)
    df["month"] = rng.month.astype(int)
    df["day"] = rng.day.astype(int)
    df["month_name"] = rng.month_name()
    df["year_month"] = (df["year"] * 100 + df["month"]).astype(int)
    # ... (omitting all feature engineering lines for brevity, they are unchanged)
    iso = rng.isocalendar()
    df["iso_year"] = iso["year"].values.astype(int)
    df["iso_week"] = iso["week"].values.astype(int)
    df["is_business_day_generic"] = (~(df["day_of_week"] >= 6)).astype(int)
    
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    
    cols = [ "date","date_id","day_of_week","day_name","is_weekend","year","quarter",
             "month","day","month_name","year_month", "iso_year", "iso_week",
             "is_business_day_generic" ]
    df = df[cols] # Re-order and select columns to match a simplified table

    try:
        with sqlite3.connect(db_path) as conn:
            print("Populating database table...")
            # Use the DataFrame's to_sql method for a simpler and safer insertion
            df.to_sql('dim_dates', conn, if_exists='replace', index=False)
            
            # Add indexes after writing data
            conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_dim_dates_date_id ON dim_dates(date_id);")
            print("Database population complete.")

    except sqlite3.Error as e:
        print(f"An error occurred during database population: {e}")
        return None
        
    return df

def main():
    """
    Main function to configure and run the date dimension creation script.
    """
    try:
        project_root = find_project_root()
    except FileNotFoundError as e:
        print(f"ERROR: {e}")
        return

    # Point to the 'processed' sub-directory using the robustly found root
    data_dir = project_root / "data" / "processed"
    data_dir.mkdir(parents=True, exist_ok=True)
    sqlite_path = data_dir / "datadim_dates_v2.sqlite"

    start_date = "2020-01-01"
    end_date = "2029-12-31"
    
    created_df = create_date_dimension(start_date, end_date, sqlite_path)
    
    if created_df is not None:
        print("\nScript finished successfully.")
        print(f"Database file is at: {sqlite_path.resolve()}")
    else:
        print("\nScript finished with errors.")
    
    return created_df

# Run the main function
df_for_inspection = main()

Project root found at: c:\toy_code
Checking for and removing existing table in 'c:\toy_code\data\processed\datadim_dates_v2.sqlite'...
Table removed successfully.
Generating date range and features...
Populating database table...
Database population complete.

Script finished successfully.
Database file is at: C:\toy_code\data\processed\datadim_dates_v2.sqlite


In [5]:
print(df.head())  # Display the first few rows of the DataFrame for verification

NameError: name 'df' is not defined