In [None]:
import pandas as pd
import glob
import os
import warnings
# Silence every Python warning for the rest of the session.
# NOTE: this also hides pandas warnings (e.g. about date parsing), so relax
# the filter when debugging unexpected results.
warnings.filterwarnings('ignore')

# --- CONFIGURATION ---
# Root folder that the loader below joins with a sub-folder name and a file
# pattern. The path is relative, so it is resolved against the current working
# directory; it assumes the 'data' folder is one level up from 'notebooks'.
DATA_DIR = '../data' 

# --- 1. GENERIC LOADER & AGGREGATOR FUNCTION ---
def create_monthly_base_table(folder_name, file_pattern, date_col, value_cols=None):
    """
    Build a State/District/Month table of summed values from chunked CSV files.

    Every CSV matching ``DATA_DIR/folder_name/file_pattern`` is read, its
    ``date_col`` is parsed day-first, each row is assigned to a calendar month
    and the value columns are summed per (State, District, Month). Chunks are
    aggregated one at a time and then combined, so the same district-month
    appearing in several files ends up as a single row.

    Args:
        folder_name: Sub-folder of ``DATA_DIR`` that holds the chunk files.
        file_pattern: Glob pattern for the chunk files, e.g. ``'*.csv'``.
        date_col: Name of the column holding the record date.
        value_cols: Column name, or iterable of column names, to sum.
            ``None`` (the default) sums every numeric column in the files.
            Listed columns that are not numeric are left out of the result.

    Returns:
        A DataFrame with ``State``, ``District``, ``Month`` (as a ``YYYY-MM``
        string) and the summed value columns, or ``None`` when no file
        matches the pattern.

    Raises:
        KeyError: If ``date_col``, ``State``, ``District`` or a requested
            value column is missing from a file.
    """
    path = os.path.join(DATA_DIR, folder_name, file_pattern)
    # glob returns matches in arbitrary order; sort so runs are reproducible.
    files = sorted(glob.glob(path))
    print(f"Scanning {path}...")
    print(f"Found {len(files)} files.")

    if not files:
        print("WARNING: No files found! Check your path.")
        return None

    group_cols = ['State', 'District', 'Month']

    # Normalise value_cols: None -> all numeric columns, str -> single column.
    if value_cols is None:
        selected = None
    elif isinstance(value_cols, str):
        selected = [value_cols]
    else:
        selected = list(value_cols)

    def _sum_by_group(frame):
        """Sum the selected (or all numeric) columns per State/District/Month."""
        grouped = frame.groupby(group_cols)
        if selected is not None:
            grouped = grouped[selected]
        return grouped.sum(numeric_only=True).reset_index()

    chunks = []
    for filename in files:
        # Load data (reading all rows for accuracy)
        df = pd.read_csv(filename, index_col=None, header=0)

        # Dates are parsed day-first (DD-MM-YYYY); unparseable values become NaT.
        df['Date'] = pd.to_datetime(df[date_col], dayfirst=True, errors='coerce')
        df['Month'] = df['Date'].dt.to_period('M')

        # groupby drops rows with a missing key, so report how many rows are
        # lost instead of letting them vanish silently.
        skipped = int(df[group_cols].isna().any(axis=1).sum())
        if skipped:
            print(
                f"WARNING: {skipped} rows in {os.path.basename(filename)} "
                "skipped (missing State/District or unparseable date)."
            )

        chunks.append(_sum_by_group(df))

    # Combine all chunks, then merge district-month rows split across files.
    df_final = _sum_by_group(pd.concat(chunks, axis=0, ignore_index=True))

    # Convert Month to string for easier saving/viewing
    df_final['Month'] = df_final['Month'].astype(str)

    return df_final

# Confirmation message printed once the definitions above have been executed.
print("Setup Complete. Loader function ready.")