In [7]:
# ministry_accountability_scoring.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib

# Read the project-level CSV and trim stray whitespace from every header.
file_path = "ongoing_projects2_clean.csv"
df = pd.read_csv(file_path).rename(columns=str.strip)

# --- Normalize names ---
# Each header is matched against these substrings in order; the first
# substring found decides the canonical column name. Headers that match
# nothing are left as they are.
name_patterns = (
    ("Latest Revised Cost", "Latest_Revised_Cost_RsCr"),
    ("Original Cost", "Original_Cost_RsCr"),
    ("Project Count", "Project_Count"),
    ("Cumulative", "Cumulative_Expenditure_RsCr"),
    ("Allocated To", "Allocated_To"),
)
canonical_names = {}
for header in df.columns:
    for needle, canonical in name_patterns:
        if needle in header:
            canonical_names[header] = canonical
            break

# Apply all renames in one pass.
df = df.rename(columns=canonical_names)

# --- Ensure numerics ---
# Non-numeric or missing entries become 0 in each cost/count column that exists.
numeric_columns = (
    "Project_Count",
    "Cumulative_Expenditure_RsCr",
    "Original_Cost_RsCr",
    "Latest_Revised_Cost_RsCr",
)
for col in numeric_columns:
    if col not in df.columns:
        continue
    coerced = pd.to_numeric(df[col], errors="coerce")
    df[col] = coerced.fillna(0)

# --- Compute escalation if missing ---
# Percentage change from original to latest revised cost. Rows whose original
# cost is not positive get NaN so they do not affect the per-ministry mean.
# NOTE(review): a missing revised cost was coerced to 0 above, so such a row
# with a positive original cost yields -100% here - confirm this is intended.
if "Cost_Escalation_Pct" not in df.columns:
    has_baseline = df["Original_Cost_RsCr"] > 0
    pct_change = (
        (df["Latest_Revised_Cost_RsCr"] - df["Original_Cost_RsCr"])
        / df["Original_Cost_RsCr"]
        * 100
    )
    df["Cost_Escalation_Pct"] = np.where(has_baseline, pct_change, np.nan)

# --- Aggregate by ministry ---
# Output column -> (source column, reducer). Rows with a missing
# "Allocated_To" are kept as their own group (dropna=False).
ministry_aggregations = {
    "Total_Projects": ("Project_Count", "sum"),
    "Total_Cumulative_Expenditure_RsCr": ("Cumulative_Expenditure_RsCr", "sum"),
    "Total_Original_Cost_RsCr": ("Original_Cost_RsCr", "sum"),
    "Total_Latest_Revised_Cost_RsCr": ("Latest_Revised_Cost_RsCr", "sum"),
    "Avg_Escalation_Pct": ("Cost_Escalation_Pct", "mean"),
}
grouped = df.groupby("Allocated_To", dropna=False)
ministry = grouped.agg(**ministry_aggregations).reset_index()

# --- Derived metric ---
# Share of the latest revised cost already spent. A zero revised cost gives
# inf or NaN from the division; both are reported as 0.
spend_ratio = (
    ministry["Total_Cumulative_Expenditure_RsCr"]
    / ministry["Total_Latest_Revised_Cost_RsCr"]
)
ministry["Spending_Efficiency"] = spend_ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

# --- Normalize metrics (0–1) ---
# The three metrics are scaled with ONE MinMaxScaler fitted on all of them at
# once. MinMaxScaler scales each feature independently, so the *_norm values
# are the same as fitting per column, but the fitted scaler now remembers the
# min/max of every metric. Re-fitting the same scaler inside a per-column loop
# would leave it holding only the last column's range.
metric_cols = ["Avg_Escalation_Pct", "Spending_Efficiency", "Total_Projects"]

# Missing metrics are treated as 0 before scaling.
for col in metric_cols:
    ministry[col] = ministry[col].fillna(0)

scaler = MinMaxScaler()
scaled_metrics = scaler.fit_transform(ministry[metric_cols])
for position, col in enumerate(metric_cols):
    ministry[col + "_norm"] = scaled_metrics[:, position]

# Save scaler for prediction use.
# The pickled scaler expects the three metric columns in `metric_cols` order.
joblib.dump(scaler, "accountability_scaler.pkl")

# --- Compute Accountability Score ---
# Weighted sum of the normalized metrics: low escalation counts for 50%,
# spending efficiency for 35% and project volume for 15%.
escalation_term = (1 - ministry["Avg_Escalation_Pct_norm"]) * 0.5
efficiency_term = ministry["Spending_Efficiency_norm"] * 0.35
volume_term = ministry["Total_Projects_norm"] * 0.15
ministry["Accountability_Score"] = escalation_term + efficiency_term + volume_term

# --- Output ---
# Rank ministries from highest to lowest score and renumber the rows.
ministry = ministry.sort_values("Accountability_Score", ascending=False)
ministry = ministry.reset_index(drop=True)

report_columns = [
    "Allocated_To",
    "Accountability_Score",
    "Avg_Escalation_Pct",
    "Spending_Efficiency",
    "Total_Projects",
]
out = ministry[report_columns]
top_table = out.head(30).to_string(index=False)
print("Top ministries by Accountability Score:\n", top_table)

out.to_csv("ministry_accountability_scores.csv", index=False)
print("\nSaved: ministry_accountability_scores.csv")

# Save the full ministry dataset (including totals and *_norm columns) for reference
ministry.to_csv("ministry_full_scores.csv", index=False)

# --- Save normalization data for future predictions ---
# Store the observed min and max of each raw metric under "min" / "max",
# keyed by metric name.
bounded_metrics = ("Avg_Escalation_Pct", "Spending_Efficiency", "Total_Projects")
norm_bounds = {
    "min": {metric: ministry[metric].min() for metric in bounded_metrics},
    "max": {metric: ministry[metric].max() for metric in bounded_metrics},
}
# Left as the cell's last bare expression so the written path is echoed.
joblib.dump(norm_bounds, "accountability_norm_data.pkl")

Top ministries by Accountability Score:
                                           Allocated_To  Accountability_Score  Avg_Escalation_Pct  Spending_Efficiency  Total_Projects
                            Ministry of Civil Aviation              0.837042            3.114316             1.034546              29
                   Ministry of Housing & Urban Affairs              0.687878            2.183306             0.604330              54
                        Department of Higher Education              0.687611            2.907246             0.643208              30
                                  Ministry of Railways              0.667925           32.104301             0.720329             340
                   Ministry of Petroleum & Natural Gas              0.648545           16.221094             0.634222             142
                                      Ministry of Coal              0.632707            1.148063             0.360522             127
                   Mi

['accountability_norm_data.pkl']

In [8]:
# ------------------------------------------------------------------------
# 🔮 Function: Predict Accountability Score for a new ministry or scenario
# ------------------------------------------------------------------------
def predict_accountability_score(avg_escalation_pct, spending_efficiency, total_projects, norm_file="accountability_norm_data.pkl", clip=False):
    """
    Predicts the Accountability Score for a new ministry based on 3 key inputs.

    Each input is min-max normalized with the bounds stored in ``norm_file``
    and combined as
    ``(1 - escalation_norm) * 0.5 + efficiency_norm * 0.35 + projects_norm * 0.15``.

    Parameters:
        avg_escalation_pct (float): Average cost escalation percentage.
        spending_efficiency (float): Ratio of cumulative expenditure / revised cost.
            Usually within 0–1 but may exceed 1.
        total_projects (float): Total number of projects.
        norm_file (str): Path to normalization data file; a dict with "min" and
            "max" entries, each keyed by "Avg_Escalation_Pct",
            "Spending_Efficiency" and "Total_Projects".
        clip (bool): If True, each normalized input is limited to [0, 1], so the
            score is guaranteed to lie in [0, 1]. If False (default), inputs
            outside the stored min/max range extrapolate and the score can fall
            outside [0, 1].

    Returns:
        float: Accountability Score rounded to 3 decimals. Within 0–1 when all
        inputs lie inside the stored ranges or when ``clip=True``.

    Raises:
        KeyError: If the normalization data lacks an expected key.
    """
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only pass a norm_file that comes from a trusted source.
    data = joblib.load(norm_file)
    mins = data["min"]
    maxs = data["max"]

    # Manual normalization (MinMax)
    def normalize(value, vmin, vmax):
        span = vmax - vmin
        if span == 0:
            # Degenerate range: every reference value was identical.
            return 0.0
        scaled = (value - vmin) / span
        if clip:
            # Explicit comparisons (rather than min/max) so NaN is not masked.
            if scaled < 0:
                scaled = 0.0
            elif scaled > 1:
                scaled = 1.0
        return scaled

    avg_esc_norm = normalize(avg_escalation_pct, mins["Avg_Escalation_Pct"], maxs["Avg_Escalation_Pct"])
    spend_eff_norm = normalize(spending_efficiency, mins["Spending_Efficiency"], maxs["Spending_Efficiency"])
    total_proj_norm = normalize(total_projects, mins["Total_Projects"], maxs["Total_Projects"])

    # Accountability formula (lower escalation is better, hence the inversion)
    score = (1 - avg_esc_norm) * 0.5 + spend_eff_norm * 0.35 + total_proj_norm * 0.15
    # float() so callers get a plain Python float even when the stored bounds
    # are numpy scalars.
    return round(float(score), 3)

In [9]:
# ✅ Example usage
# Scores one hypothetical ministry using the saved normalization bounds.
if __name__ == "__main__":
    example_inputs = {
        "avg_escalation_pct": 15.2,
        "spending_efficiency": 0.82,
        "total_projects": 18,
    }
    new_score = predict_accountability_score(**example_inputs)
    print(f"\n🔮 Predicted Accountability Score for new ministry: {new_score}")



🔮 Predicted Accountability Score for new ministry: 0.672
