In [1]:
import pandas as pd

# Read the predictions export that every report below is built from
file_path = "RanCat_predictions_JFMAMJ(Final)_segment_reasons_buckets.csv"
df = pd.read_csv(file_path)

# Rebuild each policy date from its YEAR / MONTH / DAY component columns.
# The parts are joined as "YYYY-MM-DD" text (month and day left-padded to two
# characters) and then parsed; values that cannot be parsed become NaT
# instead of raising because of errors="coerce".
for date_col, prefix in (("Policy Start Date", "policy start date"),
                         ("Policy End Date", "policy end date")):
    year_text = df[prefix + "_YEAR"].astype(str)
    month_text = df[prefix + "_MONTH"].astype(str).str.zfill(2)
    day_text = df[prefix + "_DAY"].astype(str).str.zfill(2)
    df[date_col] = pd.to_datetime(year_text + "-" + month_text + "-" + day_text,
                                  errors="coerce")

# Month and year of the policy end date are what the monthly filter keys on
policy_end = df["Policy End Date"]
df["Policy End Month"] = policy_end.dt.month
df["Policy End Year"] = policy_end.dt.year

# The raw date component columns are redundant once the dates are assembled
drop_cols = [
    "policy start date_YEAR", "policy start date_MONTH", "policy start date_DAY",
    "policy end date_YEAR", "policy end date_MONTH", "policy end date_DAY",
]
df = df.drop(columns=drop_cols)

# Put the identifying columns first; every remaining column keeps its
# existing relative order after them. Adjust base_cols as needed.
base_cols = [
    "Cleaned Zone 2", "Cleaned State2", "Cleaned Branch Name 2", "policy no",
    "Policy Start Date", "Policy End Date", "renewal type", "product name",
]
leading = set(base_cols)
other_cols = [name for name in df.columns if name not in leading]
df = df.loc[:, base_cols + other_cols]

# Function to create the three sheets for a specific month
def create_monthly_report(df, month, year, output_file):
    """Write a three-sheet Excel report for policies ending in one month.

    Sheets written to ``output_file``:

    * ``State Summary`` - one row per "Cleaned State2" value with the number
      of non-null "policy no" entries and how many rows are predicted
      "Not Renewed" / "Renewed", followed by a "Total" row.
    * ``Policies_Likely_to_Churn`` - rows whose "Predicted Status" is
      "Not Renewed".
    * ``Policies_Likely_to_Renew`` - rows whose "Predicted Status" is
      "Renewed".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Policy End Month", "Policy End Year", "Cleaned State2",
        "policy no" and "Predicted Status".
    month, year : int
        Rows are kept when "Policy End Month" == month and
        "Policy End Year" == year.
    output_file : str or path-like
        Destination passed to ``pd.ExcelWriter``.

    Raises
    ------
    ValueError
        If "Policy End Month" or "Policy End Year" is missing from ``df``.
    """
    # Ensure the required month/year columns exist
    if "Policy End Month" not in df.columns or "Policy End Year" not in df.columns:
        raise ValueError("Missing 'Policy End Month' or 'Policy End Year' in DataFrame.")

    # Filter data for the given month and year (based on Policy End Date)
    in_period = (df["Policy End Month"] == month) & (df["Policy End Year"] == year)
    monthly_df = df[in_period]

    # Status masks are computed once and reused for the summary and both detail sheets
    status = monthly_df["Predicted Status"]
    is_churn = status == "Not Renewed"
    is_renew = status == "Renewed"

    count_cols = ["Total_Policies", "Policies_Likely_to_Churn", "Policies_Likely_to_Renew"]

    # Sheet 1: Summary by State (using "Cleaned State2").
    # dropna=False keeps policies whose state is missing as their own group;
    # with the default they were silently excluded, so the summary totals did
    # not match the detail sheets, which never filter on state.
    # Summing 0/1 flag columns replaces the per-group Python lambdas.
    summary_df = (
        monthly_df
        .assign(_likely_churn=is_churn.astype(int), _likely_renew=is_renew.astype(int))
        .groupby("Cleaned State2", dropna=False)
        .agg(
            Total_Policies=("policy no", "count"),
            Policies_Likely_to_Churn=("_likely_churn", "sum"),
            Policies_Likely_to_Renew=("_likely_renew", "sum"),
        )
        .reset_index()
    )

    # Add a serial number column (SI. No) starting from 1
    summary_df.insert(0, "SI. No", range(1, len(summary_df) + 1))

    # Compute overall totals for the numeric columns and append them as a final row
    overall_totals = summary_df[count_cols].sum()
    overall_totals_df = pd.DataFrame({
        "SI. No": ["Total"],
        "Cleaned State2": ["Total"],
        **{col: [overall_totals[col]] for col in count_cols},
    })
    summary_df = pd.concat([summary_df, overall_totals_df], ignore_index=True)

    # Sheet 2: Policies Not Renewed / Sheet 3: Policies Renewed
    not_renewed_df = monthly_df[is_churn]
    renewed_df = monthly_df[is_renew]

    # Save all three sheets to Excel
    with pd.ExcelWriter(output_file) as writer:
        summary_df.to_excel(writer, sheet_name="State Summary", index=False)
        not_renewed_df.to_excel(writer, sheet_name="Policies_Likely_to_Churn", index=False)
        renewed_df.to_excel(writer, sheet_name="Policies_Likely_to_Renew", index=False)

# Month number -> month name for the first half of the year
months = dict(enumerate(
    ["January", "February", "March", "April", "May", "June"], start=1
))

# Write one workbook per month, January through June of 2025
for month in months:
    month_name = months[month]
    output_file = f"{month_name}_Report.xlsx"
    create_monthly_report(df, month, 2025, output_file)
    print(f"{month_name} report generated: {output_file}")

print("All reports generated successfully.")

FileNotFoundError: [Errno 2] No such file or directory: 'RanCat_predictions_JFMAMJ(Final)_segment_reasons_buckets.csv'

In [2]:
import pandas as pd

# Read the predictions export used for the reports in this cell
file_path = "SOND_predictions_xgb 1 - Copy.csv"
df = pd.read_csv(file_path)

# Give the six date component columns short working names
short_names = {
    "Policy Start Date_YEAR": "start_year",
    "Policy Start Date_MONTH": "start_month",
    "Policy Start Date_DAY": "start_day",
    "Policy End Date_YEAR": "end_year",
    "Policy End Date_MONTH": "end_month",
    "Policy End Date_DAY": "end_day",
}
df = df.rename(columns=short_names)

# Cast the components to int so they render without a decimal part as text
date_parts = ["start_year", "end_year", "start_month", "end_month", "start_day", "end_day"]
df[date_parts] = df[date_parts].astype(int)

# Join the components as "YYYY-MM-DD" text (month/day left-padded to two
# characters) and parse; unparseable values become NaT via errors="coerce"
for date_col, prefix in (("Policy Start Date", "start"), ("Policy End Date", "end")):
    year_text = df[prefix + "_year"].astype(str)
    month_text = df[prefix + "_month"].astype(str).str.zfill(2)
    day_text = df[prefix + "_day"].astype(str).str.zfill(2)
    df[date_col] = pd.to_datetime(year_text + "-" + month_text + "-" + day_text,
                                  errors="coerce")

# Month and year of the policy end date are what the monthly filter keys on
policy_end = df["Policy End Date"]
df["Policy End Month"] = policy_end.dt.month
df["Policy End Year"] = policy_end.dt.year

# The component columns are redundant once the dates are assembled
df = df.drop(columns=date_parts)

# Identifying columns go first; the remaining columns follow in their
# existing relative order
leading_cols = [
    "Zone", "State", "New Branch Name", "Insured name", "Policy No",
    "Policy Start Date", "Policy End Date", "Renewal Type", "Product name ",
]
column_order = leading_cols + [name for name in df.columns if name not in leading_cols]

df = df[column_order]  # Reordering columns

# Function to create the three sheets for a specific month
def create_monthly_report(df, month, year, output_file):
    """Write a three-sheet Excel report for policies ending in one month.

    Sheets written to ``output_file``:

    * ``State Summary`` - one row per "State" value with the number of
      non-null "Policy No" entries and how many rows are predicted
      "Not Renewed" / "Renewed".
    * ``Policies_Likely_to_Churn`` - rows whose "Predicted Status" is
      "Not Renewed".
    * ``Policies_Likely_to_Renew`` - rows whose "Predicted Status" is
      "Renewed".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Policy End Month", "Policy End Year", "State",
        "Policy No" and "Predicted Status".
    month, year : int
        Rows are kept when "Policy End Month" == month and
        "Policy End Year" == year.
    output_file : str or path-like
        Destination passed to ``pd.ExcelWriter``.

    Raises
    ------
    ValueError
        If "Policy End Month" or "Policy End Year" is missing from ``df``.
    """
    # Ensure 'Policy End Month' and 'Policy End Year' exist
    if "Policy End Month" not in df.columns or "Policy End Year" not in df.columns:
        raise ValueError("Missing 'Policy End Month' or 'Policy End Year' in DataFrame.")

    # Filter data for the given month and year
    in_period = (df["Policy End Month"] == month) & (df["Policy End Year"] == year)
    monthly_df = df[in_period]

    # Status masks are computed once and reused for the summary and both detail sheets
    status = monthly_df["Predicted Status"]
    is_churn = status == "Not Renewed"
    is_renew = status == "Renewed"

    # Sheet 1: Summary by State.
    # dropna=False keeps policies whose state is missing as their own group;
    # with the default they were silently excluded from the summary even
    # though the detail sheets, which never filter on state, still list them.
    # Summing 0/1 flag columns replaces the per-group Python lambdas.
    summary_df = (
        monthly_df
        .assign(_likely_churn=is_churn.astype(int), _likely_renew=is_renew.astype(int))
        .groupby("State", dropna=False)
        .agg(
            Total_Policies=("Policy No", "count"),
            Policies_Likely_to_Churn=("_likely_churn", "sum"),
            Policies_Likely_to_Renew=("_likely_renew", "sum"),
        )
        .reset_index()
    )

    # Sheet 2: Policies Not Renewed / Sheet 3: Policies Renewed
    not_renewed_df = monthly_df[is_churn]
    renewed_df = monthly_df[is_renew]

    # Save to Excel
    with pd.ExcelWriter(output_file) as writer:
        summary_df.to_excel(writer, sheet_name="State Summary", index=False)
        not_renewed_df.to_excel(writer, sheet_name="Policies_Likely_to_Churn", index=False)
        renewed_df.to_excel(writer, sheet_name="Policies_Likely_to_Renew", index=False)

# Generate one report per month for September through December 2024
for report_month, file_prefix in ((9, "Sep"), (10, "Oct"), (11, "Nov"), (12, "Dec")):
    create_monthly_report(df, report_month, 2024, f"{file_prefix}_Report.xlsx")

print("Reports generated successfully.")

Reports generated successfully.
