In [1]:
import pandas as pd

# Placeholder DataFrame: Replace this with actual data loading
df = pd.read_excel("Updated_customer_aggregated_yearly_data.xlsx")

In [None]:
import pandas as pd

# Ensure proper datetime formatting
df['Policy Start Date'] = pd.to_datetime(df['Policy Start Date'], errors='coerce')
df['Policy End Date'] = pd.to_datetime(df['Policy End Date'], errors='coerce')
df['Start Year-Month'] = df['Policy Start Date'].dt.to_period('M')
df['End Year-Month'] = df['Policy End Date'].dt.to_period('M')

# 1. Start Year wise customer counts and percentages
start_year_metrics = (
    df.groupby('Start Year')
    .apply(lambda x: pd.Series({
        'Total Customers': x['CustomerID'].nunique(),
        'New Customers': x[x['New Customer'] == 'Yes']['CustomerID'].nunique(),
        'Old Customers': x[x['New Customer'] == 'No']['CustomerID'].nunique()
    }))
    .reset_index()
)

start_year_metrics['New Customer %'] = (start_year_metrics['New Customers'] / start_year_metrics['Total Customers']) * 100
start_year_metrics['Old Customer %'] = (start_year_metrics['Old Customers'] / start_year_metrics['Total Customers']) * 100

# 2. Start Date year-month-wise customer counts and percentages
start_year_month_metrics = (
    df.groupby('Start Year-Month')
    .apply(lambda x: pd.Series({
        'Total Customers': x['CustomerID'].nunique(),
        'New Customers': x[x['New Customer'] == 'Yes']['CustomerID'].nunique(),
        'Old Customers': x[x['New Customer'] == 'No']['CustomerID'].nunique()
    }))
    .reset_index()
)

start_year_month_metrics['New Customer %'] = (start_year_month_metrics['New Customers'] / start_year_month_metrics['Total Customers']) * 100
start_year_month_metrics['Old Customer %'] = (start_year_month_metrics['Old Customers'] / start_year_month_metrics['Total Customers']) * 100

# 3. Churned count by end year
churned_year_metrics = df[df['Churn Label'] == 'Yes'].groupby('End_Year')['CustomerID'].nunique().reset_index(name='Churned Count (Year)')

# 4. Churned count by end year-month
churned_year_month_metrics = df[df['Churn Label'] == 'Yes'].groupby('End Year-Month')['CustomerID'].nunique().reset_index(name='Churned Count (Year-Month)')

# 5. Total loss by end year
loss_year_metrics = df.groupby('End_Year')['Total Premium (Not Renewed)'].sum().reset_index(name='Total Loss (Year)')

# 6. Total loss by end year-month
loss_year_month_metrics = df.groupby('End Year-Month')['Total Premium (Not Renewed)'].sum().reset_index(name='Total Loss (Year-Month)')

# Save metrics to an Excel file
file_path = 'Year_and_Year-Month_Metrics.xlsx'
with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
    start_year_metrics.to_excel(writer, index=False, sheet_name='Start Year Metrics')
    start_year_month_metrics.to_excel(writer, index=False, sheet_name='Start Year-Month Metrics')
    churned_year_metrics.to_excel(writer, index=False, sheet_name='Churned Year Metrics')
    churned_year_month_metrics.to_excel(writer, index=False, sheet_name='Churned Year-Month Metrics')
    loss_year_metrics.to_excel(writer, index=False, sheet_name='Loss Year Metrics')
    loss_year_month_metrics.to_excel(writer, index=False, sheet_name='Loss Year-Month Metrics')

print(f"Metrics saved to {file_path}")

In [None]:
pip install XlsxWriter

In [None]:
import pandas as pd
from sqlalchemy import create_engine

# Database connection setup
db_config = {
    'host': '10.10.10.100',
    'database': 'postgres',
    'user': 'postgres',
    'password': 'kaviyam123',
    'port': '5432'
}
connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}"
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
query = 'SELECT * FROM "overall_policy_level_data_EF";'
df = pd.read_sql(query, con=engine)

# Ensure proper datetime formatting
df['Policy Start Date'] = pd.to_datetime(df['Policy Start Date'], errors='coerce')
df['Policy End Date'] = pd.to_datetime(df['Policy End Date'], errors='coerce')
df['Start Year'] = df['Policy Start Date'].dt.year
df['End Year'] = df['Policy End Date'].dt.year
df['Start Year-Month'] = df['Policy Start Date'].dt.to_period('M')
df['End Year-Month'] = df['Policy End Date'].dt.to_period('M')

# Remove duplicates for customer counts and churns
unique_customers = df.drop_duplicates(subset=['CustomerID', 'Start Year', 'Policy No'])

# 1. Start Year-wise customer counts and percentages
start_year_metrics = (
    unique_customers.groupby('Start Year')
    .apply(lambda x: pd.Series({
        'Total Customers': x['CustomerID'].nunique(),
        'New Customers': x[x['New Customers'] == 'Yes']['CustomerID'].nunique(),
        'Old Customers': x[x['New Customers'] == 'No']['CustomerID'].nunique()
    }))
    .reset_index()
)

start_year_metrics['New Customer %'] = (start_year_metrics['New Customers'] / start_year_metrics['Total Customers']) * 100
start_year_metrics['Old Customer %'] = (start_year_metrics['Old Customers'] / start_year_metrics['Total Customers']) * 100

# 2. Start Date year-month-wise customer counts and percentages
start_year_month_metrics = (
    unique_customers.groupby('Start Year-Month')
    .apply(lambda x: pd.Series({
        'Total Customers': x['CustomerID'].nunique(),
        'New Customers': x[x['New Customers'] == 'Yes']['CustomerID'].nunique(),
        'Old Customers': x[x['New Customers'] == 'No']['CustomerID'].nunique()
    }))
    .reset_index()
)

start_year_month_metrics['New Customer %'] = (start_year_month_metrics['New Customers'] / start_year_month_metrics['Total Customers']) * 100
start_year_month_metrics['Old Customer %'] = (start_year_month_metrics['Old Customers'] / start_year_month_metrics['Total Customers']) * 100

# 3. Churned count by end year
churned_year_metrics = (
    unique_customers[unique_customers['Churn Label'] == 'Yes']
    .groupby('End Year')['CustomerID']
    .nunique()
    .reset_index(name='Churned Count (Year)')
)

# 4. Churned count by end year-month
churned_year_month_metrics = (
    unique_customers[unique_customers['Churn Label'] == 'Yes']
    .groupby('End Year-Month')['CustomerID']
    .nunique()
    .reset_index(name='Churned Count (Year-Month)')
)

# 7. Open policies by end year-month
open_policies_metrics = (
    df[df['Policy Status'] == 'Open']
    .groupby('End Year-Month')['CustomerID']
    .nunique()
    .reset_index(name='Open Policies Count (Year-Month)')
)

# Save metrics to an Excel file
file_path = 'Year_and_Year-Month_Metrics(2023&2024).xlsx'
with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
    start_year_metrics.to_excel(writer, index=False, sheet_name='Start Year Metrics')
    start_year_month_metrics.to_excel(writer, index=False, sheet_name='Start Year-Month Metrics')
    churned_year_metrics.to_excel(writer, index=False, sheet_name='Churned Year Metrics')
    churned_year_month_metrics.to_excel(writer, index=False, sheet_name='Churned Year-Month Metrics')
    open_policies_metrics.to_excel(writer, index=False, sheet_name='Open Policies Metrics')

print(f"Metrics saved to {file_path}")

In [1]:
import pandas as pd
from sqlalchemy import create_engine

# Database connection setup
db_config = {
    'host': 'localhost',
    'database': 'postgres',
    'user': 'postgres',
    'password': 'kaviyam123',
    'port': '5432'
}
connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}"
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
query = 'SELECT * FROM "overall_policy_level_data_EF";'
df = pd.read_sql(query, con=engine)

# Ensure proper datetime formatting
df['Policy Start Date'] = pd.to_datetime(df['Policy Start Date'], errors='coerce')
df['Policy End Date'] = pd.to_datetime(df['Policy End Date'], errors='coerce')
df['Start Year'] = df['Policy Start Date'].dt.year

df.dropna(subset=['CustomerID', 'Start Year', 'Policy No'], inplace=True)

# Step 2: Remove duplicates for customer counts
unique_customers = df.drop_duplicates(subset=['CustomerID', 'Start Year', 'Policy No'])

# Step 3: Start Year-wise customer counts and policies
start_year_metrics = (
    unique_customers.groupby('Start Year')
    .apply(lambda x: pd.Series({
        'Total Customers': x['CustomerID'].nunique(),
        'New Customers': x[x['New Customers'] == 'Yes']['CustomerID'].nunique(),
        'Old Customers': x[x['New Customers'] == 'No']['CustomerID'].nunique(),
        'New Customers Total Policies': x[x['New Customers'] == 'Yes']['Policy No'].nunique(),
        'Old Customers Total Policies': x[x['New Customers'] == 'No']['Policy No'].nunique(),
        'Total Policies': x['Policy No'].nunique()
    }))
    .reset_index()
)

# Step 4: Save the results to Excel
output_file = "start_year_metrics with policies.xlsx"
start_year_metrics.to_excel(output_file, index=False)
print(f"Results saved to {output_file}")

  .apply(lambda x: pd.Series({


Results saved to start_year_metrics with policies.xlsx
