##### Prompt:
###### You are a python programmer with libraries faker, duckdb, parquet and pandas as tools and you are helping me to simulate a comprehensive data warehouse for general insurance organization with sample data having tables like premium register, claim register, customer acquisition table with cost, marketing table with per customer cost and budget, customer demographics table, product table, policy table, customer complaint table, policy / product cancellation table and all such required table, claim fraud table. Further write the code to estimate for each customer customer acquisition cost, customer churn, customer claims, new product marketing cost to customer and finally customer lifetime value 

In [2]:
import pandas as pd
import random
from faker import Faker
import datetime

fake = Faker("en_IN")
# Cities:

all_cities = {
        "Delhi": "Delhi",
        "Mumbai": "Maharashtra",
        "Bangalore": "Karnataka",
        "Chennai": "Tamil Nadu",
        "Kolkata": "West Bengal",
        "Hyderabad": "Telangana",
        "Pune": "Maharashtra",
        "Ahmedabad": "Gujarat",
        "Surat": "Gujarat",
        "Vadodara": "Gujarat",
        "Indore": "Madhya Pradesh",
        "Jaipur": "Rajasthan",
        "Lucknow": "Uttar Pradesh",
        "Kanpur": "Uttar Pradesh",
        "Coimbatore": "Tamil Nadu",
        "Kochi": "Kerala",
        "Chandigarh": "Chandigarh",
        "Bhubaneswar": "Odisha",
        "Patna": "Bihar",
        "Visakhapatnam": "Andhra Pradesh",
        "Nagpur": "Maharashtra",
        "Nashik": "Maharashtra",
        "Aurangabad": "Maharashtra",
        "Ghaziabad": "Uttar Pradesh",
        "Faridabad": "Haryana",
        "Gurugram": "Haryana"
}

# Generate customer demographics table
def generate_customers(num_customers):
    customers = []
    for _ in range(num_customers):
        City=random.choice(list(all_cities.keys()))
        customer = {
            "Customer_id": fake.uuid4(),
            "First_name": fake.first_name(),
            "Last_name": fake.last_name(),
            "Gender": random.choice(["Male", "Female"]),
            "DOB": fake.date_between(datetime.date(1950, 1, 1),datetime.date(2006, 1, 1)),
            "City": City,
            "State": all_cities[City]
        }
        customers.append(customer)
    return pd.DataFrame(customers)

# Create 1000 customers
customer_df = generate_customers(1000)
customer_df.head(5)

Unnamed: 0,Customer_id,First_name,Last_name,Gender,DOB,City,State
0,7c3dd1e7-f96f-4bc8-a505-f71e9ee88bd0,Radhika,Amble,Male,1988-04-17,Ghaziabad,Uttar Pradesh
1,e34ebd3a-176f-4403-95a1-9f34cc9cf548,Daniel,Chowdhury,Female,2000-10-08,Kochi,Kerala
2,f6fbfa4c-32ea-43a4-9c0f-d192ad53ba30,Aarush,Balay,Female,1998-04-27,Pune,Maharashtra
3,03cf9c3f-05f9-4bb2-a187-5b367f9a5293,Advay,Modi,Female,1958-12-22,Patna,Bihar
4,3f5239af-179e-4609-a96b-dc267eeb4bf7,Lekha,Dave,Male,1971-05-29,Ahmedabad,Gujarat


In [13]:
# Define  product master table with 20 automobile products and it's pricing based on the Indian automobile make model city 
# 5 health products based on customers age and City
data = {
    'Product ID' : ['AUTO_'+str(i) for i in range(1,51)],
    'Make': ['Maruti Suzuki', 'Hyundai', 'Tata', 'Mahindra', 'Kia', 'Toyota', 'Maruti Suzuki', 'Honda', 'Renault', 'Nissan', 
            'MG', 'Volkswagen', 'Skoda', 'BMW', 'Mercedes-Benz', 'Audi', 'Volvo', 'Jaguar', 'Land Rover', 'Porsche', 
            'Maruti Suzuki', 'Hyundai', 'Tata', 'Mahindra', 'Kia', 'Toyota', 'Maruti Suzuki', 'Honda', 'Renault', 'Nissan', 
            'MG', 'Volkswagen', 'Skoda', 'BMW', 'Mercedes-Benz', 'Audi', 'Volvo', 'Jaguar', 'Land Rover', 'Porsche', 
            'Maruti Suzuki', 'Hyundai', 'Tata', 'Mahindra', 'Kia', 'Toyota', 'Maruti Suzuki', 'Honda', 'Renault', 'Nissan'],
    'Model': ['Swift', 'Creta', 'Nexon', 'Thar', 'Seltos', 'Innova Crysta', 'Dzire', 'City', 'Kiger', 'Magnite', 
            'Hector', 'Polo', 'Octavia', '3 Series', 'C-Class', 'A4', 'XC40', 'F-Pace', 'Discovery', 'Cayenne', 
            'Baleno', 'Verna', 'Punch', 'Scorpio', 'Sonet', 'Fortuner', 'Alto', 'Amaze', 'Triber', 'Kicks', 
            'Gloster', 'Vento', 'Superb', '5 Series', 'E-Class', 'Q5', 'XC60', 'F-Type', 'Defender', '911', 
            'WagonR', 'i20', 'Altroz', 'XUV700', 'Carens', 'Camry', 'Ertiga', 'Jazz', 'Kwid', 'Magnite'],
    'Price (INR)': [600000, 1200000, 800000, 1500000, 1100000, 2000000, 550000, 1400000, 650000, 700000, 
                    1800000, 850000, 2500000, 4500000, 5000000, 4000000, 4200000, 6000000, 8000000, 12000000, 
                    500000, 1000000, 700000, 1800000, 900000, 2800000, 300000, 1150000, 500000, 800000, 
                    3000000, 1000000, 3000000, 5500000, 6000000, 4500000, 4800000, 7000000, 10000000, 
                    400000, 700000, 650000, 2200000, 1000000, 3500000, 600000, 1200000, 400000, 800000,750000]
}
# Create a DataFrame
make_model_master = pd.DataFrame(data)
# Logic behind pricing a policy for make model is models age and price
make_model_master.head(5)

Unnamed: 0,Product ID,Make,Model,Price (INR)
0,AUTO-1,Maruti Suzuki,Swift,600000
1,AUTO-2,Hyundai,Creta,1200000
2,AUTO-3,Tata,Nexon,800000
3,AUTO-4,Mahindra,Thar,1500000
4,AUTO-5,Kia,Seltos,1100000


In [14]:
data = {
    'Product ID': ['HEALTH_001', 'HEALTH_002', 'HEALTH_003', 'HEALTH_004', 'HEALTH_005'],
    'Product Name': ['Individual Health', 'Family Floater', 'Senior Citizen', 'Critical Illness', 'Top-up'],
    'Description': [
        'Covers medical expenses for a single individual.',
        'Covers medical expenses for the entire family under a single policy.',
        'Specifically designed for individuals aged 60 and above.',
        'Pays a lump-sum benefit upon diagnosis of a critical illness.',
        'Acts as a secondary layer of coverage after the primary insurance is exhausted.'
    ],
    'Target Audience': ['Individuals', 'Families', 'Seniors', 'Individuals', 'Individuals/Families'],
    'Key Features': [
        'Cashless Hospitalization, Daycare Treatment', 
        'Cashless Hospitalization, Maternity Coverage', 
        'Pre-existing Conditions Coverage (with waiting period)', 
        'Coverage for 30+ critical illnesses', 
        'High Sum Insured Options'
    ],
    'Sum Insured Options': ['5L, 10L, 20L', '5L, 10L, 15L, 20L', '5L, 10L, 15L', '5L, 10L', 'High Limits (varies by primary policy)'],
    'Premium Range': ['Low-Medium', 'Medium-High', 'High', 'Medium', 'Low'],
    'Waiting Periods': ['Standard waiting periods', 'Standard waiting periods', 'Reduced waiting periods for some conditions', 'Nil', 'Nil'],
    'Exclusions': [
        'Pre-existing conditions (with some exceptions)', 
        'Cosmetic surgeries, self-inflicted injuries', 
        'Pre-existing conditions (with some exceptions)', 
        'Non-critical illnesses', 
        'Coverage below primary policy limit' 
    ]
}

# Create DataFrame
health = pd.DataFrame(data)
health.head(5)

Unnamed: 0,Product ID,Product Name,Description,Target Audience,Key Features,Sum Insured Options,Premium Range,Waiting Periods,Exclusions
0,HEALTH_001,Individual Health,Covers medical expenses for a single individual.,Individuals,"Cashless Hospitalization, Daycare Treatment","5L, 10L, 20L",Low-Medium,Standard waiting periods,Pre-existing conditions (with some exceptions)
1,HEALTH_002,Family Floater,Covers medical expenses for the entire family ...,Families,"Cashless Hospitalization, Maternity Coverage","5L, 10L, 15L, 20L",Medium-High,Standard waiting periods,"Cosmetic surgeries, self-inflicted injuries"
2,HEALTH_003,Senior Citizen,Specifically designed for individuals aged 60 ...,Seniors,Pre-existing Conditions Coverage (with waiting...,"5L, 10L, 15L",High,Reduced waiting periods for some conditions,Pre-existing conditions (with some exceptions)
3,HEALTH_004,Critical Illness,Pays a lump-sum benefit upon diagnosis of a cr...,Individuals,Coverage for 30+ critical illnesses,"5L, 10L",Medium,Nil,Non-critical illnesses
4,HEALTH_005,Top-up,Acts as a secondary layer of coverage after th...,Individuals/Families,High Sum Insured Options,High Limits (varies by primary policy),Low,Nil,Coverage below primary policy limit
