# CRM Data Generator

In [208]:
from datetime import datetime
from math import log
from pathlib import Path
from random import lognormvariate, normalvariate, randint, random, sample, seed

import pandas as pd
from faker import Faker

## Random data generators

In [239]:
def random_names(n:int = 1) -> list:
    """Return a list of ``n`` fake person names produced by ``Faker().name()``."""
    generator = Faker()
    names = []
    for _ in range(n):
        names.append(generator.name())
    return names

def random_client_ids(n:int = 1) -> list:
    """Return ``n`` distinct client IDs of the form ``C00dddd``.

    The numeric suffix is taken from 1000..9999 (inclusive). Suffixes are
    sampled without replacement so that the IDs can serve as a unique key;
    independent ``randint`` draws would repeat values.

    Args:
        n: Number of IDs to generate. Values <= 0 yield an empty list.

    Returns:
        A list of ``n`` unique ID strings.

    Raises:
        ValueError: If ``n`` exceeds 9000, the number of distinct suffixes.
    """
    if n <= 0:
        return []
    suffix_pool = range(1000, 10000)
    if n > len(suffix_pool):
        raise ValueError(
            f'cannot generate {n} unique client IDs; only {len(suffix_pool)} exist'
        )
    return [f'C00{suffix}' for suffix in sample(suffix_pool, n)]

def random_account_types(n:int = 1) -> list:
    """Return ``n`` account types, each picked at random from Personal/Commercial."""
    options = ('Personal', 'Commercial')
    picks = []
    for _ in range(n):
        picks.append(options[randint(0, 1)])
    return picks

def random_addresses(n:int = 1) -> list:
    """Return ``n`` fake addresses from Faker, flattened onto a single line.

    Each newline in the value returned by ``address()`` is replaced by ``', '``.
    """
    generator = Faker()
    addresses = []
    for _ in range(n):
        multi_line = generator.address()
        addresses.append(multi_line.replace('\n', ', '))
    return addresses

def random_serving_broker_ids(n:int = 1) -> list:
    """Return ``n`` servicing-broker IDs of the form ``S00dddd``.

    The numeric suffix is a random integer in 1000..9999 (inclusive). IDs are
    drawn independently, so the same broker ID may appear more than once.

    Args:
        n: Number of IDs to generate.

    Returns:
        A list of ``n`` ID strings.
    """
    # No Faker instance is created here: the IDs come from randint alone.
    return [f'S00{randint(1000, 9999)}' for _ in range(n)]

def random_phone_numbers(n:int = 1) -> list:
    """Return ``n`` fake phone numbers produced by ``Faker().phone_number()``."""
    generator = Faker()
    numbers = []
    for _ in range(n):
        numbers.append(generator.phone_number())
    return numbers

def random_emails(names: list) -> list:
    """Build one fake company e-mail address per entry in ``names``.

    The local part is the lower-cased name with its whitespace-separated
    tokens joined by ``'.'``. Characters other than letters, digits and ``'-'``
    are stripped from each token, and tokens left empty are dropped, so names
    such as ``"Dr. Ann O'Brien"`` give ``dr.ann.obrien`` rather than a local
    part with consecutive dots or quote characters. The domain (including the
    ``'@'``) is taken from a freshly generated ``Faker().company_email()``.

    Args:
        names: Person names, one e-mail address is produced for each.

    Returns:
        A list of e-mail strings in the same order as ``names``.
    """
    faker = Faker()
    emails = []
    for name in names:
        cleaned_tokens = (
            ''.join(ch for ch in token if ch.isalnum() or ch == '-')
            for token in name.lower().split()
        )
        local_part = '.'.join(token for token in cleaned_tokens if token)
        # Only the "@domain" tail of the generated address is reused.
        generated = faker.company_email()
        emails.append(local_part + generated[generated.index('@'):])
    return emails

def random_agency_branch_ids(n:int = 1, unique_branches=3) -> list:
    """Return ``n`` branch IDs of the form ``B_US00ddd`` drawn from a small pool.

    A pool of ``unique_branches`` distinct three-digit codes (100..999) is
    sampled without replacement, then each of the ``n`` IDs picks one code
    from that pool at random. Sampling without replacement guarantees the pool
    really holds ``unique_branches`` different branches.

    Args:
        n: Number of IDs to generate. Values <= 0 yield an empty list.
        unique_branches: Size of the branch pool; must be between 1 and 900.

    Returns:
        A list of ``n`` branch ID strings.

    Raises:
        ValueError: If ``unique_branches`` is outside 1..900 while ``n`` > 0.
    """
    if n <= 0:
        return []
    code_pool = range(100, 1000)
    if not 1 <= unique_branches <= len(code_pool):
        raise ValueError(
            f'unique_branches must be between 1 and {len(code_pool)}, got {unique_branches}'
        )
    branches = sample(code_pool, unique_branches)
    return [f'B_US00{branches[randint(0, unique_branches-1)]}' for _ in range(n)]

def random_statuses(n:int = 1) -> list:
    """Return ``n`` client statuses, each picked at random from Active/Inactive/Prospect."""
    options = ('Active', 'Inactive', 'Prospect')
    picks = []
    for _ in range(n):
        picks.append(options[randint(0, 2)])
    return picks

def random_dates_of_creation(n:int = 1) -> list:
    """Return ``n`` fake creation dates from ``Faker().date_between``.

    The bounds passed to Faker are 2005-01-01 and 2007-01-01.
    """
    generator = Faker()
    window_start = datetime(2005, 1, 1)
    window_end = datetime(2007, 1, 1)
    dates = []
    for _ in range(n):
        dates.append(generator.date_between(window_start, window_end))
    return dates

def random_dates_of_updation(today = datetime(2025, 1, 1), n:int = 1) -> list:
    """Return a list holding ``today`` repeated ``n`` times.

    Despite the name nothing is randomised: every entry is the same
    ``today`` object.
    """
    return [today] * n

In [242]:
def random_policy_ids(n:int = 1) -> list:
    """Return ``n`` distinct policy IDs of the form ``P00dddd``.

    The numeric suffix is taken from 1000..9999 (inclusive). Suffixes are
    sampled without replacement so that the IDs can serve as a unique key;
    independent ``randint`` draws would repeat values.

    Args:
        n: Number of IDs to generate. Values <= 0 yield an empty list.

    Returns:
        A list of ``n`` unique ID strings.

    Raises:
        ValueError: If ``n`` exceeds 9000, the number of distinct suffixes.
    """
    if n <= 0:
        return []
    suffix_pool = range(1000, 10000)
    if n > len(suffix_pool):
        raise ValueError(
            f'cannot generate {n} unique policy IDs; only {len(suffix_pool)} exist'
        )
    return [f'P00{suffix}' for suffix in sample(suffix_pool, n)]

def random_job_title(n:int = 1) -> list:
    """Return ``n`` fake job titles produced by ``Faker().job()``."""
    generator = Faker()
    titles = []
    for _ in range(n):
        titles.append(generator.job())
    return titles

def random_insurer(n:int = 1) -> list:
    """Return ``n`` fake insurer names: a Faker company name plus ' Insurance Inc.'."""
    generator = Faker()
    insurers = []
    for _ in range(n):
        company = generator.company()
        insurers.append(f'{company} Insurance Inc.')
    return insurers

def random_expiry_dates(issue_dates, term=20) -> list:
    """Return the expiry date for each issue date, ``term`` years later.

    Each result is a ``datetime`` at midnight with the same month and day as
    the issue date. An issue date of 29 February whose target year is not a
    leap year expires on 28 February instead of raising ``ValueError``.

    Args:
        issue_dates: Iterable of objects exposing ``year``, ``month`` and
            ``day`` (``date`` or ``datetime``).
        term: Policy term in years added to the issue year.

    Returns:
        A list of ``datetime`` objects, one per issue date, in input order.

    Raises:
        ValueError: If the resulting year is outside the range ``datetime``
            supports.
    """
    expiries = []
    for issued in issue_dates:
        expiry_year = issued.year + term
        try:
            expiry = datetime(expiry_year, issued.month, issued.day)
        except ValueError:
            # Only the leap-day case is recoverable; anything else is re-raised.
            if (issued.month, issued.day) != (2, 29):
                raise
            expiry = datetime(expiry_year, 2, 28)
        expiries.append(expiry)
    return expiries

def random_premium_amounts(n:int = 1) -> list:
    """Return ``n`` premium amounts drawn from a two-component log-normal mix.

    Each draw first picks a product with ``random()``: below 0.7 it uses the
    Term Life parameters (mu = log(350), sigma = 0.5), otherwise the Whole Life
    parameters (mu = log(2500), sigma = 0.7). The amount is then sampled with
    ``lognormvariate(mu, sigma)``.
    """
    term_life = (log(350), 0.5)     # median ~350, moderate spread
    whole_life = (log(2500), 0.7)   # median ~2500, large spread

    amounts = []
    for _ in range(n):
        # 70% Term Life, 30% Whole Life
        mu, spread = term_life if random() < 0.7 else whole_life
        amounts.append(lognormvariate(mu, spread))
    return amounts

def random_premium_bill_to(n:int = 1) -> list:
    """Return ``n`` billing targets, each picked at random from nine fixed options."""
    bill_to_options = (
        "Direct Bill",
        "Agency Bill",
        "Insured",
        "Premium Finance Company",
        "Mortgagee",
        "Parent Company",
        "Subsidiary",
        "Third-Party Billing",
        "Payroll Vendor",
    )
    picks = []
    for _ in range(n):
        picks.append(bill_to_options[randint(0, 8)])
    return picks

def random_renewal_status(n:int = 1) -> list:
    """Return ``n`` renewal statuses, each picked at random from four fixed options."""
    status_options = ('New', 'Canceled', 'Marketing', 'Renewal Quoted')
    picks = []
    for _ in range(n):
        picks.append(status_options[randint(0, 3)])
    return picks

def random_broker_commisions(n:int = 1) -> list:
    """Return ``n`` broker commission percentages within 0..100.

    Values are drawn from ``normalvariate(11.5, 4)``. The normal distribution
    is unbounded, so a draw can fall below zero; such a draw is not a valid
    percentage and is redrawn until it lies in the 0..100 range.

    Args:
        n: Number of percentages to generate.

    Returns:
        A list of ``n`` floats, each between 0 and 100 inclusive.
    """
    commissions = []
    for _ in range(n):
        pct = normalvariate(11.5, 4)
        # Reject out-of-range draws rather than clamping, to avoid piling
        # probability mass onto exactly 0 or 100.
        while not 0.0 <= pct <= 100.0:
            pct = normalvariate(11.5, 4)
        commissions.append(pct)
    return commissions

## Clients Table Creation

In [244]:
# Seed both random sources so Restart & Run All regenerates identical tables.
SEED = 42
seed(SEED)        # stdlib `random`: IDs, categorical picks, premiums, commissions
Faker.seed(SEED)  # Faker: names, addresses, phone numbers, e-mail domains, dates

# Shared inputs: these names are read again when the policies table is built.
n = 1000                          # number of rows to generate
today = datetime(2025, 11, 28)    # stamp written to every "updated" column
names = random_names(n)
clientIds = random_client_ids(n)
issueDates = random_dates_of_creation(n)
updtationDates = random_dates_of_updation(today=today, n=n)
term = 20                         # policy term in years, used for expiry dates

# One row per client; e-mails are derived from the same `names` list so the
# address matches the client name in each row.
clients = pd.DataFrame({
    'client_name': names,
    'client_id': clientIds,
    'account_type': random_account_types(n),
    'address': random_addresses(n),
    'phone_number': random_phone_numbers(n),
    'email': random_emails(names),
    'branch_id': random_agency_branch_ids(n, 10),
    'servicing_broker_id': random_serving_broker_ids(n),
    'status': random_statuses(n),
    'issued_on': issueDates,
    'updated_on': updtationDates
})

In [245]:
# Preview the last three rows of the clients table as a quick sanity check.
clients.tail(3)


Unnamed: 0,client_name,client_id,account_type,address,phone_number,email,branch_id,servicing_broker_id,status,issued_on,updated_on
997,Kristina Lane,C003783,Commercial,"6551 Ford Groves Suite 350, Bellburgh, OH 99366",(231)236-7583,kristina.lane@wolf-pugh.com,B_US00333,S006923,Active,2005-04-16,2025-11-28
998,Steven Aguilar,C001220,Personal,"15937 Dennis Station, Cervantesshire, AZ 99485",(438)653-0313,steven.aguilar@wheeler.net,B_US00988,S002189,Prospect,2006-06-27,2025-11-28
999,John Lee,C001653,Personal,"1540 Miller Streets, West Troy, NJ 05722",517.675.6556,john.lee@gibson.biz,B_US00137,S007124,Prospect,2006-07-20,2025-11-28


## Policies Table Creation

In [247]:
# Build the policies table: one policy per client, reusing the client IDs,
# issue dates and update stamps created for the clients table. Note that
# `line_of_business` is filled from the job-title generator.
policies = pd.DataFrame(data=dict(
    policy_id=random_policy_ids(n),
    client_id=clientIds,
    line_of_business=random_job_title(n),
    issuing_carrier=random_insurer(n),
    issued_on=issueDates,
    expiry_date=random_expiry_dates(issue_dates=issueDates, term=term),
    premium_amount=random_premium_amounts(n),
    premium_bill_to=random_premium_bill_to(n),
    renewal_status=random_renewal_status(n),
    last_updated=updtationDates,
    broker_commision_pct=random_broker_commisions(n),
))

In [248]:
# Preview the first three rows of the policies table as a quick sanity check.
policies.head(3)

Unnamed: 0,policy_id,client_id,line_of_business,issuing_carrier,issued_on,expiry_date,premium_amount,premium_bill_to,renewal_status,last_updated,broker_commision_pct
0,P005796,C005854,Learning disability nurse,"Mcdonald, Yoder and Sexton Insurance Inc.",2006-09-17,2026-09-17,322.989075,Insured,Marketing,2025-11-28,13.97452
1,P007684,C005226,Regulatory affairs officer,Rivera-Ferguson Insurance Inc.,2006-12-14,2026-12-14,4084.34539,Agency Bill,New,2025-11-28,11.780687
2,P006212,C008711,Social researcher,King LLC Insurance Inc.,2006-02-25,2026-02-25,133.11533,Subsidiary,Canceled,2025-11-28,14.306389


In [249]:
# Write both tables to CSV without the index column. The output folder is
# created first because DataFrame.to_csv does not create missing parent
# directories and would fail on a fresh checkout.
output_dir = Path('fake_CRM_data')
output_dir.mkdir(parents=True, exist_ok=True)
clients.to_csv(output_dir / 'clients.csv', index=False)
policies.to_csv(output_dir / 'policies.csv', index=False)