# Random Data Generator

In [3]:
import pandas as pd
from faker import Faker
from random import random, randint, lognormvariate, normalvariate, seed, sample
from math import log
from datetime import datetime, timedelta

## Random data generators

### Clients Table

In [22]:
def random_names(n:int = 1) -> list:
    faker = Faker()
    return [faker.name() for _ in range(n)]

def random_client_ids(n:int = 1, random_state=None) -> list:
    if not (random_state is None):
        seed(random_state)
    faker= Faker()
    ids = []
    for _ in range(n):
        randID = f'C00{randint(1000, 9999)}'
        while randID in ids:
            randID = f'C00{randint(1000, 9999)}'
        ids.append(randID)
    return ids

def random_account_types(n:int = 1) -> list:
    return [['Personal', 'Commercial'][randint(0, 1)] for _ in range(n)]

def random_addresses(n:int = 1) -> list:
    faker = Faker()
    return [faker.address().replace('\n', ', ') for _ in range(n)]

def random_serving_broker_ids(n:int = 1) -> list:
    faker= Faker()
    return [f'S00{randint(1000, 9999)}' for _ in range(n)]

def random_phone_numbers(n:int = 1) -> list:
    faker= Faker()
    return [faker.phone_number() for _ in range(n)]

def random_emails(names: list) -> list:
    faker = Faker()
    op = []
    for name in names:
        email = faker.company_email()
        op.append('.'.join(name.lower().split()) + email[email.index('@'):])
    return op

def random_agency_branch_ids(n:int = 1, unique_branches=3) -> list:
    branches = [randint(100, 999) for _ in range(unique_branches)]
    return [f'B_US00{branches[randint(0, unique_branches-1)]}' for _ in range(n)]

def random_statuses(n:int = 1) -> list:
    return [['Active', 'Inactive', 'Prospect'][randint(0, 2)] for _ in range(n)]

def random_dates_of_creation(today: datetime, term: int | float, n:int = 1) -> list:
    faker = Faker()
    range_issue = 365.25
    days = term*365.25
    return [faker.date_between(today - timedelta(days=(days+range_issue)), today - timedelta(days=days)) for _ in range(n)]

def random_dates_of_updation(today = datetime(2025, 1, 1), n:int = 1) -> list:
    return [today for _ in range(n)]

### Policies Table

In [23]:
def random_policy_ids(n:int = 1, random_state=None) -> list:
    if not (random_state is None):
        seed(random_state)
    faker= Faker()
    ids = []
    for _ in range(n):
        randID = f'P00{randint(1000, 9999)}'
        while randID in ids:
            randID = f'P00{randint(1000, 9999)}'
        ids.append(randID)
    return ids

def random_job_title(n:int = 1) -> list:
    faker = Faker()
    return [faker.job() for _ in range(n)]

def random_insurer(n:int = 1) -> list:
    faker = Faker()
    return [f'{faker.company()} Insurance Inc.' for _ in range(n)]

def random_expiry_dates(issue_dates, term=20) -> list:
    op = []
    for date in issue_dates:
        op.append(date + timedelta(days=365.25*term))
    return op

def random_premium_amounts(n:int = 1) -> list:
    premiums = []

    for _ in range(n):
        # 70% Term Life, 30% Whole Life
        if random() < 0.7:
            # Term Life: median ~350, moderate spread
            mean = log(350)
            sigma = 0.5
        else:
            # Whole Life: median ~2500, large spread
            mean = log(2500)
            sigma = 0.7

        premium = lognormvariate(mean, sigma)
        premiums.append(premium)

    return premiums

def random_premium_bill_to(n:int = 1) -> list:
    return [[
        "Direct Bill",
        "Agency Bill",
        "Insured",
        "Premium Finance Company",
        "Mortgagee",
        "Parent Company",
        "Subsidiary",
        "Third-Party Billing",
        "Payroll Vendor"
    ][randint(0, 8)] for _ in range(n)]

def random_renewal_status(n:int = 1) -> list:
    return[[
        'New', 
        'Canceled', 
        'Marketing', 
        'Renewal Quoted'
    ][randint(0, 3)] for _ in range(n)]

def random_broker_commisions(n:int = 1) -> list:
    return [normalvariate(11.5, 4) for _ in range(n)]

### MS Users Table

In [24]:
def random_department(n:int = 1) -> list:
    faker = Faker()
    return [faker.department() for _ in range(n)]

def random_location(n:int = 1) -> list:
    faker = Faker()
    return [faker.city() for _ in range(n)]

### MS CalendarEvents Table

In [25]:
def random_event_ids(n:int = 1, random_state=None) -> list:
    if not (random_state is None):
        seed(random_state)
    faker= Faker()
    ids = []
    for _ in range(n):
        randID = f'E{faker.date_this_year().year}00{randint(100, 999)}'
        while randID in ids:
            randID = f'E{faker.date_this_year().year}00{randint(100, 999)}'
        ids.append(randID)
    return ids


def random_meeting_titles(n:int = 1) -> list:
    def sample_space() -> list:
        faker = Faker()
        return [
            f'Policy Renewal Review : {faker.company()}',
            f'Client Onboarding: {faker.company()} : Kickoff Call',
            f'Broker-Client Check-in : Renewal Discussion',
            f'Underwriting Submission Call : XYZ Logistics',
            f'Quarterly Account Review : Top Clients',
            f'Annual Premium Negotiation – {faker.company()}',
            f'Risk Assessment Workshop with Underwriting Team',
            f'Q3 Portfolio Performance Review – Key Accounts',
            f'Internal Sync: Renewal Pipeline Status Update',
            f'Claims Trend Analysis – High-Risk Clients'
            ]
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_datetime(today: datetime, n:int = 1) -> list:
    faker = Faker()
    return [str(faker.date_time_between(today, datetime(today.year+1, today.month, today.day, 16, 50, 0, 0))) for _ in range(n)]


def random_meeting_location(n:int = 1) -> list:
    def sample_space() -> list:
        faker = Faker()
        return ['online']*80 + [faker.city() for x in range(20)]
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_meeting_attendees(n:int = 1) -> list:
    names = random_names(randint(2, 10))
    return [{'names':names, 'emails':random_emails(names)} for _ in range(n)]

def random_meeting_id() -> str:
    def letter() -> str:
        return chr(randint(ord('a'), ord('z')))
    op = ''
    for i in range(3):
        for _ in range(3):
            op += letter()
        op += '-'

    return op[:-1]

def random_meeting_urls(locations: list) -> list:
    op = []
    for location in locations:
        if location == 'online':
            op += ['https://fakemsteams.com/meet/' + random_meeting_id()]
        else:
            op += [None]
    return op

def random_org_emails(names: list) -> list:
    faker = Faker()
    op = []
    email = faker.company_email()
    for name in names:
        op.append('.'.join(name.lower().split()) + email[email.index('@'):])
    return op

def random_ms365_ids(n:int = 1) -> list:
    return [f'MSU00{randint(1000, 9999)}' for _ in range(n)]


### E-Mails Table

In [26]:
def random_message_ids(dates: list) -> list:
    faker=Faker()
    return [f'MESS{date}00{randint(1000, 9999)}' for date in dates]

def random_dates_this_year(n:int = 1) -> list:
    faker=Faker()
    return [faker.date_this_year() for _ in range(n)]

def random_dates_this_year_after(dates: list) -> list:
    return [date + timedelta(days=1) for date in dates]

def random_email_subjects(n:int = 1) -> list:
    faker = Faker()
    def sample_space():
        return [f'Updated Premium Quotes for {faker.company()} – Review Needed',
        f'Client Claim Status: Requesting Supporting Documents',
        f'Policy Renewal Reminders for This Week',
        f'Meeting Agenda: New IRDAI Compliance Guidelines',
        f'Need Clarification on Endorsement Changes for Motor Fleet Policy',
        f'Follow-Up: Health Insurance Proposal Sent to Client',
        f'Urgent: Underwriter Feedback on Fire Insurance Application',
        f'Reconciliation of Commission Statements – {faker.date_this_year().month}',
        f'Request for KYC/Business Proofs for New Corporate Client',
        f'Internal Review: Claim Settlement Delay – Escalation Required']
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_read_state(n:int = 1) -> list:
    sample_space = ['read']*80 + ['unread']*20
    return [sample(sample_space, 1)[0] for _ in range(n)]

def random_attachment_state(n:int = 1) -> list:
    sample_space = [True]*80 + [False]*20
    return [sample(sample_space, 1)[0] for _ in range(n)]

In [27]:
random_read_state(10)

['read',
 'read',
 'read',
 'read',
 'read',
 'unread',
 'unread',
 'read',
 'unread',
 'read']

## Clients Table Creation

In [28]:
n = 1000
today = datetime(2025, 11, 28)
term = 20
names = random_names(n)
clientIds = random_client_ids(n, random_state=69)
issueDates = random_dates_of_creation(today=today, term=term, n=n)
updtationDates = random_dates_of_updation(today=today, n=n)
branches = 10
clients = pd.DataFrame({
    'client_name': names,
    'client_id': clientIds,
    'account_type': random_account_types(n),
    'address': random_addresses(n),
    'phone_number': random_phone_numbers(n),
    'email': random_emails(names),
    'branch_id': random_agency_branch_ids(n, branches),
    'servicing_broker_id': random_serving_broker_ids(n),
    'status': random_statuses(n),
    'issued_on': issueDates,
    'updated_on': updtationDates
})

In [29]:
clients.tail(3)

Unnamed: 0,client_name,client_id,account_type,address,phone_number,email,branch_id,servicing_broker_id,status,issued_on,updated_on
997,Shannon Pham,C007388,Personal,"Unit 3312 Box 3933, DPO AA 32343",001-748-973-2840x38451,shannon.pham@dixon.com,B_US00436,S008765,Prospect,2005-01-18,2025-11-28
998,Patricia Cole,C009407,Personal,"3278 Brewer Hill, Jamesmouth, IA 52820",(300)290-7799x9062,patricia.cole@griffin.com,B_US00833,S004676,Inactive,2005-05-21,2025-11-28
999,Jasmine West,C007407,Personal,"86152 Bell Highway Suite 749, North Kimberlyvi...",(723)290-1570,jasmine.west@carrillo.net,B_US00377,S008983,Inactive,2005-03-17,2025-11-28


## Policies Table Creation

In [30]:
policies = pd.DataFrame({
    'policy_id':random_policy_ids(n),
    'client_id': clientIds,
    'line_of_business': random_job_title(n),
    'issuing_carrier': random_insurer(n),
    'issued_on': issueDates,
    'expiry_date': random_expiry_dates(issue_dates=issueDates, term=term),
    'premium_amount': random_premium_amounts(n),
    'premium_bill_to': random_premium_bill_to(n),
    'renewal_status': random_renewal_status(n),
    'last_updated': updtationDates,
    'broker_commision_pct': random_broker_commisions(n),
})

In [31]:
policies.head(3)

Unnamed: 0,policy_id,client_id,line_of_business,issuing_carrier,issued_on,expiry_date,premium_amount,premium_bill_to,renewal_status,last_updated,broker_commision_pct
0,P008414,C001611,"Pharmacist, hospital",Ortiz PLC Insurance Inc.,2005-09-27,2025-09-27,1164.561809,Direct Bill,Canceled,2025-11-28,17.694337
1,P007998,C002570,Veterinary surgeon,Smith and Sons Insurance Inc.,2005-08-10,2025-08-10,2175.399539,Agency Bill,New,2025-11-28,9.411621
2,P004008,C003734,Occupational hygienist,"Martinez, Coleman and Ford Insurance Inc.",2005-08-27,2025-08-27,931.708588,Insured,Canceled,2025-11-28,19.817233


In [32]:
clients.to_csv('fake_CRM_data/clients.csv', index=False)
policies.to_csv('fake_CRM_data/policies.csv', index=False)

## MS Users table Creation

In [33]:
broker_count = 10
brokers = random_names(broker_count)
ms_users = pd.DataFrame({
    'ms365_id':random_ms365_ids(broker_count),
    'name': brokers,
    'e_mail': random_org_emails(brokers),
    'job_title':sample(['branch manager']*2 + 8*['insurance broker'], broker_count),
    'office_location': random_location(2)*(broker_count//2),
    'mobile_phone':random_phone_numbers(broker_count),
    
})

In [34]:
ms_users.head(30)

Unnamed: 0,ms365_id,name,e_mail,job_title,office_location,mobile_phone
0,MSU003475,Sherry Howard,sherry.howard@mcbride-willis.org,insurance broker,Johnsonshire,+1-751-272-7351x8961
1,MSU005098,Megan Jones,megan.jones@mcbride-willis.org,insurance broker,Port Amanda,(977)847-0481
2,MSU005029,Jordan Smith,jordan.smith@mcbride-willis.org,insurance broker,Johnsonshire,001-609-218-4524x2409
3,MSU006978,Robert Patterson,robert.patterson@mcbride-willis.org,insurance broker,Port Amanda,001-268-602-1045
4,MSU009448,Mr. Daniel Wade,mr..daniel.wade@mcbride-willis.org,insurance broker,Johnsonshire,+1-784-707-0761
5,MSU001791,Casey Moore,casey.moore@mcbride-willis.org,branch manager,Port Amanda,8398828771
6,MSU006119,Holly Pham,holly.pham@mcbride-willis.org,branch manager,Johnsonshire,(606)735-9212x3765
7,MSU009547,Nicholas David,nicholas.david@mcbride-willis.org,insurance broker,Port Amanda,(545)228-5606x6312
8,MSU002922,Joseph Cox,joseph.cox@mcbride-willis.org,insurance broker,Johnsonshire,+1-258-960-4340x37154
9,MSU009978,Samuel Harris,samuel.harris@mcbride-willis.org,insurance broker,Port Amanda,+1-764-562-5930x55246


## MS Calendar Events Table Creation

In [35]:
events_count = 30
locations = random_meeting_location(events_count)
brokers_sset = ms_users.sample(events_count, replace=True)
calendar_events = pd.DataFrame({
    'event_id': random_event_ids(events_count),
    'subject': random_meeting_titles(events_count),
    'organizer_name': brokers_sset.name.to_list(),
    'organizer_email': brokers_sset.e_mail.to_list(),
    'start_time': random_datetime(today=today, n=events_count),
    'location': locations,
    'attendees': random_meeting_attendees(events_count),
    'meeting_url': random_meeting_urls(locations)  
})

In [36]:
calendar_events.head(3)

Unnamed: 0,event_id,subject,organizer_name,organizer_email,start_time,location,attendees,meeting_url
0,E202500282,Q3 Portfolio Performance Review – Key Accounts,Jordan Smith,jordan.smith@mcbride-willis.org,2026-09-01 09:00:53,online,"{'names': ['Joshua Adams', 'Kevin Turner', 'Da...",https://fakemsteams.com/meet/iqd-asi-ula
1,E202500175,Policy Renewal Review : Collier-Leon,Nicholas David,nicholas.david@mcbride-willis.org,2026-03-13 01:46:29,online,"{'names': ['Joshua Adams', 'Kevin Turner', 'Da...",https://fakemsteams.com/meet/heq-nih-frp
2,E202500779,Quarterly Account Review : Top Clients,Samuel Harris,samuel.harris@mcbride-willis.org,2026-05-24 13:11:11,online,"{'names': ['Joshua Adams', 'Kevin Turner', 'Da...",https://fakemsteams.com/meet/dep-ldz-sdu


## MS E-Mails Table Creation

In [37]:
def random_broker_sset(cnt):
    return ms_users.sample(cnt, replace=True)

email_count = 40
fromBrokers = random_broker_sset(email_count)
toBrokers = [','.join(random_broker_sset(randint(1, 5)).e_mail) for _ in range(email_count)]
cc = [','.join(random_broker_sset(randint(1, 5)).e_mail) for _ in range(email_count)]
sentDates = random_dates_this_year(email_count)
emails = pd.DataFrame({
    'message_id': random_message_ids(sentDates),
    'received_date': random_dates_this_year_after(sentDates),
    'sent_date': sentDates,
    'subject': random_email_subjects(email_count),
    'from_name': fromBrokers.name.tolist(),
    'from_email': fromBrokers.e_mail.to_list(),
    'to_email': toBrokers,
    'cc': cc,
    'read_status': random_read_state(email_count),
    'hasAttachment': random_attachment_state(email_count)
    })

In [38]:
emails.head(3)

Unnamed: 0,message_id,received_date,sent_date,subject,from_name,from_email,to_email,cc,read_status,hasAttachment
0,MESS2025-01-31001907,2025-02-01,2025-01-31,Need Clarification on Endorsement Changes for ...,Casey Moore,casey.moore@mcbride-willis.org,"samuel.harris@mcbride-willis.org,sherry.howard...",holly.pham@mcbride-willis.org,read,True
1,MESS2025-05-01006770,2025-05-02,2025-05-01,Request for KYC/Business Proofs for New Corpor...,Samuel Harris,samuel.harris@mcbride-willis.org,"sherry.howard@mcbride-willis.org,robert.patter...","robert.patterson@mcbride-willis.org,jordan.smi...",read,True
2,MESS2025-04-15004711,2025-04-16,2025-04-15,Urgent: Underwriter Feedback on Fire Insurance...,Casey Moore,casey.moore@mcbride-willis.org,"joseph.cox@mcbride-willis.org,jordan.smith@mcb...","samuel.harris@mcbride-willis.org,sherry.howard...",read,True


In [39]:
ms_users.to_csv('fake_MS_Teams_Data/teams_users.csv', index=False)
calendar_events.to_csv('fake_MS_Teams_Data/onedrive_calendar_events.csv', index=False)
emails.to_csv('fake_MS_Teams_Data/emails.csv', index=False)