# Random Data Generator

In [156]:
import pandas as pd
from faker import Faker
from random import random, randint, lognormvariate, normalvariate, seed, sample
from math import log
from datetime import datetime, timedelta

## Random data generators

### Clients Table

In [7]:
def random_names(n:int = 1) -> list:
    faker = Faker()
    return [faker.name() for _ in range(n)]

def random_client_ids(n:int = 1, random_state=None) -> list:
    if not (random_state is None):
        seed(random_state)
    faker= Faker()
    return [f'C00{randint(1000, 9999)}' for _ in range(n)]

def random_account_types(n:int = 1) -> list:
    return [['Personal', 'Commercial'][randint(0, 1)] for _ in range(n)]

def random_addresses(n:int = 1) -> list:
    faker = Faker()
    return [faker.address().replace('\n', ', ') for _ in range(n)]

def random_serving_broker_ids(n:int = 1) -> list:
    faker= Faker()
    return [f'S00{randint(1000, 9999)}' for _ in range(n)]

def random_phone_numbers(n:int = 1) -> list:
    faker= Faker()
    return [faker.phone_number() for _ in range(n)]

def random_emails(names: list) -> list:
    faker = Faker()
    op = []
    for name in names:
        email = faker.company_email()
        op.append('.'.join(name.lower().split()) + email[email.index('@'):])
    return op

def random_agency_branch_ids(n:int = 1, unique_branches=3) -> list:
    branches = [randint(100, 999) for _ in range(unique_branches)]
    return [f'B_US00{branches[randint(0, unique_branches-1)]}' for _ in range(n)]

def random_statuses(n:int = 1) -> list:
    return [['Active', 'Inactive', 'Prospect'][randint(0, 2)] for _ in range(n)]

def random_dates_of_creation(n:int = 1) -> list:
    faker = Faker()
    return [faker.date_between(datetime(2005, 1, 1), datetime(2007, 1, 1)) for _ in range(n)]

def random_dates_of_updation(today = datetime(2025, 1, 1), n:int = 1) -> list:
    return [today for _ in range(n)]

### Policies Table

In [8]:
def random_policy_ids(n:int = 1) -> list:
    faker= Faker()
    return [f'P00{randint(1000, 9999)}' for _ in range(n)]

def random_job_title(n:int = 1) -> list:
    faker = Faker()
    return [faker.job() for _ in range(n)]

def random_insurer(n:int = 1) -> list:
    faker = Faker()
    return [f'{faker.company()} Insurance Inc.' for _ in range(n)]

def random_expiry_dates(issue_dates, term=20) -> list:
    op = []
    for date in issue_dates:
        op.append(datetime(date.year+term, date.month, date.day))
    return op

def random_premium_amounts(n:int = 1) -> list:
    premiums = []

    for _ in range(n):
        # 70% Term Life, 30% Whole Life
        if random() < 0.7:
            # Term Life: median ~350, moderate spread
            mean = log(350)
            sigma = 0.5
        else:
            # Whole Life: median ~2500, large spread
            mean = log(2500)
            sigma = 0.7

        premium = lognormvariate(mean, sigma)
        premiums.append(premium)

    return premiums

def random_premium_bill_to(n:int = 1) -> list:
    return [[
        "Direct Bill",
        "Agency Bill",
        "Insured",
        "Premium Finance Company",
        "Mortgagee",
        "Parent Company",
        "Subsidiary",
        "Third-Party Billing",
        "Payroll Vendor"
    ][randint(0, 8)] for _ in range(n)]

def random_renewal_status(n:int = 1) -> list:
    return[[
        'New', 
        'Canceled', 
        'Marketing', 
        'Renewal Quoted'
    ][randint(0, 3)] for _ in range(n)]

def random_broker_commisions(n:int = 1) -> list:
    return [normalvariate(11.5, 4) for _ in range(n)]

### MS Users Table

In [17]:
def random_department(n:int = 1) -> list:
    faker = Faker()
    return [faker.department() for _ in range(n)]

def random_location(n:int = 1) -> list:
    faker = Faker()
    return [faker.city() for _ in range(n)]

### MS CalendarEvents Table

In [132]:
def random_event_ids(n:int = 1) -> list:
    faker = Faker()
    return [f'E{faker.date_this_year().year}00{randint(100, 999)}' for _ in range(n)]

def random_meeting_titles(n:int = 1) -> list:
    def sample_space() -> list:
        faker = Faker()
        return [
            f'Policy Renewal Review : {faker.company()}',
            f'Client Onboarding: {faker.company()} : Kickoff Call',
            f'Broker-Client Check-in : Renewal Discussion',
            f'Underwriting Submission Call : XYZ Logistics',
            f'Quarterly Account Review : Top Clients',
            f'Annual Premium Negotiation – {faker.company()}',
            f'Risk Assessment Workshop with Underwriting Team',
            f'Q3 Portfolio Performance Review – Key Accounts',
            f'Internal Sync: Renewal Pipeline Status Update',
            f'Claims Trend Analysis – High-Risk Clients'
            ]
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_datetime(today: datetime, n:int = 1) -> list:
    faker = Faker()
    return [str(faker.date_time_between(today, datetime(today.year+1, today.month, today.day, 16, 50, 0, 0))) for _ in range(n)]


def random_meeting_location(n:int = 1) -> list:
    def sample_space() -> list:
        faker = Faker()
        return ['online']*80 + [faker.city() for x in range(20)]
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_meeting_attendees(n:int = 1) -> list:
    names = random_names(randint(2, 10))
    return [{'names':names, 'emails':random_emails(names)} for _ in range(n)]

def random_meeting_id() -> str:
    def letter() -> str:
        return chr(randint(ord('a'), ord('z')))
    op = ''
    for i in range(3):
        for _ in range(3):
            op += letter()
        op += '-'

    return op[:-1]

def random_meeting_urls(locations: list) -> list:
    op = []
    for location in locations:
        if location == 'online':
            op += ['https://fakemsteams.com/meet/' + random_meeting_id()]
        else:
            op += [None]
    return op

def random_org_emails(names: list) -> list:
    faker = Faker()
    op = []
    email = faker.company_email()
    for name in names:
        op.append('.'.join(name.lower().split()) + email[email.index('@'):])
    return op

def random_ms365_ids(n:int = 1) -> list:
    return [f'MSU00{randint(1000, 9999)}' for _ in range(n)]


### E-Mails Table

In [177]:
def random_message_ids(dates: list) -> list:
    faker=Faker()
    return [f'MESS{date}00{randint(1000, 9999)}' for date in dates]

def random_dates_this_year(n:int = 1) -> list:
    faker=Faker()
    return [faker.date_this_year() for _ in range(n)]

def random_dates_this_year_after(dates: list) -> list:
    return [date + timedelta(days=1) for date in dates]

def random_email_subjects(n:int = 1) -> list:
    faker = Faker()
    def sample_space():
        return [f'Updated Premium Quotes for {faker.company()} – Review Needed',
        f'Client Claim Status: Requesting Supporting Documents',
        f'Policy Renewal Reminders for This Week',
        f'Meeting Agenda: New IRDAI Compliance Guidelines',
        f'Need Clarification on Endorsement Changes for Motor Fleet Policy',
        f'Follow-Up: Health Insurance Proposal Sent to Client',
        f'Urgent: Underwriter Feedback on Fire Insurance Application',
        f'Reconciliation of Commission Statements – {faker.date_this_year().month}',
        f'Request for KYC/Business Proofs for New Corporate Client',
        f'Internal Review: Claim Settlement Delay – Escalation Required']
    return [sample(sample_space(), 1)[0] for _ in range(n)]

def random_read_state(n:int = 1) -> list:
    sample_space = ['read']*80 + ['unread']*20
    return [sample(sample_space, 1)[0] for _ in range(n)]

def random_attachment_state(n:int = 1) -> list:
    sample_space = [True]*80 + [False]*20
    return [sample(sample_space, 1)[0] for _ in range(n)]

In [163]:
random_read_state(10)

['read',
 'read',
 'unread',
 'read',
 'unread',
 'unread',
 'read',
 'read',
 'read',
 'read']

## Clients Table Creation

In [87]:
n = 1000
today = datetime(2025, 11, 28)
names = random_names(n)
clientIds = random_client_ids(n, random_state=69)
issueDates = random_dates_of_creation(n)
updtationDates = random_dates_of_updation(today=today, n=n)
term = 20
branches = 10
clients = pd.DataFrame({
    'client_name': names,
    'client_id': clientIds,
    'account_type': random_account_types(n),
    'address': random_addresses(n),
    'phone_number': random_phone_numbers(n),
    'email': random_emails(names),
    'branch_id': random_agency_branch_ids(n, branches),
    'servicing_broker_id': random_serving_broker_ids(n),
    'status': random_statuses(n),
    'issued_on': issueDates,
    'updated_on': updtationDates
})

In [105]:
clients.tail(3)

Unnamed: 0,client_name,client_id,account_type,address,phone_number,email,branch_id,servicing_broker_id,status,issued_on,updated_on
997,Darlene Madden,C006921,Personal,"322 Arnold Roads, Lake Donald, PR 68141",816-381-5991x65155,darlene.madden@rocha.com,B_US00638,S005780,Active,2005-08-14,2025-11-28
998,Hannah Jones,C007082,Commercial,"144 Laura Mills, Roweport, MH 88623",962-595-8629,hannah.jones@spence-harris.com,B_US00961,S008081,Prospect,2005-10-12,2025-11-28
999,Keith Silva,C008506,Commercial,"1756 Amy Crescent Apt. 777, South Richardburgh...",001-593-900-7643x4541,keith.silva@williams-gomez.com,B_US00590,S002544,Prospect,2006-04-17,2025-11-28


## Policies Table Creation

In [89]:
policies = pd.DataFrame({
    'policy_id':random_policy_ids(n),
    'client_id': clientIds,
    'line_of_business': random_job_title(n),
    'issuing_carrier': random_insurer(n),
    'issued_on': issueDates,
    'expiry_date': random_expiry_dates(issue_dates=issueDates, term=term),
    'premium_amount': random_premium_amounts(n),
    'premium_bill_to': random_premium_bill_to(n),
    'renewal_status': random_renewal_status(n),
    'last_updated': updtationDates,
    'broker_commision_pct': random_broker_commisions(n),
})

In [90]:
policies.head(3)

Unnamed: 0,policy_id,client_id,line_of_business,issuing_carrier,issued_on,expiry_date,premium_amount,premium_bill_to,renewal_status,last_updated,broker_commision_pct
0,P003944,C001611,"Education officer, museum","Stewart, Carson and Mckay Insurance Inc.",2006-06-05,2026-06-05,4398.737534,Premium Finance Company,New,2025-11-28,15.536213
1,P001615,C002570,"Engineer, biomedical",Garcia-Ballard Insurance Inc.,2005-08-03,2025-08-03,693.891941,Insured,Marketing,2025-11-28,18.76672
2,P004749,C003734,Technical brewer,Nelson PLC Insurance Inc.,2005-02-26,2025-02-26,220.460744,Mortgagee,Renewal Quoted,2025-11-28,18.419806


In [91]:
clients.to_csv('fake_CRM_data/clients.csv', index=False)
policies.to_csv('fake_CRM_data/policies.csv', index=False)

## MS Users table Creation

In [181]:
broker_count = 10
brokers = random_names(broker_count)
ms_users = pd.DataFrame({
    'ms365_id':random_ms365_ids(broker_count),
    'name': brokers,
    'e_mail': random_org_emails(brokers),
    'job_title':sample(['branch manager']*2 + 8*['insurance broker'], broker_count),
    'office_location': random_location(2)*(broker_count//2),
    'mobile_phone':random_phone_numbers(broker_count),
    
})

In [182]:
ms_users.head(30)

Unnamed: 0,ms365_id,name,e_mail,job_title,office_location,mobile_phone
0,MSU009483,Jason Schmitt,jason.schmitt@adams-velazquez.net,insurance broker,South Luiston,735-911-7559
1,MSU009084,William Thomas,william.thomas@adams-velazquez.net,insurance broker,Washingtonside,443.824.8697x3537
2,MSU004354,Colleen Rice,colleen.rice@adams-velazquez.net,insurance broker,South Luiston,(202)920-6161x71596
3,MSU004177,Alexis Gilbert,alexis.gilbert@adams-velazquez.net,insurance broker,Washingtonside,(961)316-3580x5344
4,MSU007206,Yolanda Davies,yolanda.davies@adams-velazquez.net,insurance broker,South Luiston,854.314.0479x6819
5,MSU005770,Sheila Smith,sheila.smith@adams-velazquez.net,insurance broker,Washingtonside,001-833-802-2083x148
6,MSU007758,Michael Baker,michael.baker@adams-velazquez.net,insurance broker,South Luiston,496-540-0656
7,MSU002813,Joseph Burke,joseph.burke@adams-velazquez.net,insurance broker,Washingtonside,(218)438-6448x81439
8,MSU005012,Paul Graves,paul.graves@adams-velazquez.net,branch manager,South Luiston,001-911-598-9369x5756
9,MSU006085,Linda Johnson,linda.johnson@adams-velazquez.net,branch manager,Washingtonside,796.360.0017


## MS Calendar Events Table Creation

In [183]:
events_count = 30
locations = random_meeting_location(events_count)
brokers_sset = ms_users.sample(events_count, replace=True)
calendar_events = pd.DataFrame({
    'event_id': random_event_ids(events_count),
    'subject': random_meeting_titles(events_count),
    'organizer_name': brokers_sset.name.to_list(),
    'organizer_email': brokers_sset.e_mail.to_list(),
    'start_time': random_datetime(today=today, n=events_count),
    'location': locations,
    'attendees': random_meeting_attendees(events_count),
    'meeting_url': random_meeting_urls(locations)  
})

In [184]:
calendar_events.head(3)

Unnamed: 0,event_id,subject,organizer_name,organizer_email,start_time,location,attendees,meeting_url
0,E202500168,Internal Sync: Renewal Pipeline Status Update,Michael Baker,michael.baker@adams-velazquez.net,2026-07-18 17:58:26.639919,online,"{'names': ['Amanda Ortiz', 'Cheryl Miller', 'S...",https://fakemsteams.com/meet/jyd-yxg-cqu
1,E202500349,Client Onboarding: Brewer LLC : Kickoff Call,Colleen Rice,colleen.rice@adams-velazquez.net,2026-10-01 21:32:57.253947,online,"{'names': ['Amanda Ortiz', 'Cheryl Miller', 'S...",https://fakemsteams.com/meet/ocx-vys-jgo
2,E202500625,"Client Onboarding: Hill, Edwards and Morton : ...",Yolanda Davies,yolanda.davies@adams-velazquez.net,2026-11-12 21:16:55.453666,online,"{'names': ['Amanda Ortiz', 'Cheryl Miller', 'S...",https://fakemsteams.com/meet/zgk-tyc-llk


## MS E-Mails Table Creation

In [185]:
def random_broker_sset(cnt):
    return ms_users.sample(cnt, replace=True)

email_count = 40
fromBrokers = random_broker_sset(email_count)
toBrokers = [','.join(random_broker_sset(randint(1, 5)).e_mail) for _ in range(email_count)]
cc = [','.join(random_broker_sset(randint(1, 5)).e_mail) for _ in range(email_count)]
sentDates = random_dates_this_year(email_count)
emails = pd.DataFrame({
    'message_id': random_message_ids(sentDates),
    'received_date': random_dates_this_year_after(sentDates),
    'sent_date': sentDates,
    'subject': random_email_subjects(email_count),
    'from_name': fromBrokers.name.tolist(),
    'from_email': fromBrokers.e_mail.to_list(),
    'to_email': toBrokers,
    'cc': cc,
    'read_status': random_read_state(email_count),
    'hasAttachment': random_attachment_state(email_count)
    })

In [186]:
emails.head(3)

Unnamed: 0,message_id,received_date,sent_date,subject,from_name,from_email,to_email,cc,read_status,hasAttachment
0,MESS2025-02-06004183,2025-02-07,2025-02-06,Need Clarification on Endorsement Changes for ...,William Thomas,william.thomas@adams-velazquez.net,"joseph.burke@adams-velazquez.net,william.thoma...",alexis.gilbert@adams-velazquez.net,read,False
1,MESS2025-03-23005417,2025-03-24,2025-03-23,Urgent: Underwriter Feedback on Fire Insurance...,Michael Baker,michael.baker@adams-velazquez.net,"yolanda.davies@adams-velazquez.net,joseph.burk...","jason.schmitt@adams-velazquez.net,joseph.burke...",read,True
2,MESS2025-01-01007443,2025-01-02,2025-01-01,"Updated Premium Quotes for Smith, Carrillo and...",Alexis Gilbert,alexis.gilbert@adams-velazquez.net,"jason.schmitt@adams-velazquez.net,joseph.burke...","joseph.burke@adams-velazquez.net,jason.schmitt...",read,True


In [187]:
ms_users.to_csv('teams_users.csv', index=False)
calendar_events.to_csv('onedrive_calendar_events.csv', index=False)
emails.to_csv('emails.csv', index=False)