# Generating Fake Data

It's often useful, especially in healthcare, to generate fake data in order to test your system while protecting the privacy of your production data sets.

In [1]:
## let's generate a fake person

In [2]:
person = {}

In [3]:
print(person)

{}


In [4]:
person['age'] = 28

In [5]:
print(person)

{'age': 28}


In [6]:
# let's generate a random age
import random
person['age'] = random.randrange(100)
print(person)

{'age': 2}


In [7]:
person['age'] = random.randrange(100)
print(person)

{'age': 42}


In [8]:
# in healthcare kids and elderly have special coverage so let's exclude both groups
person['age'] = random.randint(18, 65)
print(person)

{'age': 32}


In [9]:
person['employment'] = 'Full Time'
print(person)

{'age': 32, 'employment': 'Full Time'}


In [10]:
# let's set employment status
employment_status = ['Full Time', 'Part Time', 'Contract', 'Seasonal', 'Unemployed', 'Retired']
person['employment'] = random.choice(employment_status)
print(person)

{'age': 32, 'employment': 'Unemployed'}


In [11]:
person['employment'] = random.choice(employment_status)
print(person)

{'age': 32, 'employment': 'Retired'}


In [12]:
person['employment'] = random.choices(employment_status, [.5, .2, .1, .1, .05, .05])[0]
print(person)

{'age': 32, 'employment': 'Part Time'}


In [13]:
person['employment'] = random.choices(employment_status, [.5, .2, .1, .1, .05, .05])[0]
print(person)

{'age': 32, 'employment': 'Part Time'}


In [14]:
def generate_employment():
    employment_status = ['Full Time', 'Part Time', 'Contract', 'Seasonal', 'Unemployed', 'Retired']
    employment = random.choices(employment_status, [.5, .2, .1, .1, .05, .05])[0]
    return employment
    

In [15]:
# let's construct some fake addresses
def generate_address():
    street_number = random.randint(1, 100)
    street_name= random.choice(['Main', 'Bluff', 'Federal'])
    city = random.choice(['Pittsburgh', 'Cleveland' ])

    city_details = { 'Pittsburgh' : { 'zip': 15106, 'state': 'PA' },
                    'Cleveland' : { 'zip': 44101, 'state': 'OH' } }
    state = city_details[city]['zip']
    zip_code = city_details[city]['state']
    
    address = f"{street_number} {street_name}\n{city}, {state} {zip_code}"
    return address

In [16]:
address = generate_address()
print(address)

24 Federal
Pittsburgh, 15106 PA


In [17]:
def generate_name():
    first = ['Ben', 'Jen', 'Joan', 'John']
    last = ['Jones', 'Smith', 'Doe']
    return f"{random.choice(first)} {random.choice(last)}"

In [18]:
name = generate_name()
print(name)

John Doe


In [19]:
def generate_person():
    person = {}
    person['name'] = generate_name()
    person['address'] = generate_address()
    person['employment'] = generate_employment()
    return person

In [20]:
person = generate_person()
print(person)

{'name': 'Ben Jones', 'address': '12 Bluff\nCleveland, 44101 OH', 'employment': 'Seasonal'}


In [21]:
person = generate_person()
print(person)

{'name': 'John Jones', 'address': '64 Federal\nCleveland, 44101 OH', 'employment': 'Full Time'}


In [22]:
person = generate_person()
print(person)

{'name': 'Ben Jones', 'address': '47 Federal\nPittsburgh, 15106 PA', 'employment': 'Part Time'}
