### Fake Voter Record Generation

**Jan 16, 2025**

**Description:** Generates fake voter records and fake petition-signer records for testing purposes.


In [1]:
# One-time setup: uncomment to install Faker into this kernel's environment.
# %pip install Faker

In [2]:
from pathlib import Path

import pandas as pd
from faker import Faker
# Fixed seed so a fresh "Restart & Run All" regenerates exactly the same fake records.
SEED = 42

# The CSV exports below are written into this directory. DataFrame.to_csv does not
# create missing folders, so make sure it exists before any cell tries to write.
OUTPUT_DIR = Path('fake_data_collection')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

fake = Faker()
# Faker.seed() seeds the random generator shared by Faker instances; without it
# every run produces different names and addresses.
Faker.seed(SEED)
# Start from an empty uniqueness registry: fake.unique remembers every value it
# has handed out until it is cleared.
fake.unique.clear()
# Size of the full registered-voter roll.
num_records = 100000


### Generating fake voter records

These are the "true" registered voters.

In [3]:
# Honorifics that Faker's default (en_US) locale may put in front of a generated name.
# NOTE(review): taken from Faker's en_US person provider - extend if the locale changes.
name_prefixes = {'Mr.', 'Mrs.', 'Ms.', 'Miss', 'Dr.', 'Mx.'}

# generating names
# A name such as "Dr. Jane Doe" would be split into first name "Dr." and last name
# "Jane Doe", so names that start with an honorific are redrawn. Draws still go
# through fake.unique, which keeps every full name distinct and records it so later
# cells cannot generate it again. Suffixes ("MD", "Jr.") remain part of the last name.
names = []
while len(names) < num_records:
    candidate = fake.unique.name()
    if candidate.split(' ')[0] not in name_prefixes:
        names.append(candidate)

# First token is the first name; everything after the first space is the last name.
name_parts = [name.partition(' ') for name in names]
first_names = [first for first, _, _ in name_parts]
last_names = [rest for _, _, rest in name_parts]

# generating street addresses
street_addresses = [fake.unique.street_address() for _ in range(num_records)]
# Leading token is the building number; the remainder (street plus any "Apt."/"Suite"
# part) is kept together as the street name.
address_parts = [address.partition(' ') for address in street_addresses]
street_numbers = [number for number, _, _ in address_parts]
street_names = [rest for _, _, rest in address_parts]


In [4]:
# Preview the first ten parsed street names to sanity-check the address split
# (unit designators such as "Apt. 314" stay attached to the street name).
street_names[:10]


['Martin Island',
 'Kathryn Mall',
 'Tara Ports Apt. 314',
 'Landry Hills',
 'Susan Cliffs Suite 119',
 'Murphy Loaf',
 'Crystal Summit Suite 401',
 'Melissa Neck Suite 811',
 'Michael Mountains Suite 010',
 'Brooks Corners Apt. 910']

In [5]:
from pandas import DataFrame

# Assemble the full voter roll: one row per generated person/address pair.
voter_columns = {
    'First_Name': first_names,
    'Last_Name': last_names,
    'Street_Number': street_numbers,
    'Street_Name': street_names,
}
# Street_Type and Street_Dir_Suffix are not generated; both are filled with a
# single-space placeholder so the columns exist in the export.
df = DataFrame(voter_columns).assign(Street_Type=' ', Street_Dir_Suffix=' ')
df.to_csv('fake_data_collection/fake_voter_records.csv', index=False)

df.head()

Unnamed: 0,First_Name,Last_Name,Street_Number,Street_Name,Street_Type,Street_Dir_Suffix
0,Erica,Massey,6071,Martin Island,,
1,Terry,Osborne,395,Kathryn Mall,,
2,David,Holmes,30154,Tara Ports Apt. 314,,
3,Michele,Ballard,310,Landry Hills,,
4,Mary,Wiggins,26734,Susan Cliffs Suite 119,,


#### Selecting 400 random rows

Select 400 random rows from the fake voter records. These are the registered voters who sign the petition, i.e. the "true" signers.

In [6]:
# Draw a fixed-size random subset of the voter roll; the fixed random_state makes
# the draw repeatable for a given df.
sample_size = 400
sample_seed = 42
df_true = df.sample(n=sample_size, random_state=sample_seed)
# Optional: persist the sample on its own.
# df_true.to_csv('fake_data_collection/voter_sample.csv', index=False)

df_true.head()

Unnamed: 0,First_Name,Last_Name,Street_Number,Street_Name,Street_Type,Street_Dir_Suffix
75721,Adam,Welch,5211,Shaw Wall,,
80184,Jody,Compton,37705,Raymond Gardens,,
19864,Jordan,Mcdonald,165,Madison Ramp,,
76699,Tanya,Wilkinson,813,Kimberly Cliff Apt. 762,,
92991,Tracie,Meyer,95915,Terry Ford Apt. 548,,


#### Generating spurious names and addresses

These are names and addresses that are not in the fake voter records. They represent petition signers who are not registered voters.

In [7]:
# Honorifics that Faker's default (en_US) locale may put in front of a generated name.
# NOTE(review): taken from Faker's en_US person provider - extend if the locale changes.
name_prefixes = {'Mr.', 'Mrs.', 'Ms.', 'Miss', 'Dr.', 'Mx.'}
# Number of signers to invent who are not on the voter roll.
num_spurious = 100

# generating names not in fake voter records
# fake.unique remembers every value returned since the last clear(), so as long as
# this runs in the same session as the voter generation, these full names and
# addresses cannot repeat a voter's. Names starting with an honorific are redrawn,
# otherwise "Dr. Jane Doe" would be stored with first name "Dr.".
spurious_names = []
while len(spurious_names) < num_spurious:
    candidate = fake.unique.name()
    if candidate.split(' ')[0] not in name_prefixes:
        spurious_names.append(candidate)
spurious_addresses = [fake.unique.street_address() for _ in range(num_spurious)]

# First token is the first name; the remainder (including suffixes such as "MD")
# is the last name.
spurious_name_parts = [name.partition(' ') for name in spurious_names]
spurious_first_names = [first for first, _, _ in spurious_name_parts]
spurious_last_names = [rest for _, _, rest in spurious_name_parts]

# Leading token is the building number; the remainder is the street name.
spurious_address_parts = [address.partition(' ') for address in spurious_addresses]
spurious_street_numbers = [number for number, _, _ in spurious_address_parts]
spurious_street_names = [rest for _, _, rest in spurious_address_parts]

df_spurious = DataFrame({'First_Name': spurious_first_names, 'Last_Name': spurious_last_names, 'Street_Number': spurious_street_numbers, 'Street_Name': spurious_street_names})
# Same single-space placeholder columns as the voter roll, so both frames share a schema.
df_spurious['Street_Type'] = ' '
df_spurious['Street_Dir_Suffix'] = ' '
df_spurious.to_csv('fake_data_collection/spurious_signers.csv', index=False)

df_spurious.head()

Unnamed: 0,First_Name,Last_Name,Street_Number,Street_Name,Street_Type,Street_Dir_Suffix
0,Jared,Freeman,1147,Daniel Motorway Suite 235,,
1,April,White,279,Cheyenne Forges,,
2,Angelica,Terrell MD,191,Gutierrez Points Apt. 138,,
3,Kristen,Christensen,954,Todd Village,,
4,Ian,Kelly,810,Harrington Gardens,,


#### Creating a combined dataset


In [8]:

# Petition signers = the sampled registered voters followed by the spurious
# signers, stacked into one frame and renumbered from 0.
signer_frames = [df_true, df_spurious]
df_combined = pd.concat(signer_frames, ignore_index=True)
df_combined.to_csv('fake_data_collection/all_petition_signers.csv', index=False)

df_combined.head()

Unnamed: 0,First_Name,Last_Name,Street_Number,Street_Name,Street_Type,Street_Dir_Suffix
0,Adam,Welch,5211,Shaw Wall,,
1,Jody,Compton,37705,Raymond Gardens,,
2,Jordan,Mcdonald,165,Madison Ramp,,
3,Tanya,Wilkinson,813,Kimberly Cliff Apt. 762,,
4,Tracie,Meyer,95915,Terry Ford Apt. 548,,


In [9]:
# Show the combined frame; pandas elides the middle rows, so this displays both
# the leading (sampled voter) rows and the trailing (spurious signer) rows.
df_combined

Unnamed: 0,First_Name,Last_Name,Street_Number,Street_Name,Street_Type,Street_Dir_Suffix
0,Adam,Welch,5211,Shaw Wall,,
1,Jody,Compton,37705,Raymond Gardens,,
2,Jordan,Mcdonald,165,Madison Ramp,,
3,Tanya,Wilkinson,813,Kimberly Cliff Apt. 762,,
4,Tracie,Meyer,95915,Terry Ford Apt. 548,,
...,...,...,...,...,...,...
495,Jeremy,Figueroa,3890,Edward Parkways,,
496,Richard,Klein,23923,Newton Pines Suite 528,,
497,Peter,Harvey,12239,Victoria Mountains Suite 294,,
498,George,Vargas Jr.,435,Nelson Fork,,
