In [1]:
!pip install Faker



In [2]:
import random
from datetime import datetime, timedelta
import pandas as pd
from faker import Faker

fake = Faker()

def random_date(start, end):
    """Return a random datetime in the closed interval [start, end].

    The result is ``start`` plus a whole number of seconds, drawn uniformly
    with ``random.randint``; any fractional second in the span between the
    two bounds is truncated before drawing.
    """
    span_in_seconds = int((end - start).total_seconds())
    offset = random.randint(0, span_in_seconds)
    return start + timedelta(seconds=offset)

# Size of the generated dataset and the ID assigned to the first applicant
start_applicant_id = 1
num_applicants = 100

# Age thresholds, in years, used to classify applicants as minors or retirees
AGE_OF_RETIREMENT = 65
AGE_OF_ADULTHOOD = 18

# Fractions in [0, 1] that are compared against random.random() when deciding
# how the place of birth relates to the applicant's state and city
CITY_MATCH_PERCENTAGE = 0.2
STATE_MATCH_PERCENTAGE = 0.8

# Telephone area codes (as 3-character strings) keyed by state name
area_codes = {
    'New York': '212 315 518 585 631 716 845 914'.split(),
    'California': (
        '209 213 310 323 408 415 510 530 559 562 619 626 650 '
        '707 714 760 805 818 831 858 909 916 925 949 951'
    ).split(),
    'Texas': (
        '210 214 254 281 325 346 361 409 430 432 469 512 682 '
        '713 737 806 817 830 832 903 915 936 940 956 972 979'
    ).split(),
    # All 50 States will be added
}

# Column store for the Passport Application Dataset: a precomputed range of
# applicant IDs plus one initially empty list per remaining column.
applicant_data = {
    'applicant_id': range(start_applicant_id, start_applicant_id + num_applicants),
}
applicant_data.update(
    (column_name, [])  # a fresh, independent list for every column
    for column_name in (
        # Applicant
        'last_name',
        'first_name',
        'middle_name',
        'ssn',
        'date_of_birth',
        'gender',
        'place_of_birth',
        'contact_phone_number',
        'occupation',
        'employer_or_school',
        'height_ft',
        'height_in',
        'hair_color',
        'eye_color',
        'permanent_address',
        # Marital details
        'marital_status',
        'spouse_date_of_marriage',
        'spouse_full_name',
        'spouse_place_of_birth',
        'spouse_date_of_birth',
        'spouse_us_citizen',
        'widow_or_divorced',
        'widow_divorce_date',
        # Travel plans
        'departure_date',
        'arrival_date',
        'travel_countries',
        # Emergency contact
        'emergency_contact_name',
        'emergency_contact_city',
        'emergency_contact_state',
        'emergency_contact_zip_code',
        'emergency_contact_phone_number',
        'emergency_contact_relationship',
        'emergency_contact_address',
        # Previous passport
        'previous_passport_name',
        'previous_passport_number',
        'previous_passport_issue_date',
        'previous_passport_status',
        # Father
        'father_last_name',
        'father_first_name',
        'father_middle_name',
        'father_place_of_birth',
        'father_gender',
        'father_us_citizen',
        # Mother
        'mother_last_name',
        'mother_first_name',
        'mother_middle_name',
        'mother_place_of_birth',
        'mother_gender',
        'mother_us_citizen',
        # New column for indicating if applicant has same address as parents
        'has_same_address',
    )
)

# Bounds for the randomly generated dates; the upper bound is fixed once,
# at the moment this statement runs.
end_date = datetime.now()
start_date = datetime(1960, 1, 1)

# Value pools that random choices are drawn from
hair_colors = [
    'Black',
    'Brown',
    'Blonde',
    'Red',
]
eye_colors = [
    'Brown',
    'Blue',
    'Green',
]
countries = [
    'USA',
    'Canada',
    'UK',
    'France',
    'Germany',
]
marital_statuses = [
    'Single',
    'Married',
    'Widowed',
    'Divorced',
]
relationships = [
    'Parent',
    'Sibling',
    'Friend',
    'Spouse',
]

# Yes/No answer pools
us_citizen_options = ['Yes', 'No']
widow_divorced_options = ['Yes', 'No']

# Country in which the family is born
family_country = 'USA'

# Generate one synthetic applicant per iteration. Each iteration builds a
# complete record and appends exactly one value to every column list in
# applicant_data, so all columns stay the same length as 'applicant_id'.
for _ in range(num_applicants):
    # --- Parents ---------------------------------------------------------
    # The father always receives male names and the mother female names,
    # independent of the applicant's own gender.
    father_last_name = fake.unique.last_name_male()
    father_first_name = fake.unique.first_name_male()
    father_middle_name = fake.first_name_male()
    mother_last_name = fake.unique.last_name_female()
    mother_first_name = fake.unique.first_name_female()
    mother_middle_name = fake.first_name_female()

    # --- Applicant identity ----------------------------------------------
    gender = random.choice(['Male', 'Female'])
    if gender == 'Male':
        # Drawing from the same `unique` pool as the father guarantees the
        # applicant's first name differs from the father's.
        first_name = fake.unique.first_name_male()
        middle_name = fake.first_name_male()
        while middle_name in (father_middle_name, mother_middle_name):
            middle_name = fake.first_name_male()
    else:
        # Same guarantee against the mother's first name.
        first_name = fake.unique.first_name_female()
        middle_name = fake.first_name_female()
        while middle_name in (father_middle_name, mother_middle_name):
            middle_name = fake.first_name_female()

    last_name = father_last_name
    ssn = fake.unique.ssn()  # Generate unique SSN

    # Keep the birth date as a datetime; it is formatted only when stored.
    date_of_birth = random_date(start_date, end_date)

    # Completed years of age as of end_date: subtract one if this year's
    # birthday has not been reached yet.
    age = end_date.year - date_of_birth.year - (
        (end_date.month, end_date.day) < (date_of_birth.month, date_of_birth.day)
    )

    # --- Location and contact --------------------------------------------
    # The state is always one of the keys of area_codes, so an area code
    # for it is always available.
    location = random.choice(list(area_codes))
    city = fake.city()
    area_code = random.choice(area_codes[location])
    contact_phone_number = f'({area_code}) {fake.random_int(100, 999)}-{fake.random_int(1000, 9999)}'

    # The birthplace is the applicant's current city if the first draw falls
    # below STATE_MATCH_PERCENTAGE or, failing that, a second independent
    # draw falls below CITY_MATCH_PERCENTAGE; otherwise it is another city
    # in the same state.
    if random.random() < STATE_MATCH_PERCENTAGE or random.random() < CITY_MATCH_PERCENTAGE:
        place_of_birth = f'{city}, {location}'
    else:
        place_of_birth = f'{fake.city()}, {location}'

    # Build the address from its parts so that the city and state agree
    # with `city` and `location`. fake.address() would embed its own random
    # city and state, which conflicts with appending `location`.
    permanent_address = f'{fake.street_address()}, {city}, {location} {fake.zipcode()}'

    # --- Occupation and height, by age group -----------------------------
    if age < AGE_OF_ADULTHOOD:
        occupation = 'Student'
        employer_or_school = fake.company() + ' School'
        if age < 1:
            # Babies: 16-36 inches in total, normalised into feet and inches
            # so that height_in stays within 0-11 like every other row.
            height_ft, height_in = divmod(random.randint(16, 36), 12)
        elif age <= 5:
            # Toddlers and young children (1-5 years old)
            height_ft = 2
            height_in = random.randint(0, 11)
        else:
            # Older children and teenagers (6-17 years old)
            height_ft = random.randint(4, 5)
            height_in = random.randint(0, 11)
    elif age > AGE_OF_RETIREMENT:
        occupation = 'Retired'
        employer_or_school = None
        height_ft = random.randint(5, 6)
        height_in = random.randint(0, 11)
    else:
        occupation = fake.job()
        employer_or_school = fake.company()
        height_ft = random.randint(4, 6)
        height_in = random.randint(0, 11)

    hair_color = random.choice(hair_colors)
    eye_color = random.choice(eye_colors)

    # --- Marital details -------------------------------------------------
    spouse_date_of_marriage = None
    spouse_full_name = None
    spouse_place_of_birth = None
    spouse_date_of_birth = None
    spouse_us_citizen = None
    widow_or_divorced = 'No'
    widow_divorce_date = None

    if age < AGE_OF_ADULTHOOD:
        # Minors are never married, widowed or divorced.
        marital_status = 'Single'
    else:
        marital_status = random.choice(marital_statuses)
        # Earliest date for marital events. 18 * 365 days is slightly less
        # than 18 calendar years, so for an adult this never exceeds
        # end_date and random_date always gets a valid range.
        adulthood_date = date_of_birth + timedelta(days=AGE_OF_ADULTHOOD * 365)

        if marital_status == 'Married':
            spouse_date_of_marriage = random_date(adulthood_date, end_date).strftime('%Y-%m-%d')
            spouse_full_name = fake.name()
            spouse_place_of_birth = fake.city() + ', ' + family_country
            spouse_date_of_birth = random_date(start_date, end_date).strftime('%Y-%m-%d')
            spouse_us_citizen = random.choice(us_citizen_options)
        elif marital_status in ('Widowed', 'Divorced'):
            # Keep the widow/divorce columns consistent with marital_status.
            widow_or_divorced = 'Yes'
            widow_divorce_date = random_date(adulthood_date, end_date).strftime('%Y-%m-%d')

    # --- Travel plans ----------------------------------------------------
    # random.sample draws without replacement, so no country is repeated.
    travel_countries = random.sample(countries, k=random.randint(0, 3)) or ['N/A']

    # Departure falls within the next year; arrival is at least one day
    # after departure and within the same overall window as before.
    departure = random_date(end_date, end_date + timedelta(days=365))
    arrival = random_date(departure + timedelta(days=1), end_date + timedelta(days=366))
    departure_date = departure.strftime('%Y-%m-%d')
    arrival_date = arrival.strftime('%Y-%m-%d')

    # --- Previous passport -----------------------------------------------
    # fix_len=True forces exactly 9 digits; without it Faker may return a
    # shorter number. The issue date cannot precede the applicant's birth.
    previous_passport_number = fake.random_number(digits=9, fix_len=True)
    previous_passport_issue_date = random_date(date_of_birth, end_date).strftime('%Y-%m-%d')

    # NOTE(review): last_name is always father_last_name, so this flag is
    # currently always True. Behaviour is kept; confirm the intended rule.
    has_same_address = last_name in (father_last_name, mother_last_name)

    parents_place_of_birth = f'{city}, {location}'

    # --- Store the record ------------------------------------------------
    # Keys must match the list-backed columns of applicant_data exactly.
    record = {
        'last_name': last_name,
        'first_name': first_name,
        'middle_name': middle_name,
        'ssn': ssn,
        'date_of_birth': date_of_birth.strftime('%Y-%m-%d'),
        'gender': gender,
        'place_of_birth': place_of_birth,
        'contact_phone_number': contact_phone_number,
        'occupation': occupation,
        'employer_or_school': employer_or_school,
        'height_ft': height_ft,
        'height_in': height_in,
        'hair_color': hair_color,
        'eye_color': eye_color,
        'permanent_address': permanent_address,
        'marital_status': marital_status,
        'spouse_date_of_marriage': spouse_date_of_marriage,
        'spouse_full_name': spouse_full_name,
        'spouse_place_of_birth': spouse_place_of_birth,
        'spouse_date_of_birth': spouse_date_of_birth,
        'spouse_us_citizen': spouse_us_citizen,
        'widow_or_divorced': widow_or_divorced,
        'widow_divorce_date': widow_divorce_date,
        'departure_date': departure_date,
        'arrival_date': arrival_date,
        'travel_countries': travel_countries,
        'emergency_contact_name': fake.name(),
        'emergency_contact_city': fake.city(),
        'emergency_contact_state': location,
        'emergency_contact_zip_code': fake.zipcode(),
        'emergency_contact_phone_number': fake.phone_number(),
        'emergency_contact_relationship': random.choice(relationships),
        'emergency_contact_address': fake.address().replace('\n', ', '),
        'previous_passport_name': f"{first_name} {last_name}",
        'previous_passport_number': previous_passport_number,
        'previous_passport_issue_date': previous_passport_issue_date,
        'previous_passport_status': random.choice(
            ['Submitted with application', 'Stolen', 'In my possession and expired']
        ),
        'father_last_name': father_last_name,
        'father_first_name': father_first_name,
        'father_middle_name': father_middle_name,
        'father_place_of_birth': parents_place_of_birth,
        'father_gender': 'Male',
        'father_us_citizen': random.choice(us_citizen_options),
        'mother_last_name': mother_last_name,
        'mother_first_name': mother_first_name,
        'mother_middle_name': mother_middle_name,
        'mother_place_of_birth': parents_place_of_birth,
        'mother_gender': 'Female',
        'mother_us_citizen': random.choice(us_citizen_options),
        'has_same_address': has_same_address,
    }
    for column, value in record.items():
        applicant_data[column].append(value)

# Assemble the column lists into a single DataFrame, one row per applicant.
applicant_df = pd.DataFrame(applicant_data)


NameError: ignored

In [None]:
# Bare expression as the last line of a notebook cell: the notebook renders
# the DataFrame as the cell's output.
applicant_df