<a href="https://colab.research.google.com/github/Savvy-Slowley/Synthetic-Data/blob/main/syth_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import pandas as pd
from datetime import datetime, timedelta
from faker import Faker

fake = Faker()

# Constants
# Age thresholds, in years. AGE_OF_ADULTHOOD is the cut-off used further down
# to decide whether a child is recorded as a student or as a working adult.
AGE_OF_ADULTHOOD = 18
# NOTE(review): AGE_OF_RETIREMENT is not referenced anywhere else in the
# visible script; confirm whether it is still needed.
AGE_OF_RETIREMENT = 65

# Define the number of families
# Each family contributes two parent rows plus one to four child rows.
num_families = 5

def random_date(start, end):
    """Return a random moment between ``start`` and ``end`` (both inclusive).

    The offset from ``start`` is drawn uniformly in whole seconds, so any
    sub-second part of the interval length is discarded.  ``random.randint``
    raises ``ValueError`` when ``end`` lies before ``start``.
    """
    span_seconds = int((end - start).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    return start + offset

# Hair and eye colours a person can realistically have.
# Fixed lists are used instead of fake.color_name(): that provider returns
# arbitrary CSS colour names (e.g. "PapayaWhip") and may return the same name
# twice, so the lists were neither plausible nor guaranteed unique.
hair_colors = ['Black', 'Brown', 'Blonde', 'Red', 'Gray']
eye_colors = ['Brown', 'Blue', 'Green', 'Hazel', 'Gray']

# One telephone area code per US state, keyed by the two-letter postal
# abbreviation.  The keys double as the pool of emergency-contact states.
area_codes = dict(
    entry.split(':')
    for entry in (
        'AL:205 AK:907 AZ:480 AR:479 CA:213 '
        'CO:303 CT:203 DE:302 FL:305 GA:404 '
        'HI:808 ID:208 IL:312 IN:317 IA:319 '
        'KS:913 KY:502 LA:504 ME:207 MD:410 '
        'MA:617 MI:313 MN:612 MS:601 MO:314 '
        'MT:406 NE:402 NV:702 NH:603 NJ:201 '
        'NM:505 NY:212 NC:919 ND:701 OH:216 '
        'OK:405 OR:503 PA:215 RI:401 SC:803 '
        'SD:605 TN:615 TX:512 UT:801 VT:802 '
        'VA:703 WA:206 WV:304 WI:414 WY:307'
    ).split()
)

# Ways an emergency contact can be related to the person
relationships = 'Parent Sibling Spouse Friend Other'.split()

# Allowed answers for the US-citizenship columns
us_citizen_options = ['Yes', 'No']

# Countries that may show up in a travel history
countries = 'USA Canada Mexico UK France Germany Japan Australia'.split()

# Bounds of the date window used for birthdates and travel history
start_date = datetime(year=1960, month=1, day=1)
end_date = datetime(year=2005, month=12, day=31)

# Accumulates one dict per generated person
family_data = list()

# Loop to create families
#
# Each family is generated as one coherent unit: facts that several rows must
# agree on (a parent's name, birth date, birthplace, citizenship, phone and the
# household address) are drawn once and then copied into every row that
# mentions them, instead of being re-randomised per row.

# Single reference instant so every age and date window in this run agrees.
generation_time = datetime.now()

passport_statuses = ['Submitted with application', 'Stolen', 'In my possession and expired']


def _iso(moment):
    """Format a datetime as the YYYY-MM-DD string stored in every date column."""
    return moment.strftime('%Y-%m-%d')


def _add_years(moment, years):
    """Return ``moment`` shifted forward by a whole number of calendar years."""
    try:
        return moment.replace(year=moment.year + years)
    except ValueError:
        # Feb 29 has no counterpart in a non-leap target year; fall back to Feb 28.
        return moment.replace(year=moment.year + years, day=28)


def _age_in_years(date_of_birth):
    """Return the approximate age in whole years at ``generation_time``."""
    return (generation_time - date_of_birth).days // 365


def _full_name(person):
    """Return "<first> <last>" for a person dict."""
    return person['first_name'] + " " + person['last_name']


def _draw_home():
    """Draw a postal location whose one-line address repeats its own city, state and ZIP."""
    state = random.choice(list(area_codes))
    city = fake.city()
    zip_code = fake.zipcode()
    return {
        'city': city,
        'state': state,
        'zip_code': zip_code,
        'address': f"{fake.street_address()}, {city}, {state} {zip_code}",
    }


def _draw_phone(home):
    """Draw a phone number carrying the area code listed for the home's state."""
    # '%' is a non-zero digit, so the exchange never starts with 0.
    return f"({area_codes[home['state']]}) {fake.numerify('%##-####')}"


def _draw_identity(gender, last_name, first_name=None):
    """Draw the name, birthplace and citizenship of one person.

    The first name matches ``gender`` unless the caller supplies one.
    """
    if first_name is None:
        first_name = fake.first_name_male() if gender == 'Male' else fake.first_name_female()
    return {
        'first_name': first_name,
        'middle_name': fake.first_name(),
        'last_name': last_name,
        'gender': gender,
        'place_of_birth': fake.city(),
        'us_citizen': random.choice(us_citizen_options),
    }


def _draw_member(gender, last_name, date_of_birth, home, first_name=None):
    """Draw a household member: an identity plus birth date, home and phone."""
    member = _draw_identity(gender, last_name, first_name)
    member['date_of_birth'] = date_of_birth
    member['home'] = home
    member['phone'] = _draw_phone(home)
    return member


def _parentage_fields(father, mother):
    """Return the father_* / mother_* columns describing a person's parents."""
    fields = {}
    for role, parent in (('father', father), ('mother', mother)):
        fields[f'{role}_last_name'] = parent['last_name']
        fields[f'{role}_first_name'] = parent['first_name']
        fields[f'{role}_middle_name'] = parent['middle_name']
        fields[f'{role}_place_of_birth'] = parent['place_of_birth']
        fields[f'{role}_gender'] = parent['gender']
        fields[f'{role}_us_citizen'] = parent['us_citizen']
    return fields


def _build_record(family_id, member, father, mother, emergency_contact,
                  emergency_relationship, spouse=None, marriage_date=None,
                  plans_travel=False):
    """Assemble one output row for ``member``.

    ``father`` / ``mother`` are the member's own parents.  ``emergency_contact``
    is another household member whose name, phone and home are copied into the
    emergency_contact_* columns.  ``spouse`` and ``marriage_date`` fill the
    spouse_* columns; when ``spouse`` is None the member is recorded as single.
    ``plans_travel`` controls whether travel dates and countries are generated.
    Column names and their order are the same as in the original script.
    """
    date_of_birth = member['date_of_birth']
    age = _age_in_years(date_of_birth)

    if age < AGE_OF_ADULTHOOD:
        occupation = 'Student'
        employer_or_school = fake.company() + ' School'
        # Coarse linear growth estimate: about 18-26 in at birth up to
        # about 60-68 in at 17, expressed as feet plus remaining inches.
        shortest = 18 + (5 * age) // 2
        height_ft, height_in = divmod(random.randint(shortest, shortest + 8), 12)
    else:
        occupation = fake.job()
        employer_or_school = fake.company()
        height_ft = random.randint(4, 6)
        height_in = random.randint(0, 11)

    if spouse is None:
        marital_status = 'Single'
        spouse_date_of_marriage = spouse_full_name = None
        spouse_place_of_birth = spouse_date_of_birth = spouse_us_citizen = None
    else:
        marital_status = 'Married'
        spouse_date_of_marriage = _iso(marriage_date)
        spouse_full_name = _full_name(spouse)
        spouse_place_of_birth = spouse['place_of_birth']
        spouse_date_of_birth = _iso(spouse['date_of_birth'])
        spouse_us_citizen = spouse['us_citizen']

    if plans_travel:
        departure = random_date(end_date, end_date + timedelta(days=365))
        # Arrival is drawn after departure so the trip never ends before it starts.
        arrival = random_date(departure + timedelta(days=1), end_date + timedelta(days=366))
        departure_date = _iso(departure)
        arrival_date = _iso(arrival)
        # sample() avoids listing the same country twice; at least one country
        # is required because travel dates are present.
        travel_countries = random.sample(countries, k=random.randint(1, 3))
    else:
        departure_date = arrival_date = None
        travel_countries = ['N/A']

    contact_home = emergency_contact['home']

    record = {
        'family_id': family_id,
        'last_name': member['last_name'],
        'first_name': member['first_name'],
        'middle_name': member['middle_name'],
        'ssn': fake.unique.ssn(),
        'date_of_birth': _iso(date_of_birth),
        'gender': member['gender'],
        'place_of_birth': member['place_of_birth'],
        'contact_phone_number': member['phone'],
        'occupation': occupation,
        'employer_or_school': employer_or_school,
        'height_ft': height_ft,
        'height_in': height_in,
        'hair_color': random.choice(hair_colors),
        'eye_color': random.choice(eye_colors),
        'permanent_address': member['home']['address'],
        'marital_status': marital_status,
        'spouse_date_of_marriage': spouse_date_of_marriage,
        'spouse_full_name': spouse_full_name,
        'spouse_place_of_birth': spouse_place_of_birth,
        'spouse_date_of_birth': spouse_date_of_birth,
        'spouse_us_citizen': spouse_us_citizen,
        'widow_or_divorced': 'No',
        'widow_divorce_date': None,
        'departure_date': departure_date,
        'arrival_date': arrival_date,
        'travel_countries': travel_countries,
        'emergency_contact_name': _full_name(emergency_contact),
        'emergency_contact_city': contact_home['city'],
        'emergency_contact_state': contact_home['state'],
        'emergency_contact_zip_code': contact_home['zip_code'],
        'emergency_contact_phone_number': emergency_contact['phone'],
        'emergency_contact_relationship': emergency_relationship,
        'emergency_contact_address': contact_home['address'],
        'previous_passport_name': _full_name(member),
        # fix_len=True keeps the number at exactly nine digits.
        'previous_passport_number': fake.random_number(digits=9, fix_len=True),
        # A passport cannot be issued before its holder is born.
        'previous_passport_issue_date': _iso(random_date(date_of_birth, generation_time)),
        'previous_passport_status': random.choice(passport_statuses),
    }
    record.update(_parentage_fields(father, mother))
    record['has_same_address'] = True
    return record


for family_id in range(1, num_families + 1):
    # Generate a unique last name for the family
    family_last_name = fake.unique.last_name_male()  # Use male last name for simplicity

    # All members share one household address (every row has has_same_address=True).
    household = _draw_home()

    # Generate parents' data
    father = _draw_member(
        'Male', family_last_name, random_date(start_date, end_date), household,
        first_name=fake.unique.first_name_male(),
    )
    mother = _draw_member(
        'Female', family_last_name, random_date(start_date, end_date), household,
        first_name=fake.unique.first_name_female(),
    )

    # Marriage and children only happen once the younger parent is an adult.
    # min() guards against a run date earlier than that moment.
    younger_parent_adult = _add_years(
        max(father['date_of_birth'], mother['date_of_birth']), AGE_OF_ADULTHOOD
    )
    earliest_family_date = min(younger_parent_adult, generation_time)
    marriage_date = random_date(earliest_family_date, generation_time)

    # Each parent's own parents are separate people; the mother's parents
    # carry her maiden name rather than the family name.
    maiden_name = fake.last_name()
    paternal_grandfather = _draw_identity('Male', family_last_name)
    paternal_grandmother = _draw_identity('Female', family_last_name)
    maternal_grandfather = _draw_identity('Male', maiden_name)
    maternal_grandmother = _draw_identity('Female', maiden_name)

    # Add father's data to the family data
    family_data.append(_build_record(
        family_id, father, paternal_grandfather, paternal_grandmother,
        emergency_contact=mother, emergency_relationship='Spouse',
        spouse=mother, marriage_date=marriage_date, plans_travel=True,
    ))

    # Add mother's data to the family data
    family_data.append(_build_record(
        family_id, mother, maternal_grandfather, maternal_grandmother,
        emergency_contact=father, emergency_relationship='Spouse',
        spouse=father, marriage_date=marriage_date, plans_travel=True,
    ))

    # Generate a random number of kids (1 to 4)
    num_kids = random.randint(1, 4)

    # Generate data for each kid
    for _ in range(num_kids):
        # Born after both parents reached adulthood and no later than today,
        # so children are younger than their parents and may still be minors.
        kid = _draw_member(
            random.choice(['Male', 'Female']),
            family_last_name,
            random_date(earliest_family_date, generation_time),
            household,
        )
        family_data.append(_build_record(
            family_id, kid, father, mother,
            emergency_contact=mother, emergency_relationship='Parent',
        ))

# Tabulate the collected records: one row per person, one column per record key
family_data_df = pd.DataFrame(data=family_data)

# Bare expression so the notebook cell renders the table as its output
family_data_df