This is a generator file that creates 2 CSV files with 50 rows of data each (the row count can be changed in main()).
The 2 CSV files are: Persons.csv and Sample.csv.
Their CPR columns are the same, so the files can be joined on that column.

In [13]:
#importing all necessary libraries and creating an instance of Faker
from faker import Faker
from faker.providers import BaseProvider
import csv

# Shared Faker instance; it is handed to the provider classes instantiated further down
fake = Faker()

In [14]:
# Importing CPR_provider for generating CPR numbers
from Persons_data_GENERATOR import CPR_provider

# Module-level CPR_provider instance, built on the shared Faker object.
# create_cpr_numbers() calls its cpr() method once per generated number.
CPR_provider_instance = CPR_provider(fake)

def create_cpr_numbers(length=50):
    """
    Produce a list of freshly generated CPR numbers.

    Each entry is the result of one call to CPR_provider_instance.cpr().

    Args:
    - length (int): How many CPR numbers to generate. Default is 50.
      A value of 0 or less yields an empty list.

    Returns:
    - list: The generated CPR numbers, in generation order.
    """
    # One provider call per requested number; the loop counter itself is unused
    return [CPR_provider_instance.cpr() for _ in range(length)]



In [15]:
# Importing necessary modules for generating person data
from Persons_data_GENERATOR import gender, Region_provider, Phone_number

# Module-level provider instances used by generate_person().
# gender and Phone_number are constructed without arguments;
# Region_provider is constructed with the shared Faker instance.
Gender_provider_instance = gender()
PhoneNumber_provider_instance = Phone_number()
Region_provider_instance = Region_provider(fake)

def generate_person(cpr):
    """
    Build one row of personal data for the given CPR number.

    Args:
    - cpr (str): CPR number of the person. It is passed to the gender
      provider's gender_determination() and copied into the result.

    Returns:
    - tuple: (cpr, phone number, region, gender), matching the column order
      of the header written by create_Persons_csv().
    """
    # The providers are queried in a fixed order (gender, region, phone)
    sex = Gender_provider_instance.gender_determination(cpr)
    home_region = Region_provider_instance.region()
    phone = PhoneNumber_provider_instance.number()

    return (cpr, phone, home_region, sex)

def create_Persons_csv(cpr_numbers, name):
    """
    Write a CSV file with one row of generated personal data per CPR number.

    Args:
    - cpr_numbers (list): CPR numbers; each one is passed to generate_person()
      to build a row.
    - name (str): Path of the CSV file to write. The file is opened in "w"
      mode, so an existing file with that name is overwritten.

    Returns:
    - None: The data is written to the CSV file.
    """
    header = ["CPR", "Phone_number", "Region", "Gender"]

    # newline="" leaves line-ending handling to the csv module
    with open(name, "w", newline="") as out_file:
        person_writer = csv.writer(out_file)
        person_writer.writerow(header)
        # Rows are produced lazily: one generate_person() call per CPR number
        person_writer.writerows(generate_person(number) for number in cpr_numbers)


In [16]:
# Generating Sample_data.csv

#importing the classes
from sample_data_GENERATOR import random_id_provider , random_date_provider, random_host_provider, random_Ct_value_provider

# Module-level provider instances used by generate_sample().
# Each provider class is constructed with the shared Faker instance.
random_id_instance = random_id_provider(fake)
random_date_instance = random_date_provider(fake)
random_host_instance = random_host_provider(fake)
random_Ct_instance = random_Ct_value_provider(fake)

def generate_sample(cpr):
    """
    Build one row of sample data for the given CPR number.

    Args:
    - cpr: CPR number to attach to the sample; it is copied into the result unchanged.

    Returns:
    - tuple: (sample id, cpr, sample date, host, Ct value), matching the column
      order of the header written by generate_Sample_csv().
    """
    # The providers are queried in a fixed order (id, date, host, Ct)
    sample_id = random_id_instance.create_uuid()
    sample_date = random_date_instance.create_date()
    host = random_host_instance.create_host()
    ct_value = random_Ct_instance.create_Ct_val()

    return (sample_id, cpr, sample_date, host, ct_value)

def generate_Sample_csv(cpr_numbers, name):
    """
    Write a CSV file with one row of generated sample data per CPR number.

    Args:
    - cpr_numbers (list): CPR numbers; each one is passed to generate_sample()
      to build a row.
    - name (str): Path of the CSV file to write. The file is opened in "w"
      mode, so an existing file with that name is overwritten.

    Returns:
    - None: The data is written to the CSV file.
    """
    header = ["SampleID", "CPR", "SampleDate", "Host", "Ct"]

    # newline="" leaves line-ending handling to the csv module
    with open(name, "w", newline="") as out_file:
        sample_writer = csv.writer(out_file)
        sample_writer.writerow(header)
        # Rows are produced lazily: one generate_sample() call per CPR number
        sample_writer.writerows(generate_sample(number) for number in cpr_numbers)


        



In [18]:
# main()
def main():
    """
    Create a new list of 50 CPR numbers (can be changed) and generate
    2 CSV files, Persons.csv and Sample.csv, based on those CPR numbers.

    Any exception raised along the way is caught and reported with print()
    instead of being propagated to the caller.
    """
    try:
        generated = create_cpr_numbers(50)
        count = len(generated)
        print(f'{count} CPR numbers created')

        # Both files are built from the same CPR list, so they share the CPR column
        create_Persons_csv(generated, "Persons.csv")
        generate_Sample_csv(generated, "Sample.csv")
        print("csv files created")
    except Exception as e:
        print(f'an error occurred {e}')


# Run main() only when this file is executed directly, not when it is imported
if __name__ == "__main__":
    main()

50 CPR numbers created
csv files created
