This is a generator file that creates 2 CSV files with 50 rows of data each (the row count can be changed).
The 2 CSV files are: Persons_data.csv and Sample_data.csv
Their CPR columns are the same, so they can be joined on that column.

In [11]:
#importing all necessary libraries and creating an instance of Faker
from faker import Faker
from faker.providers import BaseProvider
import csv

# Shared Faker instance; it is passed to the provider classes constructed in the cells below.
fake = Faker()

In [12]:
# creating cpr numbers

# Importing CPR_provider for generating CPR numbers
from GenPersons import CPR_provider

# Module-level CPR_provider built on the shared `fake` instance;
# create_cpr_numbers() calls its cpr() method once per requested number.
CPR_provider_instance = CPR_provider(fake)

def create_cpr_numbers(length=50):
    """
    Build a list of freshly generated CPR numbers.

    Args:
    - length (int): How many CPR numbers to produce. Default is 50.
      A value of zero or less yields an empty list.

    Returns:
    - list: The generated CPR numbers, one per call to the module-level
      CPR provider, in generation order.
    """
    # One provider call per requested entry; the loop counter itself is unused.
    return [CPR_provider_instance.cpr() for _ in range(length)]



In [13]:
#generating person data

# Importing necessary modules for generating person data
from GenPersons import gender, Region_provider, Phone_number

# Module-level provider instances used by generate_person(). Only
# Region_provider is constructed with the shared `fake` instance; gender and
# Phone_number are created without arguments.
Gender_provider_instance = gender()
PhoneNumber_provider_instance = Phone_number()
Region_provider_instance = Region_provider(fake)

def generate_person(cpr):
    """
    Assemble one person record for the supplied CPR number.

    Args:
    - cpr (str): CPR number the record is built around; it is also handed to
      the gender provider's gender_determination() method.

    Returns:
    - tuple: (cpr, phone number, region, gender), i.e. the same column order
      as the header row written by create_Persons_csv.
    """
    # Provider calls stay in the original order (gender, region, phone) in
    # case the providers share random state.
    gender_value = Gender_provider_instance.gender_determination(cpr)
    region_value = Region_provider_instance.region()
    phone_value = PhoneNumber_provider_instance.number()

    return (cpr, phone_value, region_value, gender_value)

def create_Persons_csv(cpr_numbers, name):
    """
    Write a CSV file with one generated person record per CPR number.

    Args:
    - cpr_numbers (list): CPR numbers to generate person rows for. An empty
      list yields a file that contains only the header row.
    - name (str): Path of the CSV file to create (overwritten if it exists).

    Returns:
    - None: The data is written to the file as a side effect.
    """
    # newline="" lets the csv module control line endings. The explicit UTF-8
    # encoding makes the output identical on every platform instead of
    # depending on the locale's default encoding.
    with open(name, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        # Header row first, then one data row per CPR number in input order.
        writer.writerow(["CPR", "Phone_number", "Region", "Gender"])
        writer.writerows(generate_person(cpr) for cpr in cpr_numbers)


In [14]:
# File Purpose: Generating Sample_data.csv

# Importing necessary classes for sample data generation
from GenSample import (random_id_provider, random_date_provider,
                                   random_host_provider, random_Ct_value_provider)

# Module-level provider instances used by generate_sample(); each one is
# constructed with the shared `fake` (Faker) instance.
random_id_instance = random_id_provider(fake)
random_date_instance = random_date_provider(fake)
random_host_instance = random_host_provider(fake)
random_Ct_instance = random_Ct_value_provider(fake)

def generate_sample(cpr):
    """
    Assemble one sample record for the supplied CPR number.

    Parameters:
    cpr (str): The CPR number stored in the record; it is not modified.

    Returns:
    tuple: (sample id, cpr, sample date, host, Ct value), i.e. the same
    column order as the header row written by generate_Sample_csv.
    """
    # Tuple elements are evaluated left to right, so the providers are still
    # called in the order: id, date, host, Ct value.
    return (
        random_id_instance.create_uuid(),
        cpr,
        random_date_instance.create_date(),
        random_host_instance.create_host(),
        random_Ct_instance.create_Ct_val(),
    )

def generate_Sample_csv(cpr_numbers, name):
    """
    Write a CSV file with one generated sample record per CPR number.

    Parameters:
    cpr_numbers (list): CPR numbers to generate sample rows for. An empty
        list yields a file that contains only the header row.
    name (str): Path of the CSV file to create (overwritten if it exists).

    Returns:
    None: The data is written to the file as a side effect.
    """
    # newline="" lets the csv module control line endings. The explicit UTF-8
    # encoding makes the output identical on every platform instead of
    # depending on the locale's default encoding.
    with open(name, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        # Header row first, then one data row per CPR number in input order.
        writer.writerow(["SampleID", "CPR", "SampleDate", "Host", "Ct"])
        writer.writerows(generate_sample(cpr) for cpr in cpr_numbers)


In [15]:
# File Purpose: Main execution script for generating CPR numbers and corresponding CSV files.

def main(count=50, persons_file="Persons_data.csv", sample_file="Sample_data.csv"):
    """
    Generate the CPR numbers and write both CSV files.

    Steps:
    1. Creates a new list of `count` CPR numbers.
    2. Writes the person data and the sample data for those CPR numbers to
       `persons_file` and `sample_file`; both files share the same CPR values.

    Parameters:
    count (int): Number of CPR numbers (and thus data rows per file) to
        generate. Default is 50.
    persons_file (str): Output path for the person data.
        Default is "Persons_data.csv".
    sample_file (str): Output path for the sample data.
        Default is "Sample_data.csv".

    Returns:
    None: Progress and errors are reported with print().
    """
    try:
        # Generating a list of CPR numbers
        cpr_numbers = create_cpr_numbers(count)
        print(f'{len(cpr_numbers)} CPR numbers created')

        # Both files are built from the same CPR list so they can be joined on it
        create_Persons_csv(cpr_numbers, persons_file)
        generate_Sample_csv(cpr_numbers, sample_file)
    except Exception as e:
        # Top-level boundary: report the failure instead of raising
        print(f'An error occurred: {e}')
    else:
        # Only reached when every step above completed without an exception
        print("CSV files created successfully!")


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


50 CPR numbers created
CSV files created successfully!
