Install and configure libraries

In [None]:
!pip install faker

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting faker
  Downloading Faker-13.14.0-py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 13.1 MB/s 
Installing collected packages: faker
Successfully installed faker-13.14.0


In [None]:
from faker import Faker

import numpy as np

import random

# Seed every random source used by this notebook (Faker, NumPy and the stdlib
# `random` module) so that Restart & Run All regenerates the same dataset.
SEED = 42
Faker.seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Shared Faker generator using the 'nl_NL' locale.
faker_instance = Faker('nl_NL')

Create and initialize the different kinds of people we need: patients, GPs, and biolab employees

In [None]:
class Person:
  """Common fields for every generated individual.

  Attributes set on construction:
    id         -- the identifier passed in by the caller
    gp         -- always None here; subclasses may assign one
    gender     -- "M", "F" or "NB", drawn with probabilities 0.49 / 0.49 / 0.02
    ssn        -- value produced by ``faker_instance.ssn()``
    last_name  -- value produced by ``faker_instance.last_name()``
    first_name -- Faker first name matching the drawn gender
  """

  def __init__(self, id):
    self.id = id
    self.gp = None

    gender_codes = ["M", "F", "NB"]
    gender_weights = [0.49, 0.49, 0.02]
    self.gender = np.random.choice(gender_codes, p=gender_weights)

    self.ssn = faker_instance.ssn()
    self.last_name = faker_instance.last_name()

    # Name of the Faker method to call for each gender code; anything that is
    # neither "M" nor "F" uses the non-binary name provider.
    provider_by_gender = {"M": "first_name_male", "F": "first_name_female"}
    provider_name = provider_by_gender.get(self.gender, "first_name_nonbinary")
    self.first_name = getattr(faker_instance, provider_name)()

In [None]:
class GP(Person):
  """A general practitioner: a Person with a printable one-line summary."""

  def __init__(self, id):
    super().__init__(id)

  def __str__(self):
    # Fields in the order they appear in the summary line.
    summary_fields = (self.id, self.first_name, self.last_name, self.gender, self.ssn)
    return "GP #%d, %s %s, %s, %s" % summary_fields

# Pool of 30 GPs (ids 0-29) that patients are assigned to later on.
gps = [GP(gp_id) for gp_id in range(30)]

# Preview the first ten GPs.
for gp in gps[:10]:
  print(gp)

GP #0, Lauren van de Wiel, F, 014825934
GP #1, Jason Massa, M, 429175061
GP #2, Hendrik Slagmolen, M, 346197806
GP #3, Jayden Dennenberg, M, 921306489
GP #4, Lex Brisee, M, 716984027
GP #5, Jonas Kuijpers, M, 124076385
GP #6, Anouk le Matelot, F, 385492674
GP #7, Jennifer Lucas, F, 328670455
GP #8, Boaz Bertho, M, 537819629
GP #9, Alexander Mulder, M, 361250496


In [None]:
class Patient(Person):
  """A Person with an address, a date of birth and a randomly assigned GP.

  The GP is drawn uniformly from the module-level ``gps`` list.
  """

  def __init__(self, id):
    super().__init__(id)
    # Faker returns a multi-line address; flatten it to a single line.
    address_lines = faker_instance.address().split("\n")
    self.address = ", ".join(address_lines)
    self.date_of_birth = faker_instance.date_of_birth()
    self.gp = np.random.choice(gps)

  def __str__(self):
    # The GP summary comes first, followed by the patient's own details.
    summary_fields = (
      self.gp,
      self.id,
      self.first_name,
      self.last_name,
      self.gender,
      self.ssn,
      self.date_of_birth,
      self.address,
    )
    return "%s, Patient #%d, %s %s, %s, (%s %s %s)" % summary_fields

# Generate 1500 patients (ids 0-1499); each picks its GP from `gps` on construction.
patients = [Patient(patient_id) for patient_id in range(1500)]

# Preview the first ten patients.
for patient in patients[:10]:
  print(patient)

GP #17, Sven van Bunschoten, M, 624079181, Patient #0, Maja Wilmont, F, (849120652 1977-11-17 Lennbaan 767, 4928 CA, Lobith)
GP #17, Sven van Bunschoten, M, 624079181, Patient #1, Juliette Verhaar, F, (402936152 1960-07-02 Jaspersingel 19, 1345BX, Kuitaart)
GP #14, Melle Veltman, M, 567243199, Patient #2, Ilse van der Plas, F, (803964171 1959-05-31 Jasondreef 047, 4920AW, Roderwolde)
GP #8, Boaz Bertho, M, 537819629, Patient #3, Loïs Eelman, F, (632084911 2007-08-31 Owenpad 780, 4855 MB, Haaksbergen)
GP #20, Renske Ketting, F, 283975106, Patient #4, Wessel Kalman, M, (245608175 1980-11-03 Kimpad 420, 3676HL, Norg)
GP #18, Lauren Wright, F, 234716587, Patient #5, Thom van Gorp, M, (268704399 2012-06-02 Renskedreef 29, 6556 XF, Urk)
GP #8, Boaz Bertho, M, 537819629, Patient #6, Yara van Vermandois, F, (790568238 1969-06-07 Laurensweg 30, 3896HQ, Aagtekerke)
GP #14, Melle Veltman, M, 567243199, Patient #7, Hailey Zwart, F, (265703918 1953-09-11 Marinushof 753, 9302LH, Rockanje)
GP #26, Ru

In [None]:
class Biolab:
  """A laboratory identified only by its name."""

  def __init__(self, name):
    """Store the lab's display name.

    Args:
      name: value shown between the angle brackets by ``str()``.
    """
    self.name = name

  def __str__(self):
    # The explicit 1-tuple makes "%" treat `name` as a single value even when
    # it is itself a tuple; a bare `% (self.name)` would unpack it as arguments.
    return "Biolab <%s>" % (self.name,)

# The single lab instance shared by the rest of the notebook.
lab = Biolab(name="primlab")
print(lab)


Biolab <primlab>


In [None]:
class Employee(Person):
  """A Person working at the module-level ``lab``.

  Adds a job ("Tester" with probability 0.7, "Analyst" with 0.3) and an email
  address taken from Faker's ``unique`` proxy.
  """

  def __init__(self, id):
    super().__init__(id)
    job_titles = ["Tester", "Analyst"]
    job_weights = [0.7, 0.3]
    self.job = np.random.choice(job_titles, p=job_weights)
    self.email = faker_instance.unique.email()
    self.biolab = lab

  def __str__(self):
    # Personal details first, then job, email and lab inside the parentheses.
    summary_fields = (
      self.id,
      self.first_name,
      self.last_name,
      self.gender,
      self.ssn,
      self.job,
      self.email,
      self.biolab,
    )
    return "Employee #%d, %s %s, %s, %s, (%s %s %s)" % summary_fields

# Generate 30 lab employees (ids 0-29).
employees = [Employee(employee_id) for employee_id in range(30)]

# Preview the first ten employees.
for employee in employees[:10]:
  print(employee)

Employee #0, Aya van Beieren, F, 856724105, (Tester bweijland@example.net Biolab <primlab>)
Employee #1, Marinus Schouten, M, 245079683, (Tester noahvan-este@example.com Biolab <primlab>)
Employee #2, Joël Jansse, M, 397450825, (Tester van-de-klashorstlisa@example.org Biolab <primlab>)
Employee #3, Stefan Sas, M, 027148968, (Analyst tvan-ooyen@example.org Biolab <primlab>)
Employee #4, Tobias van den Assem, M, 376041286, (Tester bleeskeano@example.net Biolab <primlab>)
Employee #5, Mason Labado, M, 753184205, (Tester floortje62@example.org Biolab <primlab>)
Employee #6, Silke Kalman, F, 834516093, (Analyst evavan-bernicia@example.com Biolab <primlab>)
Employee #7, Puk Le Marec, F, 809624370, (Tester jayson07@example.org Biolab <primlab>)
Employee #8, Valentijn de Gruijl, M, 017953649, (Analyst van-rietlily@example.org Biolab <primlab>)
Employee #9, Lorenzo Oosterhek, M, 092483173, (Analyst jannewillemsen@example.net Biolab <primlab>)


In [None]:
class Coagulator:
  """Plain record holding an id, a factor name and a color."""

  def __init__(self, id, factor, color):
    self.id, self.factor, self.color = id, factor, color

  def __str__(self):
    template = "Coagulator %d, [factor=%s, color=%s]"
    return template % (self.id, self.factor, self.color)

# Fixed catalogue of coagulators; ids are the positions 0-7 in this list.
coagulators = [
  Coagulator(index, factor, color)
  for index, (factor, color) in enumerate([
    ("none", "Red"),
    ("Sodium Fluoride", "Grey"),
    ("EDTA", "Purple"),
    ("3.2% Sodium Citrate", "Light Blue"),
    ("Heparin", "Green"),
    ("Citrate", "Yellow"),
    ("(K2)EDTA", "Pink"),
    ("EDTA/plain", "Dark blue"),
  ])
]

# Print the whole catalogue.
for coagulator in coagulators:
  print(coagulator)

Coagulator 0, [factor=none, color=Red]
Coagulator 1, [factor=Sodium Fluoride, color=Grey]
Coagulator 2, [factor=EDTA, color=Purple]
Coagulator 3, [factor=3.2% Sodium Citrate, color=Light Blue]
Coagulator 4, [factor=Heparin, color=Green]
Coagulator 5, [factor=Citrate, color=Yellow]
Coagulator 6, [factor=(K2)EDTA, color=Pink]
Coagulator 7, [factor=EDTA/plain, color=Dark blue]


In [None]:
class TestTube:
  """A test tube with a random 12-digit barcode, a coagulator and an emergency flag.

  Attributes:
    id         -- the identifier passed to the constructor
    barcode    -- Python int in the range 100000000000..999999999999 (inclusive)
    coagulator -- one entry drawn uniformly from the module-level ``coagulators``
    emergency  -- the string "True" (probability 0.1) or "False" (probability 0.9)
  """

  def __init__(self, id):
    # Keep the identifier so a tube can be matched to its position in a list.
    self.id = id
    # dtype=np.int64: the 12-digit bounds do not fit a 32-bit integer, which is
    # NumPy's default integer type on some platforms. `high` is exclusive, so
    # 10**12 makes the largest 12-digit value reachable.
    self.barcode = int(np.random.randint(10**11, 10**12, dtype=np.int64))
    self.coagulator = np.random.choice(coagulators)
    # Kept as the strings "True"/"False": Moment compares this value to "True".
    self.emergency = np.random.choice(["True", "False"], p=(0.1, 0.9))

  def __str__(self):
    return "Testtube #%d, coagulator %d, Emergency %s" % (self.barcode, self.coagulator.id, self.emergency)
  

# Generate 1500 test tubes, one per list position 0-1499.
testtubes = [TestTube(tube_id) for tube_id in range(1500)]

# Preview the first ten tubes.
for testtube in testtubes[:10]:
  print(testtube)

Testtube #776135531495, coagulator 2, Emergency False
Testtube #541308934759, coagulator 3, Emergency False
Testtube #977446301326, coagulator 1, Emergency False
Testtube #456852704835, coagulator 4, Emergency False
Testtube #440972560479, coagulator 0, Emergency False
Testtube #338283233329, coagulator 2, Emergency False
Testtube #473581698902, coagulator 3, Emergency False
Testtube #881310803715, coagulator 5, Emergency False
Testtube #918458234488, coagulator 2, Emergency False
Testtube #717233135180, coagulator 5, Emergency False


In [None]:
import datetime
import random
class Moment:
  """Four consecutive timestamps for the test tube at index ``id``.

  Attributes:
    tube         -- ``testtubes[id]``
    random_date1 -- contact with patient: random second between 2020-01-01 and 2022-06-01
    random_date2 -- contact with biolab: less than two hours after random_date1
    random_date3 -- test taken: for emergency tubes the same short delay again,
                    otherwise between two hours and fourteen days after random_date2
    random_date4 -- results given: between three and five days after random_date3
  """

  def __init__(self, id):
    self.tube = testtubes[id]

    hour = 3600
    day = 24 * hour

    start_date = datetime.datetime(2020, 1, 1, 0, 0, 0)
    end_date = datetime.datetime(2022, 6, 1, 23, 59, 59)
    # total_seconds() returns a float, but random.randrange only accepts
    # integers (non-integer arguments raise TypeError from Python 3.12 on).
    window_seconds = int((end_date - start_date).total_seconds())

    # All four draws happen for every instance, in this fixed order.
    start_offset = random.randrange(window_seconds)
    short_delay = random.randrange(2 * hour)
    long_delay = random.randrange(2 * hour, 14 * day)
    time_to_result = random.randrange(3 * day, 5 * day)

    self.random_date1 = start_date + datetime.timedelta(seconds=start_offset)
    self.random_date2 = self.random_date1 + datetime.timedelta(seconds=short_delay)
    # str() lets the check work for the string flag "True" as well as a real bool.
    if str(self.tube.emergency) == "True":
      # NOTE(review): an emergency reuses the very same short delay, so
      # date3 - date2 always equals date2 - date1 for these tubes.
      test_delay = short_delay
    else:
      test_delay = long_delay
    self.random_date3 = self.random_date2 + datetime.timedelta(seconds=test_delay)
    self.random_date4 = self.random_date3 + datetime.timedelta(seconds=time_to_result)

  def __str__(self):
    return "Contact with patient #%s, Contact with biolab %s, Test taken %s, Results given %s, Emergency %s" % (self.random_date1, self.random_date2, self.random_date3, self.random_date4, self.tube.emergency)

# One Moment per test tube index 0-1499.
moments = [Moment(moment_id) for moment_id in range(1500)]

# Preview the first ten moments.
for moment in moments[:10]:
  print(moment)

Contact with patient #2021-02-10 21:39:56, Contact with biolab 2021-02-10 22:48:15, Test taken 2021-02-15 17:07:44, Results given 2021-02-20 15:41:46, Emergency False
Contact with patient #2021-11-26 14:58:27, Contact with biolab 2021-11-26 15:43:13, Test taken 2021-11-26 22:25:00, Results given 2021-12-01 17:41:56, Emergency False
Contact with patient #2020-10-11 22:41:41, Contact with biolab 2020-10-11 22:47:02, Test taken 2020-10-19 11:20:31, Results given 2020-10-24 11:06:45, Emergency False
Contact with patient #2021-09-26 22:46:04, Contact with biolab 2021-09-27 00:24:43, Test taken 2021-10-06 05:27:36, Results given 2021-10-10 02:05:05, Emergency False
Contact with patient #2021-02-06 19:54:52, Contact with biolab 2021-02-06 21:52:25, Test taken 2021-02-12 16:44:16, Results given 2021-02-17 08:46:49, Emergency False
Contact with patient #2021-12-15 12:13:43, Contact with biolab 2021-12-15 13:19:02, Test taken 2021-12-19 16:31:50, Results given 2021-12-22 22:57:16, Emergency Fals

In [None]:
# Employees split by job; filled on first use by _partition_employees_once().
testers = []
analysts = []


def _partition_employees_once():
  """Split ``employees`` into ``testers`` and ``analysts`` exactly once.

  Later calls return immediately, so each employee appears in one list at most
  once however many Result objects are created.
  """
  if testers or analysts:
    return
  for employee in employees:
    if employee.job == "Tester":
      testers.append(employee)
    else:
      analysts.append(employee)


class Result:
  """A test outcome with a threshold label, a description and an analyst.

  Attributes:
    threshold   -- "above", "equal to" or "below" (probabilities 0.2 / 0.6 / 0.2)
    description -- fixed placeholder text
    analyzer    -- employee drawn uniformly from ``analysts``
  """

  def __init__(self, id):
    self.threshold = np.random.choice(["above", "equal to", "below"], p=(0.2, 0.6, 0.2))
    self.description = "A very long textual description of what the results actually entail and whether there were any weird instances and/or combinations"
    # Creating the first Result also fills the module-level `testers` list.
    _partition_employees_once()
    self.analyzer = np.random.choice(analysts)

  def __str__(self):
    return "%s, The threshold is %s normal, %s," % (self.analyzer, self.threshold, self.description)

# Generate 1500 results.
results = [Result(result_id) for result_id in range(1500)]

# Preview the first ten results.
for result in results[:10]:
  print(result)

Employee #6, Silke Kalman, F, 834516093, (Analyst evavan-bernicia@example.com Biolab <primlab>), The threshold is above normal, A very long textual description of what the results actually entail and whether there were any weird instances and/or combinations,
Employee #3, Stefan Sas, M, 027148968, (Analyst tvan-ooyen@example.org Biolab <primlab>), The threshold is below normal, A very long textual description of what the results actually entail and whether there were any weird instances and/or combinations,
Employee #8, Valentijn de Gruijl, M, 017953649, (Analyst van-rietlily@example.org Biolab <primlab>), The threshold is above normal, A very long textual description of what the results actually entail and whether there were any weird instances and/or combinations,
Employee #6, Silke Kalman, F, 834516093, (Analyst evavan-bernicia@example.com Biolab <primlab>), The threshold is equal to normal, A very long textual description of what the results actually entail and whether there were a

In [None]:
import pandas as pd
import numpy as np

class Datasetsmall:
  """One record combining a moment, tube, tester, patient, GP, result and lab.

  ``str()`` renders the record as an event log: always the two contact events,
  plus the "Test taken" and "Results given" events unless the record was
  cancelled.

  Attributes:
    id               -- index used to look up ``moments[id]`` and ``testtubes[id]``
    moment, tube     -- the entries at that index
    tester           -- drawn uniformly from ``testers``
    patient          -- drawn uniformly from ``patients``
    gp               -- the patient's GP
    result           -- drawn uniformly from ``results``
    biolab           -- the module-level ``lab``
    moment1..moment4 -- the four timestamps of ``moment``
    cancelled        -- True with probability 0.03; fixed at construction
  """

  def __init__(self, id):
    self.id = id
    self.moment = moments[id]
    self.tube = testtubes[id]
    self.tester = np.random.choice(testers)
    self.patient = np.random.choice(patients)
    self.gp = self.patient.gp
    self.result = np.random.choice(results)
    self.biolab = lab
    self.moment1 = self.moment.random_date1
    self.moment2 = self.moment.random_date2
    self.moment3 = self.moment.random_date3
    self.moment4 = self.moment.random_date4
    # Decided once here so that every str() of this record gives the same text.
    self.cancelled = bool(np.random.choice([True, False], p=(0.03, 0.97)))

  def __str__(self):
    # The two contact events are part of every record.
    contact_events = "%s, Contact with patient,  %s, \n %d, %s, Contact with biolab,  %s, %s" % (
      self.moment1, self.patient, self.id, self.moment2, self.gp, self.biolab)
    if self.cancelled:
      return contact_events
    lab_events = " \n %d, %s, Test taken, %s, %s \n %d, %s, Results given,  %s" % (
      self.id, self.moment3, self.tester, self.tube, self.id, self.moment4, self.result)
    return contact_events + lab_events

# Generate 1500 records, one per moment / test tube index.
datasets = [Datasetsmall(dataset_id) for dataset_id in range(1500)]

# Preview the first twenty records.
for dataset in datasets[:20]:
  print(dataset)

# Export all records. The frame is built from the complete list, so it is
# created and written exactly once rather than once per record.
df = pd.DataFrame(datasets)
df.to_csv("labinformation.csv", sep=",", index_label=False)

2021-02-10 21:39:56, Contact with patient,  GP #15, Samuel de Beer, M, 203948750, Patient #523, Stella Vervoort, F, (891236053 2000-10-03 Jaylinnsteeg 95, 1893JC, Macharen), 
 0, 2021-02-10 22:48:15, Contact with biolab,  GP #15, Samuel de Beer, M, 203948750, Biolab <primlab> 
 0, 2021-02-15 17:07:44, Test taken, Employee #13, Mia Steenbakkers, F, 163859425, (Tester aaronden-buytelaar@example.org Biolab <primlab>), Testtube #776135531495, coagulator 2, Emergency False 
 0, 2021-02-20 15:41:46, Results given,  Employee #6, Silke Kalman, F, 834516093, (Analyst evavan-bernicia@example.com Biolab <primlab>), The threshold is above normal, A very long textual description of what the results actually entail and whether there were any weird instances and/or combinations,
2021-11-26 14:58:27, Contact with patient,  GP #29, Jay Haneberg, M, 247359063, Patient #136, Julia Courtier, F, (761295483 1928-02-28 Eviepad 0, 5518WE, De Goorn), 
 1, 2021-11-26 15:43:13, Contact with biolab,  GP #29, Jay 