In [22]:
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats as stats

def generate_patient_vitals(id, n, t=3):
    """Simulate ``n`` vital-sign readings for a single patient.

    Each vital is sampled from a normal distribution truncated at ``t``
    standard deviations on either side of its mean, then rounded to two
    decimals. SpO2 readings above 100 are capped at 100.

    Side effect: NumPy's global RNG is seeded with ``id`` on every call, so
    the same ``id`` reproduces the same readings.

    Parameters
    ----------
    id : int
        Patient identifier; also used as the random seed.
    n : int
        Number of readings to generate.
    t : float, default 3
        Truncation bound, in standard deviations, applied to every vital.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Patient ID', 'Heart Rate', 'Systolic BP', 'Diastolic BP',
        'Temperature', 'Respiration Rate', 'SpO2'.
    """
    np.random.seed(id)

    def _draw(mean, sd):
        # One truncated-normal sample of length n, rounded to 2 decimals.
        return np.round(stats.truncnorm.rvs(-t, t, loc=mean, scale=sd, size=n), 2)

    # (column name, mean, standard deviation). The order is significant: the
    # vitals are drawn sequentially from the same seeded random stream.
    vital_specs = (
        ('Heart Rate', 80, 12),        # bpm
        ('Systolic BP', 105, 12),      # mmHg
        ('Diastolic BP', 70, 10),      # mmHg
        ('Temperature', 37, 0.5),      # °C
        ('Respiration Rate', 17, 3),   # breaths per minute
        ('SpO2', 98, 1.5),             # %
    )

    columns = {'Patient ID': id}
    for column_name, mean, sd in vital_specs:
        columns[column_name] = _draw(mean, sd)

    # Cap oxygen saturation at 100.
    columns['SpO2'] = np.minimum(columns['SpO2'], 100)

    return pd.DataFrame(columns)

In [23]:
import json

# Processing address dataset
geojson_file_path = '../ma_addresses/city_of_boston-addresses-city.geojson'

# Extract relevant information.
# The file is parsed as one JSON object per line; each object's "properties"
# mapping supplies the house number, street and postcode.
addresses = []
# GeoJSON is UTF-8 by specification, so do not rely on the platform default
# encoding when opening the file.
with open(geojson_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Blank lines carry no record; skip them instead of reporting an error.
        if not line.strip():
            continue

        try:
            data = json.loads(line)
        except json.JSONDecodeError as e:
            # Report the malformed line and carry on with the rest of the file.
            print(f"Error decoding JSON: {e}")
            continue

        # "properties" may be present but null in GeoJSON; treat that the
        # same as a missing key rather than failing on None.get(...).
        properties = data.get("properties") or {}
        number = properties.get("number", "")
        street = properties.get("street", "")
        postcode = properties.get("postcode", "")

        # Create consolidated address
        consolidated_address = f"{number} {street}, {postcode}"

        # Append to addresses list
        addresses.append(consolidated_address)

# Create a DataFrame
address_df = pd.DataFrame({"Address": addresses})

# Checking Addresses
address_df.head()

Unnamed: 0,Address
0,"6-10 A Street, 02136"
1,"7 A Street, 02136"
2,"10 A Street, 02127"
3,"172-174 A Street, 02210"
4,"176-178 A Street, 02210"


In [24]:
# Number of synthetic patients to generate; later cells use this value to size
# the vitals data, the address sample and the patient details.
Patient_count = 10

In [25]:
# Simulate 100 vitals readings for each patient ID 1..Patient_count and stack
# the per-patient frames with a single concat. Collecting the frames first
# avoids re-copying the accumulated data on every iteration and avoids
# concatenating onto an empty DataFrame.
patient_frames = [
    generate_patient_vitals(i, 100) for i in range(1, Patient_count + 1)
]
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
patient_df = pd.concat(patient_frames) if patient_frames else pd.DataFrame([])

# Draw one address per patient and renumber the rows from 0.
# No random_state is passed, so the draw uses NumPy's global RNG state as it
# stands at this point in the notebook.
address_df_sample = address_df.sample(n=Patient_count).reset_index(drop=True)

In [26]:
# Display the sampled addresses (Patient_count rows, re-indexed from 0).
address_df_sample

Unnamed: 0,Address
0,"68 W Concord Street, 02118"
1,"142 Norwell Street, 02121"
2,"1666 Commonwealth Avenue, 02135"
3,"52 Columbia Road, 02121"
4,"456 Centre Street, 02130"
5,"38 Greenough Avenue, 02130"
6,"1086-1088A Dorchester Avenue, 02125"
7,"30 Elm Street, 02130"
8,"73 Maple Street, 02136"
9,"69-69A Readville Street, 02136"


In [27]:
from faker import Faker
import random

# Shared Faker instance; used below to generate patient names.
# NOTE(review): no seed is set for Faker or `random` in this cell, so the
# generated details presumably differ between runs — confirm that is intended.
fake = Faker()

def generate_random_phone_number():
    """Return a random phone number string in the Boston 617 area code.

    The result has the form ``"(617) XXX-XXXX"``: the three middle digits are
    drawn from 200-999 and the last four from 1000-9999 (both inclusive),
    using the module-level ``random`` generator.
    """
    # Draw order (middle digits first, then last digits) is kept so a seeded
    # generator yields the same number.
    exchange = random.randint(200, 999)
    subscriber = random.randint(1000, 9999)
    return "({}) {:03d}-{:04d}".format("617", exchange, subscriber)

def generate_random_ward_number():
    """Return a random ward number between 1 and 20 inclusive.

    The 1-20 range is an assumed range of wards, not one derived from data.
    """
    lowest_ward, highest_ward = 1, 20
    return random.randint(lowest_ward, highest_ward)

def generate_random_age():
    """Return a random integer age between 18 and 75 inclusive."""
    youngest, oldest = 18, 75
    return random.randint(youngest, oldest)

# Generate one (name, phone number, ward number, age) record per patient.
# The four generators are called in this order for each patient, one patient
# at a time, so the sequence of random draws is the same as building the
# lists in a loop.
patient_rows = [
    (
        fake.name(),
        generate_random_phone_number(),
        generate_random_ward_number(),
        generate_random_age(),
    )
    for _ in range(Patient_count)
]

# Split the records into one list per field.
names = [row[0] for row in patient_rows]
phnumber = [row[1] for row in patient_rows]
ward_list = [row[2] for row in patient_rows]
age_list = [row[3] for row in patient_rows]

# Assemble the patient details table (column order: name, phone, age, ward).
detail_df = pd.DataFrame({
    "Patient Name": names,
    "Phone Number": phnumber,
    "Age": age_list,
    "Ward Number": ward_list,
})
detail_df

Unnamed: 0,Patient Name,Phone Number,Age,Ward Number
0,Kristopher Sanders,(617) 228-2863,22,19
1,Joseph Shelton,(617) 998-1464,65,17
2,Clinton Moody,(617) 271-7925,35,18
3,Teresa George,(617) 627-8629,58,14
4,Mary Pittman,(617) 523-6780,21,11
5,Andre Carr,(617) 450-6229,44,4
6,Sandra Holt,(617) 391-5275,61,1
7,Dr. Marissa Good,(617) 423-2954,55,4
8,Devin Lee,(617) 850-6729,29,2
9,Joshua Smith,(617) 256-5283,51,15


In [28]:
# Joining detail DF and addresses DF.
# DataFrame.join aligns rows on the index (a left join by default). Both frames
# carry a fresh 0-based index here, so row i of the patient details is paired
# with row i of the sampled addresses.
patient_info_df = detail_df.join(address_df_sample)
patient_info_df

Unnamed: 0,Patient Name,Phone Number,Age,Ward Number,Address
0,Kristopher Sanders,(617) 228-2863,22,19,"68 W Concord Street, 02118"
1,Joseph Shelton,(617) 998-1464,65,17,"142 Norwell Street, 02121"
2,Clinton Moody,(617) 271-7925,35,18,"1666 Commonwealth Avenue, 02135"
3,Teresa George,(617) 627-8629,58,14,"52 Columbia Road, 02121"
4,Mary Pittman,(617) 523-6780,21,11,"456 Centre Street, 02130"
5,Andre Carr,(617) 450-6229,44,4,"38 Greenough Avenue, 02130"
6,Sandra Holt,(617) 391-5275,61,1,"1086-1088A Dorchester Avenue, 02125"
7,Dr. Marissa Good,(617) 423-2954,55,4,"30 Elm Street, 02130"
8,Devin Lee,(617) 850-6729,29,2,"73 Maple Street, 02136"
9,Joshua Smith,(617) 256-5283,51,15,"69-69A Readville Street, 02136"


In [29]:
# List the column names of the combined vitals frame.
patient_df.columns

Index(['Patient ID', 'Heart Rate', 'Systolic BP', 'Diastolic BP',
       'Temperature', 'Respiration Rate', 'SpO2'],
      dtype='object')

In [30]:
# Summary statistics for the numeric columns across all simulated readings.
patient_df.describe()

Unnamed: 0,Patient ID,Heart Rate,Systolic BP,Diastolic BP,Temperature,Respiration Rate,SpO2
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,5.5,79.75773,104.64872,70.00352,37.0104,17.0471,97.86893
std,2.873719,11.320872,12.03627,9.678028,0.496171,2.943674,1.441492
min,1.0,44.3,70.63,40.77,35.65,8.49,93.52
25%,3.0,72.595,96.195,63.525,36.69,15.14,96.85
50%,5.5,79.86,105.095,70.115,37.015,17.075,97.93
75%,8.0,86.985,113.1125,76.9,37.35,19.045,99.0125
max,10.0,109.41,136.8,96.25,38.43,25.3,100.0


In [31]:
# Write both tables to CSV. to_csv does not create missing directories, so
# make sure the output folder exists first (no-op when it is already there).
output_dir = Path("patient_vitals_CSV")
output_dir.mkdir(parents=True, exist_ok=True)

# The DataFrame index is written as the first (unnamed) column of each file.
patient_df.to_csv(output_dir / "patient_vitals.csv")
patient_info_df.to_csv(output_dir / "patient_info.csv")