In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats

def generate_patient_vitals(id, n, t=3):
    np.random.seed(id)
    
    # Simulate random values for heart rate (bpm)
    heart_rate = np.round(stats.truncnorm.rvs(-t, t, 72, 12, n), 2)
    
    # Simulate random values for blood pressure (mmHg)
    systolic_bp = np.round(stats.truncnorm.rvs(-t, t, 115, 12, n), 2)
    diastolic_bp = np.round(stats.truncnorm.rvs(-t, t, 75, 10, n), 2)

    # Simulate random values for temperature (°C)
    temperature = np.round(stats.truncnorm.rvs(-t, t, 37, 0.5, n), 2)

    # Simulate random values for respiration rate (breaths per minute)
    respiration_rate = np.round(stats.truncnorm.rvs(-t, t, 17, 3, n), 2)

    # Simulate random values for SpO2 (%)
    spo2 = np.round(stats.truncnorm.rvs(-t, t, 98, 1.5, n), 2)
    spo2[spo2 > 100] = 100

    return pd.DataFrame({
        'Patient ID': id,
        'Heart Rate': heart_rate,
        'Systolic BP': systolic_bp,
        'Diastolic BP': diastolic_bp,
        'Temperature': temperature,
        'Respiration Rate': respiration_rate,
        'SpO2': spo2
    })

In [2]:
import json

# Processing address dataset
geojson_file_path = '../ma_addresses/city_of_boston-addresses-city.geojson'

# Extract relevant information
addresses = []
with open(geojson_file_path, 'r') as file:
    for line in file:
        try:
            data = json.loads(line)
            properties = data.get("properties", {})
            number = properties.get("number", "")
            street = properties.get("street", "")
            postcode = properties.get("postcode", "")

            # Create consolidated address
            consolidated_address = f"{number} {street}, {postcode}"

            # Append to addresses list
            addresses.append(consolidated_address)

        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")

# Create a DataFrame
address_df = pd.DataFrame({"Address": addresses})

# Checking Addresses
address_df.head()

Unnamed: 0,Address
0,"6-10 A Street, 02136"
1,"7 A Street, 02136"
2,"10 A Street, 02127"
3,"172-174 A Street, 02210"
4,"176-178 A Street, 02210"


In [3]:
Patient_count = 10

In [4]:
patient_df = pd.DataFrame([])
for i in range(1, Patient_count+1):
    patient_df = pd.concat([patient_df, generate_patient_vitals(i, 100)])
address_df_sample = address_df.sample(n = Patient_count).reset_index()    

In [5]:
address_df_sample

Unnamed: 0,index,Address
0,194005,"68 W Concord Street, 02118"
1,92615,"142 Norwell Street, 02121"
2,34143,"1666 Commonwealth Avenue, 02135"
3,187283,"52 Columbia Road, 02121"
4,317145,"456 Centre Street, 02130"
5,219258,"38 Greenough Avenue, 02130"
6,143928,"1086-1088A Dorchester Avenue, 02125"
7,47382,"30 Elm Street, 02130"
8,243300,"73 Maple Street, 02136"
9,147641,"69-69A Readville Street, 02136"


In [6]:
from faker import Faker
import random

fake = Faker()

def generate_random_phone_number():
    # Generate a random US phone number for Boston area code (617)
    area_code = "617"
    middle_digits = f"{random.randint(200, 999):03d}"
    last_digits = f"{random.randint(1000, 9999):04d}"
    return f"({area_code}) {middle_digits}-{last_digits}"

def generate_random_ward_number():
    # Generate a random ward number (assuming a range)
    return random.randint(1, 20)

def generate_random_age():
    return random.randint(18, 75)

# Generate random patients with names, phone numbers, and ward numbers
names = []
phnumber = []
ward_list = []
age_list = []
for _ in range(Patient_count):
    names.append(fake.name())
    phnumber.append(generate_random_phone_number())
    ward_list.append(generate_random_ward_number())
    age_list.append(generate_random_age())
    
detail_df = pd.DataFrame({
    "Patient Name": names, 
    "Phone Number": phnumber,
    "Age": age_list,
    "Ward Number": ward_list
    })
detail_df

Unnamed: 0,Patient Name,Phone Number,Ward Number
0,Michelle Johnson,(617) 241-3056,8
1,Timothy Peterson,(617) 335-6450,18
2,Zachary Jones,(617) 858-8238,1
3,Jermaine Olson,(617) 474-7332,10
4,Dale Moore,(617) 411-3655,3
5,Darryl Clark,(617) 947-4374,11
6,Barbara Orr,(617) 252-4369,4
7,Stephanie Clark,(617) 484-4710,11
8,David Stone,(617) 570-7457,15
9,Patrick Molina,(617) 391-1438,19


In [7]:
# Joining detail DF and addresses DF
patient_info_df = detail_df.join(address_df_sample)
patient_info_df

Unnamed: 0,Patient Name,Phone Number,Ward Number,index,Address
0,Michelle Johnson,(617) 241-3056,8,194005,"68 W Concord Street, 02118"
1,Timothy Peterson,(617) 335-6450,18,92615,"142 Norwell Street, 02121"
2,Zachary Jones,(617) 858-8238,1,34143,"1666 Commonwealth Avenue, 02135"
3,Jermaine Olson,(617) 474-7332,10,187283,"52 Columbia Road, 02121"
4,Dale Moore,(617) 411-3655,3,317145,"456 Centre Street, 02130"
5,Darryl Clark,(617) 947-4374,11,219258,"38 Greenough Avenue, 02130"
6,Barbara Orr,(617) 252-4369,4,143928,"1086-1088A Dorchester Avenue, 02125"
7,Stephanie Clark,(617) 484-4710,11,47382,"30 Elm Street, 02130"
8,David Stone,(617) 570-7457,15,243300,"73 Maple Street, 02136"
9,Patrick Molina,(617) 391-1438,19,147641,"69-69A Readville Street, 02136"


In [8]:
patient_df.columns

Index(['Patient ID', 'Heart Rate', 'Systolic BP', 'Diastolic BP',
       'Temperature', 'Respiration Rate', 'SpO2'],
      dtype='object')

In [9]:
patient_df.describe()

Unnamed: 0,Patient ID,Heart Rate,Systolic BP,Diastolic BP,Temperature,Respiration Rate,SpO2
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,5.5,71.75773,114.64872,75.00352,37.0104,17.0471,97.86893
std,2.873719,11.320872,12.03627,9.678028,0.496171,2.943674,1.441492
min,1.0,36.3,80.63,45.77,35.65,8.49,93.52
25%,3.0,64.595,106.195,68.525,36.69,15.14,96.85
50%,5.5,71.86,115.095,75.115,37.015,17.075,97.93
75%,8.0,78.985,123.1125,81.9,37.35,19.045,99.0125
max,10.0,101.41,146.8,101.25,38.43,25.3,100.0


In [10]:
patient_df.to_csv("patient_vitals_CSV/patient_vitals.csv")
patient_info_df.to_csv("patient_vitals_CSV/patient_info.csv")