Loading cases.csv by pulling it from the API provided on the website

In [2]:
import requests
import pandas as pd
import io  # Helps read CSV from a string

# Endpoint that serves the case table as CSV text.
url = "https://api.vitaldb.net/cases"

# A timeout stops the cell from blocking forever if the server never answers;
# without one, requests waits indefinitely.
response = requests.get(url, timeout=60)

if response.status_code == 200 and response.text.strip():
    try:
        # Parse before writing so a malformed payload never overwrites cases.csv.
        df = pd.read_csv(io.StringIO(response.text))

        df.to_csv("cases.csv", index=False)
        print("CSV file saved as 'cases.csv'")

    except Exception as e:
        print("Error parsing CSV:", e)
else:
    print("Failed to fetch data or received an empty response.")
    print(f"Status Code: {response.status_code}")

# Load from disk either way: after a failed download this falls back to a
# previously saved cases.csv (and raises if no such file exists).
cases = pd.read_csv("cases.csv")
cases


CSV file saved as 'cases.csv'


Unnamed: 0,caseid,subjectid,casestart,caseend,anestart,aneend,opstart,opend,adm,dis,...,intraop_colloid,intraop_ppf,intraop_mdz,intraop_ftn,intraop_rocu,intraop_vecu,intraop_eph,intraop_phe,intraop_epi,intraop_ca
0,1,5955,0,11542,-552,10848.0,1668,10368,-236220,627780,...,0,120,0.0,100,70,0,10,0,0,0
1,2,2487,0,15741,-1039,14921.0,1721,14621,-221160,1506840,...,0,150,0.0,0,100,0,20,0,0,0
2,3,2861,0,4394,-590,4210.0,1090,3010,-218640,40560,...,0,0,0.0,0,50,0,0,0,0,0
3,4,1903,0,20990,-778,20222.0,2522,17822,-201120,576480,...,0,80,0.0,100,100,0,50,0,0,0
4,5,4416,0,21531,-1009,22391.0,2591,20291,-67560,3734040,...,0,0,0.0,0,160,0,10,900,0,2100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6383,6384,5583,0,15248,-260,15640.0,2140,14140,-215340,648660,...,0,150,0.0,0,90,0,20,0,0,0
6384,6385,2278,0,20643,-544,20996.0,2396,19496,-225600,1675200,...,0,100,0.0,0,100,0,25,30,0,300
6385,6386,4045,0,19451,-667,19133.0,3533,18233,-200460,836340,...,0,70,0.0,0,130,0,10,0,0,0
6386,6387,5230,0,12025,-550,12830.0,1730,11030,-227760,377040,...,0,120,0.0,0,50,0,0,0,0,0


Filtering down to the most important columns

In [3]:
# Narrow the frame to the eight selected columns.
cases = cases.loc[
    :,
    ["caseid", "age", "height", "weight", "bmi", "emop", "ane_type", "optype"],
]
cases

Unnamed: 0,caseid,age,height,weight,bmi,emop,ane_type,optype
0,1,77.0,160.2,67.50,26.3,0,General,Colorectal
1,2,54.0,167.3,54.80,19.6,0,General,Stomach
2,3,62.0,169.1,69.70,24.4,0,General,Biliary/Pancreas
3,4,74.0,160.6,53.00,20.5,0,General,Stomach
4,5,66.0,171.0,59.70,20.4,1,General,Vascular
...,...,...,...,...,...,...,...,...
6383,6384,64.0,161.5,63.00,24.2,0,General,Stomach
6384,6385,69.0,159.3,62.30,24.6,0,General,Colorectal
6385,6386,61.0,151.7,43.25,18.8,0,General,Stomach
6386,6387,24.0,155.7,55.50,22.9,0,General,Biliary/Pancreas


This code pulls the tracks table (trks), a df in which each row pairs a caseid with a track name (tname), i.e. the method/variable being recorded, and that track's API identifier (tid)

In [4]:
# A timeout stops the cell from blocking forever if the server never answers;
# without one, requests waits indefinitely.
response = requests.get('https://api.vitaldb.net/trks', timeout=60)

if response.status_code == 200 and response.text.strip():
    try:
        # Parse before writing so a malformed payload never overwrites trks.csv.
        df = pd.read_csv(io.StringIO(response.text))

        df.to_csv("trks.csv", index=False)
        print("CSV file saved as 'trks.csv'")

    except Exception as e:
        print("Error parsing CSV:", e)
else:
    print("Failed to fetch data or received an empty response.")
    print(f"Status Code: {response.status_code}")

# Load from disk either way: after a failed download this falls back to a
# previously saved trks.csv (and raises if no such file exists).
trks = pd.read_csv("trks.csv")
trks

CSV file saved as 'trks.csv'


Unnamed: 0,caseid,tname,tid
0,1,BIS/BIS,fd869e25ba82a66cc95b38ed47110bf4f14bb368
1,1,BIS/EEG1_WAV,0aa685df768489a18a5e9f53af0d83bf60890c73
2,1,BIS/EEG2_WAV,ad13b2c39b19193c8ae4a2de4f8315f18d61a57e
3,1,BIS/EMG,2525603efe18d982764dbca457affe7a45e766a9
4,1,BIS/SEF,1c91aec859304840dec75acf4a35da78be0e8ef0
...,...,...,...
486444,6388,Solar8000/VENT_PIP,2d63adbc7e2653f14348e219816673cde3358cf6
486445,6388,Solar8000/VENT_PPLAT,6f6604255858ddc8f6a01b9f4774b0d43105f6da
486446,6388,Solar8000/VENT_RR,f34f3fae7fd963355c1c7060e1e876d20fa87536
486447,6388,Solar8000/VENT_SET_TV,4a4a55b8aebf9c76a4a76f62a7c1ec6fcb80e618


This is an example of pulling BIS/BIS data from case 1

In [16]:
# The path component is a track id (tid); this one is listed in trks as
# case 1's BIS/BIS track.
# A timeout stops the cell from blocking forever if the server never answers.
response = requests.get('https://api.vitaldb.net/fd869e25ba82a66cc95b38ed47110bf4f14bb368', timeout=60)

if response.status_code == 200 and response.text.strip():
    try:
        # Parse before writing so a malformed payload never overwrites case1.csv.
        df = pd.read_csv(io.StringIO(response.text))

        df.to_csv("case1.csv", index=False)
        print("CSV file saved as 'case1.csv'")

    except Exception as e:
        print("Error parsing CSV:", e)
else:
    print("Failed to fetch data or received an empty response.")
    print(f"Status Code: {response.status_code}")

# Load from disk either way: after a failed download this falls back to a
# previously saved case1.csv (and raises if no such file exists).
case_1 = pd.read_csv("case1.csv")
case_1

CSV file saved as 'case1.csv'


Unnamed: 0,Time,BIS/BIS
0,0.156,0.0
1,1.156,0.0
2,2.156,0.0
3,3.156,0.0
4,4.156,0.0
...,...,...
11535,11535.200,0.0
11536,11536.200,0.0
11537,11537.200,0.0
11538,11538.200,0.0


Grouping the original cases df in order to eventually simulate the toggleable events in our final visualization. For the sake of easier computation, I have decided to group ages into ten-year bands, and weight and height into three separate groups each

In [6]:
# Bin edges. With right=False every bin is half-open: it includes its lower
# edge and excludes its upper edge.
age_bins = list(range(20, 90, 10))  # edges 20, 30, ..., 80 -> six 10-year bands
weight_bins = [45, 60, 75, 90]  # three bands: [45, 60), [60, 75), [75, 90)
height_bins = [150, 160, 170, 180]  # three bands: [150, 160), [160, 170), [170, 180)

# assign() builds a new frame carrying the three categorical band columns;
# values outside the edges get no band (NaN).
cases = cases.assign(
    age_group=pd.cut(
        cases["age"],
        bins=age_bins,
        right=False,
        labels=[f"{lower}-{lower + 9}" for lower in age_bins[:-1]],
    ),
    weight_group=pd.cut(
        cases["weight"],
        bins=weight_bins,
        right=False,
        labels=["45-59", "60-74", "75-89"],
    ),
    height_group=pd.cut(
        cases["height"],
        bins=height_bins,
        right=False,
        labels=["150-159", "160-169", "170-179"],
    ),
)

# One row per (age, weight, height) band combination, holding up to the first
# ten case ids that fall into it.
grouped_cases = (
    cases.groupby(["age_group", "weight_group", "height_group"], observed=False)["caseid"]
    .apply(lambda ids: ids.head(10).tolist())
    .reset_index()
)

grouped_cases


Unnamed: 0,age_group,weight_group,height_group,caseid
0,20-29,45-59,150-159,"[56, 1362, 1437, 1885, 1905, 1980, 2006, 2166,..."
1,20-29,45-59,160-169,"[108, 174, 183, 220, 326, 395, 431, 457, 572, ..."
2,20-29,45-59,170-179,"[493, 2558, 3054, 3978, 5239, 5669, 5817, 5963]"
3,20-29,60-74,150-159,"[2332, 2522, 3147, 6187]"
4,20-29,60-74,160-169,"[176, 791, 1095, 2090, 2637, 2879, 3081, 3137,..."
5,20-29,60-74,170-179,"[119, 368, 455, 537, 588, 686, 734, 803, 908, ..."
6,20-29,75-89,150-159,"[1685, 2182, 2883]"
7,20-29,75-89,160-169,"[5410, 6252]"
8,20-29,75-89,170-179,"[14, 194, 196, 969, 2069, 2397, 3101, 3112, 32..."
9,30-39,45-59,150-159,"[9, 36, 126, 153, 328, 339, 393, 402, 417, 648]"


The following code is used to get the case ids from our previous dataframe: when showing real BIS dilation, I want to use an aggregate of the cases provided for a less biased visualization

In [7]:
def get_case_ids(age_group, weight_group, height_group):
    """Return the case-id entry stored in grouped_cases for one band combination.

    Args:
        age_group: Label to match against grouped_cases["age_group"].
        weight_group: Label to match against grouped_cases["weight_group"].
        height_group: Label to match against grouped_cases["height_group"].

    Returns:
        The "caseid" value of the first row whose three group columns equal
        the given labels.

    Raises:
        IndexError: If no row matches the three labels. The message names the
            labels instead of the bare "index 0 is out of bounds".
    """
    matches = grouped_cases.loc[
        (grouped_cases["age_group"] == age_group)
        & (grouped_cases["weight_group"] == weight_group)
        & (grouped_cases["height_group"] == height_group),
        "caseid",
    ]
    if matches.empty:
        # Same exception type as the original `.values[0]` lookup, but with
        # enough context to see which combination was requested.
        raise IndexError(
            "no group found for "
            f"age_group={age_group!r}, weight_group={weight_group!r}, "
            f"height_group={height_group!r}"
        )
    return matches.iloc[0]

# Example lookup: case ids stored for one age/weight/height combination.
case_ids = get_case_ids(
    age_group="70-79",
    weight_group="60-74",
    height_group="150-159",
)
print(case_ids)

[165, 170, 181, 184, 242, 291, 315, 327, 441, 479]


Now this code takes advantage of the trks df and pulls the needed tids based on the array of caseids that was compiled before. There is an issue, though, that will be discussed later

In [11]:
def get_track_tids_1(case_ids):
    """Collect the BIS/BIS track ids of each case as a list of lists.

    Args:
        case_ids: Iterable of case ids to look up in trks.

    Returns:
        One inner list per entry of case_ids, in the same order, holding the
        "tid" values of that case's rows whose tname is "BIS/BIS". The inner
        list is empty when the case has no such row.
    """

    def bis_tids_for(case):
        # Rows of trks that belong to this case and are the BIS/BIS track.
        wanted = (trks["caseid"] == case) & (trks["tname"] == "BIS/BIS")
        return trks.loc[wanted, "tid"].tolist()

    return [bis_tids_for(case) for case in case_ids]

# Example usage: look up the BIS/BIS track ids for the case ids selected above.
# The result has one inner list per case id, which is empty when no track is found.
tids = get_track_tids_1(case_ids)
print(tids)

[['aafd57d0204077177d35d91181b82784085f9fe6'], [], ['cf167208ca7c83fd039b486bc4a09c88d9ca453e'], ['5737b94826cd45c1e77ad4bfa1990516c7307e92'], [], ['28e0265ee1fbbd94af2fc0fb6236e4075d5076b2'], ['0f61ec99f8cb64be98511cd70628e8e2fdff0fc0'], ['b6bca36fd62edec37947a038c9cff87b8147ddd9'], ['940091dde90722a31a090bc1ef5557060f86c729'], ['0dab945d0110f82df78388c8b60ad39e57fe464d']]


As you might have noticed, some of the lists inside tids are empty. That is because only about 92 percent of the operations have BIS/BIS values logged, which might be due to some unforeseen medical reasons

In [10]:
# Number of distinct cases that have at least one BIS/BIS track ...
bis_count = trks.loc[trks["tname"] == "BIS/BIS", "caseid"].nunique()
# ... as a percentage of all distinct cases in trks, rounded to two decimals.
round(bis_count / trks["caseid"].nunique() * 100, 2)

91.84

Fix the previous issue with extend(). We will not do anything about the missing ~8 percent of cases: as long as every group has at least a single BIS/BIS tid, we are good

In [13]:
def get_track_tids(case_ids, track_name="BIS/BIS"):
    """Return a flat list of track ids for the given cases.

    Args:
        case_ids: Iterable of case ids to look up in trks.
        track_name: Value of trks["tname"] to select. Defaults to "BIS/BIS",
            which reproduces the original hard-coded behaviour.

    Returns:
        A single flat list of "tid" values, ordered by the order of case_ids.
        Cases without a matching track contribute nothing, so the list may be
        shorter than case_ids.
    """
    # Filter to the requested track once, rather than rescanning the whole
    # trks table for every case id.
    wanted_tracks = trks.loc[trks["tname"] == track_name, ["caseid", "tid"]]

    flat_tids = []
    for case in case_ids:
        # extend() flattens: an empty match simply adds no elements.
        flat_tids.extend(
            wanted_tracks.loc[wanted_tracks["caseid"] == case, "tid"].tolist()
        )

    return flat_tids

# Example usage: the same lookup as before, now returning one flat list of tids
# in which cases without a BIS/BIS track are simply absent.
tids = get_track_tids(case_ids)
print(tids)

['aafd57d0204077177d35d91181b82784085f9fe6', 'cf167208ca7c83fd039b486bc4a09c88d9ca453e', '5737b94826cd45c1e77ad4bfa1990516c7307e92', '28e0265ee1fbbd94af2fc0fb6236e4075d5076b2', '0f61ec99f8cb64be98511cd70628e8e2fdff0fc0', 'b6bca36fd62edec37947a038c9cff87b8147ddd9', '940091dde90722a31a090bc1ef5557060f86c729', '0dab945d0110f82df78388c8b60ad39e57fe464d']


In [17]:
import plotly.express as px

# Line plot of the BIS/BIS column of case_1 against its Time column.
fig = px.line(
    case_1,
    x="Time",
    y="BIS/BIS",
    title="BIS/BIS Over Time",
    labels={"Time": "Time", "BIS/BIS": "BIS Value"},
)

fig.show()

In [18]:
import pandas as pd
import plotly.express as px
import numpy as np

# NOTE: df is an alias of case_1 (not a copy), so the helper columns added
# below also appear on case_1.
df = case_1

# Convert time to a percentage of the largest timestamp
df["Time_Percent"] = (df["Time"] / df["Time"].max()) * 100

# Split the observed Time_Percent range into 100 equal-width bins labelled 1..100
df["Time_Bin"] = pd.cut(df["Time_Percent"], bins=100, labels=np.arange(1, 101))

# Mean BIS per bin. Time_Bin is categorical, so pass `observed` explicitly:
# this keeps today's behaviour (empty bins kept, with a NaN mean), avoids the
# pandas FutureWarning about the changing default, and matches the groupby
# used for grouped_cases.
aggregated_df = df.groupby("Time_Bin", observed=False)["BIS/BIS"].mean().reset_index()

# Convert bin labels to numeric for plotting
aggregated_df["Time_Bin"] = aggregated_df["Time_Bin"].astype(int)

# Keep only bins whose mean BIS is positive; this drops all-zero bins and
# empty bins (NaN mean).
# NOTE(review): zero readings inside a bin are still averaged into its mean
# before this filter runs - confirm that is intended.
aggregated_df = aggregated_df[aggregated_df["BIS/BIS"] > 0]

# Plot
fig = px.line(
    aggregated_df,
    x="Time_Bin",
    y="BIS/BIS",
    title="Aggregated BIS/BIS Over Time (Percent Scale)",
    labels={"Time_Bin": "Percentage of Total Duration", "BIS/BIS": "BIS Value"}
)

fig.show()




