In [None]:
# Install the Python packages for this notebook; rpy2 is the bridge that lets
# the later cells run R code.
!pip install rpy2 matplotlib pandas

# Register rpy2's IPython extension so cells that start with %%R are run as R
%load_ext rpy2.ipython

In [None]:
%%R
### Load Data

# Load required libraries
library(data.table)
library(ggplot2)
library(dplyr)
library(survey)

# Read the pre-cleaned NASS 2020 extract into a data.table. The CSV is
# expected to already be present in the Colab filesystem.
NASS_2020_all <- data.table::fread(
  "/content/NASS_2020_all.csv"
)

# Sanity check: print a compact overview of the columns and their types
utils::str(NASS_2020_all)

In [None]:
%%R
### Age and Sociodemographic Diversity Analysis

# Create a 0/1 indicator for White individuals (RACE code 1). Records whose
# RACE is missing stay NA, so they are left out of the proportions below
# instead of being silently counted as non-White.
NASS_2020_all[, WHITE := ifelse(RACE == 1, 1, 0)]

# Unweighted share of White individuals among records with a known RACE.
# na.rm = TRUE is required: one missing RACE would otherwise turn the whole
# mean into NA.
unadjusted_proportion_white <- mean(NASS_2020_all$WHITE, na.rm = TRUE)
print(paste("Unadjusted proportion of WHITE:", unadjusted_proportion_white))

# Weighted share using the discharge weights (DISCWT).
# NOTE(review): the design uses KEY_NASS as the cluster id and declares no
# strata. That does not change the point estimate reported here, but any
# standard errors would depend on the survey's real stratum/cluster
# variables -- confirm against the NASS documentation before reporting them.
design <- svydesign(ids = ~KEY_NASS, weights = ~DISCWT, data = NASS_2020_all)
weighted_proportion_white <- svymean(~WHITE, design, na.rm = TRUE)
print(paste("Weighted proportion of WHITE:", coef(weighted_proportion_white)))

# Save both estimates side by side as a one-row CSV. unname() drops the
# "WHITE" name carried by coef() so it cannot leak into the data frame.
write.csv(
  data.frame(
    Unadjusted = unadjusted_proportion_white,
    Weighted = unname(coef(weighted_proportion_white))
  ),
  "/content/White_Proportion_Analysis.csv",
  row.names = FALSE
)

In [None]:
%%R
### Generate Overview Graphs

# Code -> label lookups for the categorical variables
teach_labels <- c("0" = "Non-Teaching", "1" = "Teaching")
location_labels <- c("0" = "Rural", "1" = "Urban")
bed_labels <- c("1" = "0-99", "2" = "100-299", "3" = "300+")
region_labels <- c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West")
race_labels <- c("1" = "White", "2" = "Black", "3" = "Hispanic", "4" = "Asian/Pacific", "5" = "Native", "6" = "Other")

# Turn a coded column into a discrete factor for plotting.
#   x      - vector of category codes (numeric or character)
#   labels - named character vector mapping code -> display label
# Returns a factor. When every non-missing code has an entry in `labels` the
# levels are the display labels in lookup order; otherwise the raw values are
# kept as levels so that no category is silently turned into NA.
label_codes <- function(x, labels) {
  codes <- as.character(x)
  if (all(is.na(codes) | codes %in% names(labels))) {
    factor(codes, levels = names(labels), labels = unname(labels))
  } else {
    factor(x)
  }
}

# Define a consistent theme for all plots
custom_theme <- theme_minimal() +
  theme(
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )

# FIGURE 1: record counts by region, stacked by bed size category.
# The codes are converted to factors inside aes() because a numeric `fill`
# is treated as continuous and creates no groups, so the bars would not be
# segmented. Converting here also leaves NASS_2020_all itself unchanged.
fig1 <- ggplot(
  NASS_2020_all,
  aes(
    x = label_codes(HOSP_REGION, region_labels),
    fill = label_codes(HOSP_BEDSIZE_CAT, bed_labels)
  )
) +
  geom_bar() +
  custom_theme +
  xlab("US Region") +
  ggtitle("Hospitals within NASS 2020 Dataset", subtitle = "Segmented by Bed Size Category")

# Save the plot
ggsave("/content/Hospitals_by_Region_and_Bed_Size.png", plot = fig1)

# FIGURE 2: age densities, one semi-transparent curve per race category.
# RACE must be discrete for geom_density() to draw a separate curve per group.
fig2 <- ggplot(
  NASS_2020_all,
  aes(x = AGE, fill = label_codes(RACE, race_labels))
) +
  geom_density(alpha = 0.5) +
  custom_theme +
  xlab("Age") +
  ggtitle("Age Distribution by Race")

# Save the plot
ggsave("/content/Age_Distribution_by_Race.png", plot = fig2)

In [None]:
%%R
### Save Outputs

# Write the current NASS_2020_all table to CSV, then confirm on the console
data.table::fwrite(
  x = NASS_2020_all,
  file = "/content/NASS_2020_all_cleaned.csv"
)
print("All outputs saved to Colab environment.")

In [None]:
%%R
### Poster Space Setup

# Combine all plots into a single multi-page PDF for presentation
pdf("/content/NASS_Analysis_Poster.pdf", width = 12, height = 8)

# Remember which device is the PDF so exactly that one gets closed below
poster_device <- dev.cur()

# Render one figure per page. The `finally` handler closes the PDF even if a
# plot fails to render, so a failed run does not leave a dangling device and
# a locked, half-written file. invisible() keeps the value returned by
# print() from being auto-printed again after the device is closed.
invisible(tryCatch(
  {
    # Add Figure 1
    print(fig1)

    # Add Figure 2
    print(fig2)
  },
  finally = dev.off(poster_device)
))

# Save cleaned data for poster reference (same target file as the
# "Save Outputs" step, so this simply refreshes it)
fwrite(NASS_2020_all, "/content/NASS_2020_all_cleaned.csv")
print("Poster PDF and cleaned data saved to Colab environment.")