# Benchmarks

## Initialize

In [None]:
#library(Rmisc)
library(dtplyr)
library(tidyverse)
library(glue)
library(arrow)
library(patchwork)
library(data.table)
library("jsonlite")
library(ggthemes)

In [None]:
# Pick the storage root from the host name: machines whose node name contains
# "sc" use the /sc-projects mount, every other machine uses the shared
# analysis directory.
base_path <- if (grepl("sc", Sys.info()[["nodename"]], fixed = TRUE)) {
  "/sc-projects/sc-proj-ukb-cvd"
} else {
  "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
}
print(base_path)

# Project-level directories, all derived from the storage root.
project_label <- "22_medical_records"
project_path <- glue("{base_path}/results/projects/{project_label}")
figure_path <- glue("{project_path}/figures")
output_path <- glue("{project_path}/data")

# Identifier of the experiment run and the directory holding its outputs.
experiment <- 220627
experiment_path <- glue("{output_path}/{experiment}")

In [None]:
# Text sizes shared by the figures in this notebook.
base_size <- 8
title_size <- 10
facet_size <- 10
geom_text_size <- 3

# Install the default ggplot2 theme for the session: classic theme without
# facet strip backgrounds, thin axis lines/ticks, major grid lines switched
# back on, and the legend placed below the panel.
theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2),
      panel.grid.major = element_line()
    )
)

In [None]:
# UK rows of the `world.cities` table that ships with the maps package.
# The dataset is addressed through its namespace because the visible notebook
# never attaches maps, so a bare `world.cities` is not found in a fresh session.
data <- maps::world.cities %>%
  filter(country.etc == "UK")

In [None]:
library(ggrepel)

In [None]:
# Show the ten rows with the largest `pop` values (ascending order, largest last).
tail(arrange(data, pop), n = 10)

In [None]:
# Look up the rows whose city name contains "Hounslow".
filter(data, str_detect(name, "Hounslow"))

In [None]:
# Install ggmap only when it is missing, so that re-running the notebook does
# not download and reinstall the package every time.
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}

In [None]:
library(ggmap)

# City names to geocode; `plot_df` is later joined against these names through
# the `city` column, so the spellings are kept exactly as they are.
cities <- c(
  "Edinburgh", "Glasgow", "Newcastle", "Middlesborough", "Leeds",
  "Sheffield", "Bury", "Manchester", "Liverpool", "Stockport",
  "Wrexham", "Stoke", "Nottingham", "Birmingham", "Oxford",
  "Reading", "Bristol", "Swansea", "Cardiff", "Hounslow", "London", "Croydon"
)

# Geocode "<city>, uk" for every entry and attach the plain city name as the
# join key.
cities_df <- mutate(
  geocode(paste0(cities, ", ", "uk")),
  city = cities
)

In [None]:
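# Keep the Google API key out of the notebook: read it from the environment.
# Any key that was ever committed in this cell should be revoked and rotated.
#register_google(key = Sys.getenv("GOOGLE_MAPS_API_KEY"))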

In [None]:
# Display the geocoding result so the coordinates per city can be checked by eye.
cities_df

In [None]:
# Load the participant id and assessment-centre columns of the baseline
# covariates. The location is built from `base_path` so the cell follows the
# host-dependent root chosen earlier instead of being pinned to "/sc-projects".
# NOTE(review): assumes the data directory layout is mirrored under the other
# root as well — confirm.
baseline_covariates <- arrow::read_feather(
  file.path(
    base_path,
    "data/2_datasets_pre/220603_medicalhistory/baseline_covariates.feather"
  ),
  col_select = c("eid", "uk_biobank_assessment_centre_f54_0_0")
)

In [None]:
# Count rows per assessment centre (column `n`) and expose the centre column
# under the name `city`.
number_individuals <- rename(
  tally(group_by(baseline_covariates, uk_biobank_assessment_centre_f54_0_0)),
  city = uk_biobank_assessment_centre_f54_0_0
)

In [None]:
# Harmonise centre names with the geocoded city names ("Barts" is mapped to
# "London", a trailing " (pilot)" marker is stripped) and attach the
# coordinates from `cities_df`.
plot_df <- number_individuals %>%
  mutate(
    city = recode(city, "Barts" = "London"),
    city = str_remove_all(city, " \\(pilot\\)")
  ) %>%
  left_join(cities_df, by = "city")

In [None]:
# Figure size and resolution; also reused when the figure is saved.
plot_width <- 8
plot_height <- 10
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.dpi = plot_dpi
)

# Outline polygons of the United Kingdom. The visible notebook never defines
# `UK`, so it is built here when no such object exists yet; an object defined
# earlier in the session is left untouched. map_data() needs the maps package
# to be installed.
if (!exists("UK")) {
  UK <- map_data("world") %>%
    filter(region == "UK")
}

# Bubble map: grey country outline, one red point per centre sized by the
# number of individuals, and non-overlapping city labels.
uk_map <- ggplot() +
  geom_polygon(
    data = UK,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  # geom_point(data = data, aes(x = long, y = lat, alpha = pop)) +
  geom_point(
    data = plot_df,
    aes(x = lon, y = lat, size = n),
    color = "red"
  ) +
  geom_text_repel(
    data = plot_df,
    aes(x = lon, y = lat, label = city),
    size = 5, box.padding = 0.5, max.overlaps = Inf
  ) +
  theme_void() +
  ylim(50, 59) +
  coord_map(ylim = c(50, 59)) +
  theme(
    legend.position = c(0.9, 0.7),
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 13)
  ) +
  labs(size = "Individuals")
uk_map

In [None]:
library(gt)
plot_name <- "Figure1_MAP_UK"

# Make sure the relative target directory exists before writing; depending on
# the ggplot2 version, ggsave() errors or prompts when it is missing.
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)

# Write the map as a PDF with the same dimensions used for on-screen display.
# The plot is passed by name instead of being piped in positionally.
ggsave(
  filename = glue("outputs/{plot_name}.pdf"),
  plot = uk_map,
  device = "pdf",
  width = plot_width,
  height = plot_height,
  dpi = plot_dpi
)