# Ethnicity and Gender over time
Look at ethnicity and gender cross-tabulations and how they change over time, by year entered care.

In [None]:
# Load libraries
library(dplyr)
library(here)
library(ggplot2)
library(lubridate)
library(bigrquery)
bq_auth()

In [None]:
# BigQuery project ID used when submitting queries
project_id <- "yhcr-prd-phm-bia-core"

# Dataset that holds the tables of interest; any spaces are stripped so the
# name can be spliced directly into SQL text
targetdb1 <- gsub(" ", "", "yhcr-prd-phm-bia-core.CB_2353")
print(targetdb1)


In [None]:
# Build the cohort query: for each person in the cohort, fetch their broad
# ethnic category, recorded gender and the year they entered care.
# - DISTINCT in the CTE guarantees each person_id is listed once, so a
#   repeated cohort row cannot multiply the joined rows and skew the counts.
# - NOTE(review): LIMIT without ORDER BY returns an arbitrary 4000 rows; if
#   the joined result is larger, everything computed from it describes an
#   unspecified subset. Confirm whether the cap is still wanted.
sql1 <- paste0('
WITH distinct_pi AS (
  SELECT DISTINCT person_id
  FROM ', targetdb1, '.CiC_Bradford_Cohort
)
SELECT  a.person_id, b.BroadEthnicCategory_merge, c.gender_source_value, d.year_entered_care
FROM distinct_pi a
JOIN ', targetdb1, '.CiC_ethnicity b ON b.person_id = a.person_id
JOIN ', targetdb1, '.CiC_gender c ON a.person_id = c.person_id
JOIN ', targetdb1, '.CiC_age_start_care d ON a.person_id = d.person_id
LIMIT 4000;
')

# Submit the query to BigQuery under the given project
tb3 <- bq_project_query(project_id, sql1)

# Download the query result into an R data frame.
# The name `table` is shared with base::table(); calls such as table(x, y)
# still reach the function because R skips non-function objects when it
# resolves a call.
table <- bq_table_download(tb3)

# Display the downloaded rows
table

In [None]:
# Exclude people who entered care in 2021 from the rest of the analysis
# (rows with a missing year are dropped as well, since filter() keeps only
# rows where the condition is TRUE)
table2 <- dplyr::filter(table, year_entered_care != 2021)
table2

# Ethnic Breakdown

In [None]:
# Percentage of the filtered cohort that falls in each broad ethnic group
proportions_data <-
  group_by(table2, BroadEthnicCategory_merge) %>%
  summarise(count = n()) %>%
  mutate(proportion = 100 * (count / sum(count)))

# Show the per-group counts and percentages
print(proportions_data)

In [None]:
# Pie chart of the ethnic-group percentages: a single stacked bar of width 1
# wrapped onto polar coordinates, with axes and legend title removed
ggplot(
  data = proportions_data,
  mapping = aes(x = "", y = proportion, fill = BroadEthnicCategory_merge)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Proportion of Broad Ethnic Groups", x = "", y = "") +
  theme_void() +
  theme(legend.title = element_blank())

In [None]:
# Share of each broad ethnic group among the people entering care in a year.
# `.groups = "drop_last"` states explicitly that the counts stay grouped by
# year, so sum(count) is the yearly total; it also avoids the regrouping
# message summarise() prints when `.groups` is left unset. The result is
# ungrouped at the end so later steps do not inherit the grouping by accident.
proportions_data <- table2 %>%
  group_by(year_entered_care, BroadEthnicCategory_merge) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

# View the proportions data
print(proportions_data)

In [None]:
# Yearly ethnic-group shares, one line per group; the year is converted to a
# factor so it is plotted on a discrete axis
ggplot(
  data = proportions_data,
  mapping = aes(
    x = factor(year_entered_care),
    y = proportion,
    colour = BroadEthnicCategory_merge,
    group = BroadEthnicCategory_merge
  )
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Year Entered Care", y = "Proportion", colour = "Ethnicity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Gender Breakdown

In [None]:
# Percentage of the filtered cohort recorded under each gender value
proportions_data <-
  group_by(table2, gender_source_value) %>%
  summarise(count = n()) %>%
  mutate(proportion = 100 * (count / sum(count)))

# Show the per-gender counts and percentages
print(proportions_data)

In [None]:
# Share of each gender value among the people entering care in a year.
# `.groups = "drop_last"` states explicitly that the counts stay grouped by
# year, so sum(count) is the yearly total; it also avoids the regrouping
# message summarise() prints when `.groups` is left unset. The result is
# ungrouped at the end so later steps do not inherit the grouping by accident.
proportions_data <- table2 %>%
  group_by(year_entered_care, gender_source_value) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

# View the proportions data
print(proportions_data)

In [None]:
# Yearly gender shares, one line per gender value; the year is converted to a
# factor so it is plotted on a discrete axis
ggplot(
  data = proportions_data,
  mapping = aes(
    x = factor(year_entered_care),
    y = proportion,
    colour = gender_source_value,
    group = gender_source_value
  )
) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Year Entered Care", y = "Proportion", colour = "Gender") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
library(tidyr)

# One row per year, with one column of proportions per gender value
proportions_wide <- proportions_data %>%
  select(year_entered_care, gender_source_value, proportion) %>%
  pivot_wider(names_from = gender_source_value, values_from = proportion)
proportions_wide

# Yearly gap between the F and M proportions.
# The columns are reached through the `.data` pronoun: a bare `F` falls back
# to base R's `F` (an alias for FALSE) when no such column exists, which would
# silently produce `-M`; `.data[["F"]]` raises an error instead.
proportion_difference <- proportions_wide %>%
  mutate(proportion_difference = .data[["F"]] - .data[["M"]]) %>%
  select(year_entered_care, proportion_difference)

# View the proportion differences
print(proportion_difference)


In [None]:
# Line chart of the yearly F - M proportion gap
ggplot(
  data = proportion_difference,
  mapping = aes(x = year_entered_care, y = proportion_difference)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Proportion Difference Between Genders Over Time",
    x = "Year Entered Care",
    y = "Proportion Difference (F - M)"
  ) +
  theme_minimal()


# Ethnicity by Gender Breakdown

In [None]:
# Count people by gender (rows) and broad ethnic group (columns)
crosstab <- table(
  table2[["gender_source_value"]],
  table2[["BroadEthnicCategory_merge"]]
)
print(crosstab)

# Column-wise proportions: the gender split within each ethnic group
crosstab_prop <- prop.table(crosstab, margin = 2)
print(crosstab_prop)

In [None]:
# Flatten the proportion table to long form; as.data.frame() names the
# columns Var1 (gender), Var2 (ethnicity) and Freq (proportion)
crosstab_prop <- as.data.frame(crosstab_prop)

# Stacked bars: one bar per ethnic group, split by gender
ggplot(
  data = crosstab_prop,
  mapping = aes(x = Var2, y = Freq, fill = Var1)
) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Ethnicity", y = "Proportion", fill = "Gender") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# Give the long-form proportion table descriptive column names, then spread
# the gender values into their own columns (one row per ethnicity)
colnames(crosstab_prop) <- c("Gender", "Ethnicity", "Proportion")
crosstab_wide <- crosstab_prop %>%
  pivot_wider(names_from = Gender, values_from = Proportion)

# Gap between the F and M proportions for each ethnicity.
# The columns are reached through the `.data` pronoun: a bare `F` falls back
# to base R's `F` (an alias for FALSE) when no such column exists, which would
# silently produce `-M`; `.data[["F"]]` raises an error instead.
crosstab_wide <- crosstab_wide %>%
  mutate(proportion_difference = .data[["F"]] - .data[["M"]])

# View the proportion differences
print(crosstab_wide)

# Bar chart of the F - M gap, one bar per ethnicity
ggplot(crosstab_wide, aes(x = Ethnicity, y = proportion_difference)) +
  geom_col(fill = "steelblue") +
  labs(
    x = "Ethnicity",
    y = "Proportion Difference (F - M)",
    title = "Proportion Differences Between Genders by Ethnicity"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# Proportion of each gender value within each ethnicity, for each year.
# `.groups = "drop_last"` leaves the counts grouped by year and ethnicity, so
# sum(count) is the total for that year/ethnicity pair; this replaces the
# ungroup-then-regroup step and avoids the regrouping message summarise()
# prints when `.groups` is left unset. The result is ungrouped at the end so
# later steps do not inherit the grouping by accident.
proportions_data <- table2 %>%
  group_by(
    year_entered_care,
    BroadEthnicCategory_merge,
    gender_source_value
  ) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

proportions_data

In [None]:
# Gender shares over time, one panel per ethnic group and one line per
# gender value
ggplot(
  data = proportions_data,
  mapping = aes(
    x = factor(year_entered_care),
    y = proportion,
    colour = gender_source_value,
    group = gender_source_value
  )
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ BroadEthnicCategory_merge) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Year Entered Care", y = "Proportion", colour = "Gender") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# Spread the gender proportions into columns (one row per year and ethnicity)
# and compute the gap between the F and M proportions.
# The columns are reached through the `.data` pronoun: a bare `F` falls back
# to base R's `F` (an alias for FALSE) when no such column exists, which would
# silently produce `-M`; `.data[["F"]]` raises an error instead.
proportions_wide <- proportions_data %>%
  select(-count) %>%
  pivot_wider(names_from = gender_source_value, values_from = proportion) %>%
  mutate(proportion_difference = .data[["F"]] - .data[["M"]])

# View the proportions and differences
print(proportions_wide)

# Plot the proportion differences over time, one panel per ethnicity
ggplot(
  proportions_wide,
  aes(
    x = year_entered_care,
    y = proportion_difference,
    color = BroadEthnicCategory_merge,
    group = BroadEthnicCategory_merge
  )
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ BroadEthnicCategory_merge) +
  labs(
    x = "Year Entered Care",
    y = "Proportion Difference (F - M)",
    title = "Proportional Differences Between Genders Over Time by Ethnicity"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_discrete(name = "Ethnicity")

In [None]:
# Proportion of each ethnicity within each gender value, for each year.
# Dropping only the last grouping level leaves the counts grouped by year and
# gender, so sum(count) is the total for that year/gender pair. Only the
# "M" and "F" rows are kept in the final result.
proportions_data <- table2 %>%
  group_by(
    year_entered_care,
    gender_source_value,
    BroadEthnicCategory_merge
  ) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  filter(gender_source_value %in% c("M", "F"))

print(proportions_data)

In [None]:
# Ethnic-group shares over time, one panel per gender value and one line per
# ethnic group
ggplot(
  data = proportions_data,
  mapping = aes(
    x = factor(year_entered_care),
    y = proportion,
    colour = BroadEthnicCategory_merge,
    group = BroadEthnicCategory_merge
  )
) +
  geom_line() +
  geom_point() +
  facet_wrap(~ gender_source_value) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Year Entered Care", y = "Proportion", colour = "Ethnic Group") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))