<div style="background-color: #f8d7da; color: #721c24; padding: 20px; margin-bottom: 20px; border: 1px solid #f5c6cb; border-radius: 4px;">
    <strong style="font-size: 24px;">Warning</strong> <br> 
    <strong>You have launched the notebook 000.2-VaccineData_Preprocessing.ipynb.</strong>
    <ul style="color: #721c24;">
        <li>This notebook runs with an <strong>R 4+</strong> kernel!</li>
        <li>It loads data from the previous preprocessing notebook (<strong>000.1-ContactData_Preprocessing.ipynb</strong>)</li>
        <li><strong>It outputs processed data, overwriting existing files</strong>: please use this with caution.</li>
    </ul>
</div>

## Mapping Vaccine covid open answers

In [1]:
rm(list=ls())
library(dplyr)
library(tidyr)
library(lubridate)

# Load the cleaned respondent-level (ego) and contact-level tables.
ego <- read.csv("../data_in/clean_respondent_info.csv")
contacts <- read.csv("../data_in/clean_contacts.csv")

### Drop soft launch observations ----
# Respondents surveyed on the soft-launch day are removed, and so are their
# contacts: the survey date is attached to each contact through `caseid`
# (inner join, so contacts of dropped respondents disappear as well).
soft_launch_day <- "2023-02-28"
ego <- ego[ego$survey_date != soft_launch_day, ]
survey_dates <- ego[, c("caseid", "survey_date")]
contacts <- merge(contacts, survey_dates, by = "caseid")
contacts <- contacts[contacts$survey_date != soft_launch_day, ]
rm(survey_dates, soft_launch_day)


### Mapping comorbidity variables ----
# For the respondent ("self") and for cohabitants ("cohab"), count how many of
# the selected chronic-condition items are flagged (== 1) and how many are
# missing. When every selected item is missing the count is set to NA instead
# of 0. The "all missing" test uses the number of columns actually selected
# rather than a hard-coded 8, so it stays correct if the item list changes or
# a column is absent from the input.
chronic_items <- c(2, 4, 6, 9, 11:14)

self_cols <- intersect(names(ego), paste0("patologie_croniche_", chronic_items))
self_items <- as.matrix(ego[, self_cols, drop = FALSE])
ego$cronic_comorb_self <- as.integer(rowSums(!is.na(self_items) & self_items == 1))
ego$cronic_comorb_self_na <- as.integer(rowSums(is.na(self_items)))
ego$cronic_comorb_self[ego$cronic_comorb_self_na == ncol(self_items)] <- NA

cohab_cols <- intersect(names(ego), paste0("patologie_croniche_cohabitant_", chronic_items))
cohab_items <- as.matrix(ego[, cohab_cols, drop = FALSE])
ego$cronic_comorb_cohab <- as.integer(rowSums(!is.na(cohab_items) & cohab_items == 1))
ego$cronic_comorb_cohab_na <- as.integer(rowSums(is.na(cohab_items)))
ego$cronic_comorb_cohab[ego$cronic_comorb_cohab_na == ncol(cohab_items)] <- NA

rm(chronic_items, self_cols, self_items, cohab_cols, cohab_items)

### Mapping vaccination ----
## Wave 1 ##
# Builds `vacc_covid_1wave` from the closed answer `vacc_covid`, recoding rows
# according to the free-text field `vacc_covid_other`.
# Target codes used below: 1 = no doses, 2 = exempt, 4 = one dose,
# 5 = two doses, 6 = three doses, 7 = four doses, 999 = unclear / refused /
# missing. 955 is the code tested for "other" further down.
# The rules are applied in sequence, so a later assignment overwrites an
# earlier one for rows matched by both (e.g. the exact-sentence rules of step
# 1.2/1.3 override the substring rules of step 1.1).
# Rows whose free text is NA yield NA in the logical index; with a scalar
# replacement value R leaves those rows untouched.
# NOTE(review): the substring patterns are broad (e.g. "una" also matches
# inside longer words, "1" matches inside any number) — the exact-sentence
# overrides below appear to have been curated against the observed answers;
# re-check them if the input data changes.

ego$vacc_covid_1wave <- ego$vacc_covid

## step 1.1: "straightforward" doses, str_detect
# No doses -> 1: "non sono vaccinato per scelta" -> report no doses, sometimes without explanations
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"Non")==T |  stringr::str_detect(ego$vacc_covid_other,"non")==T )] <- 1 

# No doses -> 1: "Non sono vaccinato per scelta" -> report recovery from Covid as an explanation for no doses
# ("codiv" catches a misspelling of "covid")
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"covid")==T | stringr::str_detect(ego$vacc_covid_other,"Covid")==T |
                          stringr::str_detect(ego$vacc_covid_other,"codiv")==T)] <- 1

# One dose -> 4: "prima, non seconda"
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) &
                       (stringr::str_detect(ego$vacc_covid_other,"1")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"una")==T | stringr::str_detect(ego$vacc_covid_other,"Una")==T|
                          stringr::str_detect(ego$vacc_covid_other,"prima")==T | stringr::str_detect(ego$vacc_covid_other,"Prima")==T)] <- 4 

# Two doses -> 5: "seconda, non booster"
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"2")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"due") ==T| stringr::str_detect(ego$vacc_covid_other,"Due")==T|
                          stringr::str_detect(ego$vacc_covid_other,"seconda")==T | stringr::str_detect(ego$vacc_covid_other,"Seconda")==T)] <- 5 

# Three doses -> 6: "Seconda e booster"
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) &  
                       (stringr::str_detect(ego$vacc_covid_other,"3")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"tre")==T | stringr::str_detect(ego$vacc_covid_other,"Tre")==T |
                          stringr::str_detect(ego$vacc_covid_other,"terza")==T | stringr::str_detect(ego$vacc_covid_other,"Terza")==T)] <- 6 

# Four doses -> 7: "Booster e quarta" (Note: technically, answer from Wave 2)
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"4")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"quattro")==T | stringr::str_detect(ego$vacc_covid_other,"Quattro")==T|
                          stringr::str_detect(ego$vacc_covid_other,"quarta") ==T| stringr::str_detect(ego$vacc_covid_other,"Quarta")==T)] <- 7 

## step 1.2: specific sentences/situations
# Exact-match overrides for individual answers that the substring rules above
# would classify differently.
# One dose
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Farò a giorni la seconda" |
                                                       ego$vacc_covid_other=="Prima dose e covid(vale come seconda dose)")] <- 4

# Two doses
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Ho fatto la seconda dose e sto aspettando la scadenza del green pass da guarigione per fare la terza dose" |
                                                       ego$vacc_covid_other=="Il booster in settimana"|
                                                       ego$vacc_covid_other=="il booster per ora non è previsto")] <- 5

# Three doses
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Completato ciclo caccinale")] <- 6 # assumed to be equivalent to reporting three doses

# No doses: exempt
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="esente" |
                                                       ego$vacc_covid_other=="Esente")] <- 2

# No doses
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Ha avutonil covid da meno di tre mesi" |
                                                       ego$vacc_covid_other=="Sarà vaccinata il prima possibile" |
                                                       ego$vacc_covid_other=="ha preso il covid prima di vaccinarsi"|
                                                       ego$vacc_covid_other=="non vaccinato, perché già con anticorpi naturali da Luglio 21"|
                                                       ego$vacc_covid_other=="Ha avuto il codiv-19")] <- 1

#J&J
# Single-dose vaccine followed by one further dose: coded 6, the same code as
# "three doses" above.
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Ho fatto il monodose e poi un richiamo"|
                                                       ego$vacc_covid_other=="J&J e booster Pfizer"|
                                                       ego$vacc_covid_other=="Ho fatto una monodose e poi il booster" |
                                                       ego$vacc_covid_other=="Fo fatto j e j e la 2da dose" |
                                                       ego$vacc_covid_other=="Ho fatto unica dose Jonhson e 2 dose Pfizer")] <- 6

## step 1.3: unclear/NA
# Unclear
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & (ego$vacc_covid_other=="Boh" |
                                                       ego$vacc_covid_other=="." | ego$vacc_covid_other=="2021/953"| ego$vacc_covid_other=="I VACCINI FANNO AMMALARE" |
                                                       ego$vacc_covid_other=="e' un siero sperimentale,non e' un vaccino" |ego$vacc_covid_other=="VACCINO INFLUENZA")] <- 999

# Explicitly refuses to answer
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) &
                       (stringr::str_detect(ego$vacc_covid_other,"rispond")==T)] <- 999 

# Peculiar answers -> refuses to answer
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave)  & ( ego$vacc_covid_other=="Fatevi i cazzi vostri" |
                                                         ego$vacc_covid_other=="Non vi interessa")] <- 999

# Selects "other" but then doesn't answer 
# (code still 955 at this point and free text either NA or empty)
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & ego$vacc_covid_1wave==955 & (is.na(ego$vacc_covid_other))] <- 999
ego$vacc_covid_1wave[!is.na(ego$vacc_covid_1wave) & ego$vacc_covid_1wave==955 & ego$vacc_covid_other == ""] <- 999

# NA
# Wave-1 rows with no closed answer at all are coded 999 as well.
ego$vacc_covid_1wave[is.na(ego$vacc_covid) & ego$wave==1] <- 999

# Filling error: declare no doses, but then in the question about reasons behind no vaccination clarify they made a mistake
# NOTE(review): this rule is not restricted by wave or by a non-missing
# answer, unlike the rules above — confirm that is intended.
ego$vacc_covid_1wave[ego$no_vax_reason_other == "sono vaccinata, ho sbagliato alla pagina precedente"|
                       ego$no_vax_reason_other == "Sono vaccinata"  ] <- 999


### Wave 2 ###
# Builds `vacc_covid_2wave` from the closed answer `vacc_covid2`, recoding rows
# according to the same free-text field `vacc_covid_other` used for wave 1.
# Target codes used below: 1 = no doses, 2 = exempt, 4 = one dose,
# 5 = two doses, 6 = three doses, 7 = four doses, 8 = five doses,
# 999 = unclear / refused / missing. 955 is the code tested for "other".
# As for wave 1, rules are applied in sequence and later assignments overwrite
# earlier ones; rows with NA free text are left untouched by each rule.
ego$vacc_covid_2wave <- ego$vacc_covid2

## step 2.1: "straightforward" doses, str_detect
# No doses -> 1: "non sono vaccinato per scelta" -> report no doses, sometimes without explanations
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"Non")==T |  stringr::str_detect(ego$vacc_covid_other,"non")==T )] <- 1 

# No doses -> 1: "Non sono vaccinato per scelta" -> report recovery from Covid as an explanation for no doses
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"covid")==T | stringr::str_detect(ego$vacc_covid_other,"Covid")==T |
                          stringr::str_detect(ego$vacc_covid_other,"codiv")==T)] <- 1

# One dose -> 4: "prima, non seconda"
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) &
                       (stringr::str_detect(ego$vacc_covid_other,"1")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"una")==T | stringr::str_detect(ego$vacc_covid_other,"Una")==T|
                          stringr::str_detect(ego$vacc_covid_other,"prima")==T | stringr::str_detect(ego$vacc_covid_other,"Prima")==T)] <- 4 

# Two doses -> 5: "seconda, non booster"
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"2")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"due")==T | stringr::str_detect(ego$vacc_covid_other,"Due")==T|
                          stringr::str_detect(ego$vacc_covid_other,"seconda")==T | stringr::str_detect(ego$vacc_covid_other,"Seconda")==T)] <- 5 

# Three doses -> 6: "Seconda e booster"
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) &  
                       (stringr::str_detect(ego$vacc_covid_other,"3")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"tre")==T | stringr::str_detect(ego$vacc_covid_other,"Tre")==T |
                          stringr::str_detect(ego$vacc_covid_other,"terza")==T | stringr::str_detect(ego$vacc_covid_other,"Terza")==T)] <- 6 

# Four doses -> 7: "Booster e quarta" 
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"4")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"quattro") ==T| stringr::str_detect(ego$vacc_covid_other,"Quattro")==T|
                          stringr::str_detect(ego$vacc_covid_other,"quarta") ==T| stringr::str_detect(ego$vacc_covid_other,"Quarta")==T)] <- 7 

# Five doses -> 8: "Quarta e quinta dose" (N.B. technically, not available as an answer)
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & 
                       (stringr::str_detect(ego$vacc_covid_other,"5")==T | 
                          stringr::str_detect(ego$vacc_covid_other,"cinque") ==T| stringr::str_detect(ego$vacc_covid_other,"Cinque")==T|
                          stringr::str_detect(ego$vacc_covid_other,"quinta") ==T| stringr::str_detect(ego$vacc_covid_other,"Quinta")==T|
                          stringr::str_detect(ego$vacc_covid_other,"QUINTA")==T | stringr::str_detect(ego$vacc_covid_other,"Quinto")==T)] <- 8 

## step 2.2: specific sentences/situations
# Exact-match overrides for individual answers that the substring rules above
# would classify differently.

# Two doses
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="Ho fatto le prime 2 dosi più la malattia ma non la quarta dose")] <- 5

# Three doses
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="Ho fatto il boster avendo poi sempre pochi anticorpi poi preso Omicron ho avuto il 600% in più di anticorpi che NON DECADONO con la stessa velocità, ma PERMANGONO"|
                                                       ego$vacc_covid_other=="fatte 3 dosi ma non la quarta perché ho contratto il covid dopo mesi dalla terza"|
                                                       ego$vacc_covid_other=="Ho ricevuto 2 vaccini ed 1 booster poiché avendo contratto il covi mi è stato consigliato così dai sanitari"|
                                                       ego$vacc_covid_other=="Non farò altre vaccinazioni") #the respondent reporting this answer reported three doses in the first wave
] <- 6 

# Five doses
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="ho fatto la quarta dose e il booster"|
                                                       ego$vacc_covid_other=="Ho fatto il booster, la quarta dose e il secondo booster")] <- 8

# No doses: exempt
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="esente" |
                                                       ego$vacc_covid_other=="Esente")] <- 2

# No doses
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="Non l'ho vaccinata perché ha preso il covid-19 per 2 volte in 3 mesi" |
                                                       ego$vacc_covid_other=="Non l’ho vaccinata perché l’avrei fatta ammalare trattandosi di terapia genica in sperimentazione con una proteina spike che non si sa che danni va a fare. E’vergognoso chiedere ancora perché le persone non si sono vaccinare visto che si bene cosa provoca quel vaccino anti covid." |
                                                       ego$vacc_covid_other=="Non è vaccinata perché ha preso 2 volte il covid"|
                                                       ego$vacc_covid_other=="Ha contratto il virus più volte"|
                                                       ego$vacc_covid_other=="Allergica ai farmaci"|
                                                       ego$vacc_covid_other=="Ho problemi di cuore"|
                                                       ego$vacc_covid_other=="Anemia")] <- 1

# Single J&J dose plus recovery: coded 5, the same code as "two doses" above.
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="1 Dose J&J + guarigione")] <- 5

## step 2.3: Unclear
# Unclear
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & (ego$vacc_covid_other=="Nd" | ego$vacc_covid_other=="Non ricordo" |
                                                       ego$vacc_covid_other=="Ok" | ego$vacc_covid_other=="Altra vaccinazione" |
                                                       ego$vacc_covid_other=="Non saprei" |ego$vacc_covid_other=="Sono stata obbligata a vaccinarmi contro la mia volontà"|
                                                       ego$vacc_covid_other=="piu covid"|
                                                       ego$vacc_covid_other=="Dopo vaccino, pericardite"|
                                                       ego$vacc_covid_other== "Pfizer")] <- 999

# Explicitly refuses to answer
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) &
                       (stringr::str_detect(ego$vacc_covid_other,"rispond")==T)] <- 999 

# Explicitly refuses to answer - specific sentences
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave)  & ( ego$vacc_covid_other=="NON RICHIESTO" |
                                                         ego$vacc_covid_other=="Non pertinente"|
                                                         ego$vacc_covid_other=="Non vi interessa"|
                                                         ego$vacc_covid_other=="Fatevi i cazzi vostri"|
                                                         ego$vacc_covid_other=="Affari miei"|
                                                         ego$vacc_covid_other=="Saranno pur affari miei")] <- 999

# Selects "other" but then doesn't answer 
# (code still 955 at this point and free text either NA or empty)
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & ego$vacc_covid_2wave==955 & (is.na(ego$vacc_covid_other))] <- 999
ego$vacc_covid_2wave[!is.na(ego$vacc_covid_2wave) & ego$vacc_covid_2wave==955 & ego$vacc_covid_other == ""] <- 999

# NA
# Wave-2 rows with no closed answer at all are coded 999 as well.
ego$vacc_covid_2wave[is.na(ego$vacc_covid2) & ego$wave==2] <- 999

## Aggregating answers after mapping

# Collapse a mapped answer code into the coarser category stored in the
# `*_bin` columns:
#   codes 1 and 3 -> 0, code 4 -> 1, code 5 -> 2, codes 6..99 -> 3, code 2 -> 4.
# Anything else (NA, 999, 955, ...) stays NA. The rules are mutually
# exclusive, so their order does not matter.
collapse_vacc_code <- function(code) {
  binned <- rep(NA, length(code))
  binned[which(code == 1 | code == 3)] <- 0
  binned[which(code == 4)] <- 1
  binned[which(code == 5)] <- 2
  binned[which(code >= 6 & code < 100)] <- 3
  binned[which(code == 2)] <- 4
  binned
}

ego$vacc_covid_1wave_bin <- collapse_vacc_code(ego$vacc_covid_1wave)
ego$vacc_covid_2wave_bin <- collapse_vacc_code(ego$vacc_covid_2wave)

# One column across waves: wave-2 rows take the wave-2 value, every other row
# takes the wave-1 value.
ego$vacc_covid_bin <- ifelse(
  ego$wave == 2,
  ego$vacc_covid_2wave_bin,
  ego$vacc_covid_1wave_bin
)

##

# Keep respondents with a known age and add interval-coded versions of age
# and of the two comorbidity counts (0 vs. 1-8), then convert to a tibble.
# NOTE(review): rows are filtered on `age` but the age class is cut from
# `respondent_age` — confirm that both columns are meant to be used.
ego <- ego %>%
  filter(!is.na(age)) %>%
  mutate(
    age_class = cut(respondent_age, c(-1, 9, 19, 29, 39, 49, 59, 69, 121)),
    cronic_comorb_self2 = cut(cronic_comorb_self, c(-1, 0, 8)),
    cronic_comorb_cohab2 = cut(cronic_comorb_cohab, c(-1, 0, 8))
  ) %>%
  as_tibble()
# Steps kept disabled, as in the original pipeline:
#   filter(isolamento==4)
#   mutate(hh_size = cut(profile_household_size_EU,c(0,1,2,3,4,8)))
#   mutate(acute_comorb_cohab2 = cut(acute_comorb_cohab,c(-1,0,1,8)))
#   mutate(acute_comorb_self2 = cut(acute_comorb_self,c(-1,0,1,8)))
#ego <- ego[,-c(46:111)]

### HOUSEHOLD SIZE ----

# The detailed household size is the `cohabitants` column under a new name.
names(ego)[names(ego) == "cohabitants"] <- "hh_size_det"

# ego$hh_size2 <- ego$hh_size
# ids <- ego$EPID[ego$wave==2 & ego$sample_w2==1]
# for(i in 1:length(ids)){
#   if(ids[i] %in% ego$EPID[ego$wave==1]){
#     ego$hh_size2[ego$EPID==ids[i] & ego$wave==2] <- ego$hh_size2[ego$EPID==ids[i] & ego$wave==1]
#     ego$reddito2[ego$EPID==ids[i] & ego$wave==2] <- ego$reddito2[ego$EPID==ids[i] & ego$wave==1]
#   }
# }

# Top-coded household size: values of 5 or more are collapsed into the label
# "5+", which turns the column into character strings.
ego$hh_size <- with(ego, ifelse(hh_size_det >= 5, "5+", hh_size_det))

### REDDITO ----
# red_medio <- c(500,1250,1750,2250,2750,3250,3750,4500,6000)
# xx <- ego$reddito
# xx[!is.na(xx) & xx==933] <- NA
# 
# yy <- rep(NA,length(xx))
# yy[!is.na(xx) & xx==1] <- 500
# yy[!is.na(xx) & xx==2] <- 1250
# yy[!is.na(xx) & xx==3] <- 1750
# yy[!is.na(xx) & xx==4] <- 2250
# yy[!is.na(xx) & xx==5] <- 2750
# yy[!is.na(xx) & xx==6] <- 3250
# yy[!is.na(xx) & xx==7] <- 3750
# yy[!is.na(xx) & xx==8] <- 4500
# yy[!is.na(xx) & xx==9] <- 6000
# 
# 
# zz <- yy/(ego$profile_household_size_EU)
# 
# zz_cat <- zz
# zz_cat[zz<500] <- 1
# zz_cat[zz>=500 & zz<1000] <- 2
# zz_cat[zz>=1000 & zz<1500] <- 3
# zz_cat[zz>=1500 & zz<2000] <- 4
# zz_cat[zz>=2000] <- 5
# 
# 
# ego$reddito2 <- zz_cat

## income - 1000€ groups
# Label each `reddito` bracket code with its 1000€ band. Code 933 becomes NA;
# any code without a label keeps its original value (as text).
income_label <- c(
  "1" = "<1000",
  "2" = "1000-1999", "3" = "1000-1999",
  "4" = "2000-2999", "5" = "2000-2999",
  "6" = "3000-3999", "7" = "3000-3999",
  "8" = "4000-4999",
  "9" = ">5000"
)
income_code <- as.character(ego$reddito)
has_label <- income_code %in% names(income_label)

ego$reddito_1000 <- income_code
ego$reddito_1000[has_label] <- unname(income_label[income_code[has_label]])
ego$reddito_1000[income_code %in% "933"] <- NA

rm(income_label, income_code, has_label)

## income - wrt Italian median income by household size
# Flags whether the respondent's household income (midpoint of the reported
# bracket) is above the ISTAT median for the same household size and macro
# region: hh_inc_med_compIT = 1 above, 0 not above, NA when income is missing.
# Rows whose household size / region cannot be located in the ISTAT table
# keep the initial value 0, as before.

# Midpoint of each income bracket, indexed by `reddito` code 1..9 (presumably
# EUR per month, since the ISTAT value is divided by 12 before the
# comparison). Code 933 and any other code outside 1..9 become NA, so a raw
# code can never be mistaken for an income.
bracket_midpoint <- c(
  500,
  (1000 + 1499) / 2,
  (1500 + 1999) / 2,
  (2000 + 2499) / 2,
  (2500 + 2999) / 2,
  (3000 + 3499) / 2,
  (3500 + 3999) / 2,
  (4000 + 4999) / 2,
  5500
)
ego$reddito_aux <- bracket_midpoint[match(ego$reddito, 1:9)]

# ISTAT median household income by number of components and territory.
istat_data <- read.csv("reddito_netto_ampiezza_famiglia_ISTAT.csv")
istat_data <- istat_data %>%
  filter(T_D8 == "REDD_MEDIANO_FAM" & PRAF == "2" & RDPR == 9) %>%
  dplyr::select(Territorio, N_COMP, Value)
istat_data$Value <- istat_data$Value / 12
# N_COMP code 7 is recoded to 5 so that it lands on the 5th row, the one
# used for households labelled "5+".
istat_data$N_COMP <- ifelse(istat_data$N_COMP == 7, 5, istat_data$N_COMP)
istat_data$Territorio <- ifelse(istat_data$Territorio == "Nord-ovest", 1,
                                ifelse(istat_data$Territorio == "Nord-est", 2,
                                       ifelse(istat_data$Territorio == "Centro", 3,
                                              ifelse(istat_data$Territorio == "Sud", 4,
                                                     ifelse(istat_data$Territorio == "Isole", 5, NA)))))
istat_data <- istat_data[!is.na(istat_data$Territorio), ]
istat_data <- spread(istat_data, Territorio, Value) # rows = N_Comp, columns = territorio
istat_data <- istat_data[, -1]
istat_data <- mutate_all(istat_data, function(x) as.numeric(x))

# Vectorised lookup of the reference median for every respondent.
# `hh_size` is text and uses "5+" for large households; stripping the "+"
# maps those rows onto row 5 of the table. (Comparing "5+" with 5 never
# matched, so those households were silently left at 0.)
median_tbl <- as.matrix(istat_data)
hh_row <- as.integer(sub("+", "", as.character(ego$hh_size), fixed = TRUE))
region_col <- as.integer(ego$region_grouped_IT)
in_table <- hh_row %in% seq_len(min(5L, nrow(median_tbl))) &
  region_col %in% seq_len(min(5L, ncol(median_tbl)))

ego$hh_inc_med_compIT <- 0
# The comparison is NA when the income midpoint is missing, which yields the
# NA flag required for respondents without a usable income answer.
ego$hh_inc_med_compIT[in_table] <- as.numeric(
  ego$reddito_aux[in_table] >
    median_tbl[cbind(hh_row[in_table], region_col[in_table])]
)

rm(bracket_midpoint, median_tbl, hh_row, region_col, in_table)

ego <- ego %>% dplyr::select(-c(reddito_aux))

### OCCUPAZIONE ----
# Collapse the detailed occupation code into five groups. Codes outside 1..11
# (and NA) keep their original value. Every rule tests the ORIGINAL
# `occupazione` column, so the rules cannot cascade into each other.
ego$occupazione_new <- ego$occupazione

# first mapping
ego$occupazione_new[ego$occupazione %in% 1:3] <- 1   # working, full and part time
ego$occupazione_new[ego$occupazione %in% 4] <- 2     # home/family
ego$occupazione_new[ego$occupazione %in% 5:7] <- 3   # student, full or part time
ego$occupazione_new[ego$occupazione %in% 8] <- 4     # retired
ego$occupazione_new[ego$occupazione %in% 9:11] <- 5  # inactive (unemployed, illness, other reasons)

# Mapping errors from "occupazione_categoria_other":
# any free text containing "ension" is recoded as retired.
mentions_pension <- which(stringr::str_detect(ego$occupazione_categoria_other, "ension"))
ego$occupazione_new[mentions_pension] <- 4
rm(mentions_pension)


# Building other outcomes counting contacts according to some features
# For contacts located in "otherindoor" / "otheroutdoor" the indoor/outdoor
# indicator `chiuso` is set from the location itself (1 = indoor,
# 2 = outdoor); the `c_chiuso` column is then dropped.
contacts$chiuso[contacts$c_location %in% "otherindoor"] <- 1
contacts$chiuso[contacts$c_location %in% "otheroutdoor"] <- 2
contacts$c_chiuso <- NULL

# c_mascherina
#1= <139>	Nessuno dei due fixed
#2= <140>	Solo io
#3= <141>	Solo questa persona
#4= <142>	Entrambi

#chiuso
#1= <135>	Al chiuso
#2= <136>	All’aperto


##------------------- added 17th Nov 2023 Chiara & Ele --------------------####

# Indoor flags for contacts with non-cohabitants.
# NOTE(review): cohabitants are identified through `location` while the
# setting is read from `c_location` elsewhere — confirm both columns exist
# and are meant to be used this way.
contacts <- contacts %>%
  mutate(
    # c_indoor_nocohab:
    #   NA if the contact is with a cohabitant
    #   0  if outdoor (non-cohabitant)
    #   1  if indoor (non-cohabitant)
    c_indoor_nocohab = case_when(
      chiuso == 1 & location != "conviventi" ~ 1,
      chiuso == 2 & location != "conviventi" ~ 0,
      TRUE ~ NA_real_
    ),
    # c_indoor_mask_nocohab:
    #   NA if the contact is with a cohabitant or outdoor
    #   0  if indoor, respondent without mask or mask answer missing
    #   1  if indoor, respondent with mask (c_mascherina 2 or 4)
    c_indoor_mask_nocohab = case_when(
      c_indoor_nocohab == 1 & c_mascherina %in% c(2, 4) ~ 1,
      c_indoor_nocohab == 1 &
        (is.na(c_mascherina) | c_mascherina %in% c(1, 3)) ~ 0,
      TRUE ~ NA_real_
    ),
    # c_indoor_nomask_nocohab: complement of the flag above
    #   NA if the contact is with a cohabitant or outdoor
    #   0  if indoor, respondent with mask
    #   1  if indoor, respondent without mask or mask answer missing
    c_indoor_nomask_nocohab = 1 - c_indoor_mask_nocohab
  )

# Cross-tabulations printed as a sanity check of the new flags.
table(contacts$chiuso, contacts$c_indoor_nocohab, useNA = "always")
table(contacts$c_mascherina, contacts$c_indoor_mask_nocohab, useNA = "always")
table(contacts$c_indoor_nocohab, contacts$c_indoor_mask_nocohab, useNA = "always")

# exclude contacts on public transport both from total indoor and from masked
# (a missing location also yields NA)
on_transport <- is.na(contacts$c_location) | contacts$c_location == "transport"
contacts$c_indoor_nocohab_notransport <- replace(contacts$c_indoor_nocohab, on_transport, NA)
contacts$c_indoor_mask_nocohab_notransport <- replace(contacts$c_indoor_mask_nocohab, on_transport, NA)
rm(on_transport)

#---- repeat for outdoor setting ----
# Outdoor flags for contacts with non-cohabitants, mirroring the indoor ones.
contacts <- contacts %>%
  mutate(
    # c_outdoor_nocohab:
    #   NA if the contact is with a cohabitant
    #   0  if indoor (non-cohabitant)
    #   1  if outdoor (non-cohabitant)
    c_outdoor_nocohab = case_when(
      chiuso == 2 & location != "conviventi" ~ 1,
      chiuso == 1 & location != "conviventi" ~ 0,
      TRUE ~ NA_real_
    ),
    # c_outdoor_mask_nocohab:
    #   NA if the contact is with a cohabitant or indoor
    #   0  if outdoor, respondent without mask or mask answer missing
    #   1  if outdoor, respondent with mask (c_mascherina 2 or 4)
    c_outdoor_mask_nocohab = case_when(
      c_outdoor_nocohab == 1 & c_mascherina %in% c(2, 4) ~ 1,
      c_outdoor_nocohab == 1 &
        (is.na(c_mascherina) | c_mascherina %in% c(1, 3)) ~ 0,
      TRUE ~ NA_real_
    ),
    # c_outdoor_nomask_nocohab: complement of the flag above
    #   NA if the contact is with a cohabitant or indoor
    #   0  if outdoor, respondent with mask
    #   1  if outdoor, respondent without mask or mask answer missing
    c_outdoor_nomask_nocohab = 1 - c_outdoor_mask_nocohab
  )



##------------------------ defined by Fili July 2023 -----------------------------------------####
# Contact-level 0/1 flags combining setting (`chiuso`: 1 = indoor,
# 2 = outdoor) and mask use (`c_mascherina`: 1 = neither, 2 = only the
# respondent, 3 = only the other person, 4 = both). Each flag is a nested
# ifelse(), so an NA in the outer test propagates to the result.

# = NA if mascherina=NA regardless if indoor or outdoor
# = 1 if outdoor & respondent masked
# = 0 if any indoor + outdoor unmasked 
contacts$c_out_mask <- ifelse(contacts$chiuso==2 & (!is.na(contacts$c_mascherina) & (contacts$c_mascherina==2 | contacts$c_mascherina==4)),1,
                              ifelse(is.na(contacts$c_mascherina),NA,0))

# = NA if mascherina=NA regardless if indoor or outdoor
# = 1 if indoor & respondent unmasked
# = 0 if any outoor + indoor masked 
contacts$c_in_nomask <- ifelse(contacts$chiuso==1 & (!is.na(contacts$c_mascherina) & (contacts$c_mascherina==1 | contacts$c_mascherina==3)),1,
                               ifelse(is.na(contacts$c_mascherina),NA,0))

# = NA if the contact is with a cohabitant, or the mask answer is missing
# = 0 if outdoor with a non-cohabitant, or indoor with a non-cohabitant and the respondent masked
# = 1 if indoor with a non-cohabitant and the respondent unmasked
# NOTE(review): cohabitants are identified through `location`, not
# `c_location` — confirm this is the intended column.
contacts$c_risky <- ifelse(contacts$chiuso==1 & (!is.na(contacts$c_mascherina) & (contacts$c_mascherina==1 | contacts$c_mascherina==3)
                                                 & contacts$location != "conviventi"),1,
                           ifelse((is.na(contacts$c_mascherina)|contacts$location=="conviventi"),NA,0))

##------------------------------ added 25th November 2023 - Ele -------------------------------##

# Outdoor contacts with cohabitants, needed for the definition of total_contacts_nohh_prol:
# indoor contacts with cohabitants are already removed by subtracting total_contacts_indoor,
# so only the outdoor contacts with cohabitants still have to be excluded.

contacts$c_outdoor_cohab <- ifelse(contacts$chiuso==2 & contacts$location == "conviventi",1,0)

##------------------------------ aggregate counts by caseid -----------------------------------####
# One row per respondent (caseid) with the number of contacts of each kind.
# All counts are computed inside a single grouped summarise(), so every count
# is tied to its caseid by construction. (Binding separate aggregate()
# results column-wise relied on both tools returning the groups in the same
# order and number.)
# Column names and their order are unchanged.

library(dplyr)
ncontacts <- contacts %>%
  group_by(caseid) %>%
  summarise(
    # sums of the 0/1/NA contact-level flags (NA ignored)
    indoor_nocohab = sum(c_indoor_nocohab, na.rm = TRUE),
    indoor_mask_nocohab = sum(c_indoor_mask_nocohab, na.rm = TRUE),
    indoor_nomask_nocohab = sum(c_indoor_nomask_nocohab, na.rm = TRUE),
    indoor_nocohab_notransp = sum(c_indoor_nocohab_notransport, na.rm = TRUE),
    indoor_mask_nocohab_notransp = sum(c_indoor_mask_nocohab_notransport, na.rm = TRUE),
    outdoor_nocohab = sum(c_outdoor_nocohab, na.rm = TRUE),
    outdoor_mask_nocohab = sum(c_outdoor_mask_nocohab, na.rm = TRUE),
    outdoor_nomask_nocohab = sum(c_outdoor_nomask_nocohab, na.rm = TRUE),
    masked_out = sum(c_out_mask, na.rm = TRUE),
    notmasked_in = sum(c_in_nomask, na.rm = TRUE),
    risky = sum(c_risky, na.rm = TRUE),
    outdoor_cohab = sum(c_outdoor_cohab, na.rm = TRUE),
    # counts by mask use and setting (missing answers never count)
    masked = sum(!is.na(c_mascherina) & c_mascherina == 4),
    outdoor = sum(!is.na(chiuso) & chiuso == 2),
    some_masked = sum(!is.na(c_mascherina) & c_mascherina > 1),
    # counts by contact location
    work = sum(!is.na(c_location) & c_location == "work"),
    home = sum(!is.na(c_location) &
                 (c_location == "conviventi" | c_location == "home")),
    community = sum(!is.na(c_location) &
                      c_location != "conviventi" & c_location != "home"),
    leisure = sum(!is.na(c_location) &
                    (c_location == "homeguest" | c_location == "leisure" |
                       c_location == "restaurant")),
    transport = sum(!is.na(c_location) & c_location == "transport"),
    school = sum(!is.na(c_location) & c_location == "school"),
    other = sum(!is.na(c_location) &
                  (c_location == "otherindoor" | c_location == "otheroutdoor" |
                     c_location == "shopping")),
    # counts by physical contact (c_fisico 1 vs 2)
    physical = sum(!is.na(c_fisico) & c_fisico == 1),
    non_physical = sum(!is.na(c_fisico) & c_fisico == 2)
  ) %>%
  as.data.frame()

# Respondents without any recorded contact get NA in all count columns.
ego <- left_join(ego, ncontacts, by = "caseid")


# settings  <- ego[,(ncol(ego)-13):(ncol(ego)-1)]
# settings[is.na(settings)] <- 0
# 
# ego[,(ncol(ego)-13):(ncol(ego)-1)] <- settings


### DATES ----

ego$weekend <- ifelse(wday(ego$start_date_module2A, week_start = 7)<=2,"yes","no")  # hanno iniziato a compilare il modulo dei contatti dom o lun (contatti avuti sab o dom)
ego$sunday <- ifelse(wday(ego$start_date_module2A, week_start = 7)==2,"yes","no")  # hanno iniziato a compilare il modulo dei contatti lun

# COVID DATE
# Turn the free-text `covid_data` field into a Date column (`covid_data2`).
# The 2nd, 3rd and 4th space-separated tokens are read as day, month name and
# year; presumably the 1st token is a weekday name -- TODO confirm against the
# raw export.

# Split the whole column once. (Splitting inside a per-row loop re-tokenises
# every value on every iteration, and `1:nrow(ego)` fails on an empty table.)
date_tokens <- strsplit(as.character(ego$covid_data), " ", fixed = TRUE)
nth_token <- function(k) {
  # k-th token of every entry; NA when the entry has fewer than k tokens
  vapply(date_tokens, function(tok) tok[k], character(1))
}

# Non-numeric day/year tokens become NA ("NAs introduced by coercion" is
# expected here).
gg <- as.numeric(nth_token(2))
mm <- nth_token(3)
yy <- as.numeric(nth_token(4))

# Map Italian month names (matched on a distinctive fragment) to two-digit
# month numbers. Tokens matching no fragment are left unchanged and therefore
# end up as NA dates below.
month_codes <- c(
  Genna = "01", Febbr = "02", Marz = "03", Apr = "04",
  Magg = "05", Giugn = "06", Lugl = "07", Agos = "08",
  Sett = "09", Ott = "10", Nov = "11", Dic = "12"
)
mm2 <- mm
for (month_fragment in names(month_codes)) {
  is_month <- !is.na(mm) & stringr::str_detect(mm, month_fragment)
  mm2[is_month] <- month_codes[[month_fragment]]
}

# Zero-pad single-digit days.
gg2 <- gg
one_digit_day <- !is.na(gg) & nchar(gg) == 1
gg2[one_digit_day] <- paste0("0", gg2[one_digit_day])

ego$covid_data2 <- paste0(yy, "-", mm2, "-", gg2)
ego$covid_data2[ego$covid_data2 == "NA-NA-NA"] <- NA
# Give the format explicitly: without it `as.Date()` chooses a format from the
# first non-missing string and stops with an error if that one string is
# malformed (e.g. "NA-2021-NA"). With a fixed format, unparseable entries
# simply become NA.
ego$covid_data2 <- as.Date(ego$covid_data2, format = "%Y-%m-%d")



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




      
           0     1  <NA>
  1        0 14984  8587
  2     3813     0   283
  <NA>     0     0     0

      
           0     1  <NA>
  1     9336     0 10300
  2        0   729   386
  3      564     0   293
  4        0  4255  1526
  <NA>   100     0   178

      
           0     1  <NA>
  0        0     0  3813
  1    10000  4984     0
  <NA>     0     0  8870

“NAs introduced by coercion”
“NAs introduced by coercion”


## Renaming columns to English

In [2]:
### TRANSLATING/ADJUSTING VARIABLE NAMES ----
## clean_respondent_info
# Whole-name renames (old name = new name), applied one at a time in the
# listed order. Names that are not present in `ego` are silently skipped.
ego_whole_names <- c(
  occupazione = "occupation",
  occupazione_05 = "kindergarten_05",
  occupazione_categoria = "occupation_category",
  occupazione_categoria_other = "occupation_category_other",
  occupazione_new = "occupation_agg",
  CONVIVENTI_excl_parentANDunderage = "COHABITANTS_excl_parentANDunderage",
  covid_data = "covid_date_raw",
  covid_data2 = "covid_date",
  covid_ospedale = "covid_hospital",
  isolamento = "isolation",
  presenza_work = "presence_work",
  presenza_school = "presence_school",
  compilazione_contatti = "filling_contacts",
  compilazione_contatti_first_clicked = "filling_contacts_first_clicked",
  vacc_altri = "other_vacc",
  rifiuto = "refusal",
  vaccino = "d_vacc",
  reddito_percepito = "perceived_income"
)
for (old_name in names(ego_whole_names)) {
  names(ego)[names(ego) == old_name] <- ego_whole_names[[old_name]]
}

# Fragment renames: the first occurrence of each fragment inside every column
# name is replaced, again in the listed order.
ego_fragments <- c(
  reddito = "income",
  patologie_croniche = "chronic_comorb",
  patologie_acute = "acute_comorb",
  conviventi = "cohabitants",
  coviventi = "change_cohab",
  cronic = "chronic"
)
for (fragment in names(ego_fragments)) {
  names(ego) <- stringr::str_replace(names(ego), fragment, ego_fragments[[fragment]])
}

names(ego)[names(ego) == "vaccino_Na"] <- "vacc_Na"
rm(ego_whole_names, ego_fragments, old_name, fragment)

## clean_contacts
# Step 1: fragment replacement that must run before the whole-name renames.
names(contacts) <- stringr::str_replace(names(contacts), "chiuso", "indoor")

# Step 2: whole-name renames (old name = new name), applied one at a time in
# the listed order. Names that are not present in `contacts` are skipped.
contact_whole_names <- c(
  c_fisico = "c_physical",
  c_mascherina = "c_masks",
  c_reddito_percepito = "c_perceived_income",
  c_relazione = "c_relationship",
  c_frequenza = "c_frequency",
  c_distanza = "c_distance",
  c_eta = "c_age",
  mask = "masked",
  c_mask = "c_masked",
  vaccino = "d_vacc"
)
for (old_name in names(contact_whole_names)) {
  names(contacts)[names(contacts) == old_name] <- contact_whole_names[[old_name]]
}

# Step 3: translate any remaining "reddito", then fix the one name that the
# translation leaves half-Italian.
names(contacts) <- stringr::str_replace(names(contacts), "reddito", "income")
names(contacts)[names(contacts) == "income_percepito"] <- "perceived_income"

# Step 4: remaining fragment replacements (first occurrence per column name),
# in the listed order.
contact_fragments <- c(
  patologie_croniche = "chronic_comorb",
  patologie_acute = "acute_comorb",
  cronic = "chronic",
  vaccino = "vaccine"
)
for (fragment in names(contact_fragments)) {
  names(contacts) <- stringr::str_replace(names(contacts), fragment, contact_fragments[[fragment]])
}
rm(contact_whole_names, contact_fragments, old_name, fragment)

### Exporting processed dataset ----

# Write both processed tables next to the inputs, without a row-name column.
# Existing files at these paths are overwritten.
write.csv(x = ego,
          file = "../data_in/clean_respondent_info_proc.csv",
          row.names = FALSE)
write.csv(x = contacts,
          file = "../data_in/clean_contacts_proc.csv",
          row.names = FALSE)
