Load required packages

In [None]:
library(tidyverse)
library(haven)
library(estimatr)
library(stats)

Load data from GitHub and create a subset near the cutpoint

In [None]:
# Read a Stata file from the mixtape repository on GitHub.
#
# `df` is the file name (for example "lmb-data.dta"); it is appended to the
# repository base URL and the resulting address is handed to read_dta().
# Returns the object produced by read_dta().
read_data <- function(df) {
  base_url <- "https://raw.github.com/scunning1975/mixtape/master/"
  read_dta(paste0(base_url, df))
}

# Load the "lmb-data.dta" file through read_data().
lmb_data <- read_data("lmb-data.dta")

# Reduce the dataset to observations near the cutpoint: keep only rows whose
# lagged vote share is larger than 48% and lower than 52%.
# lagdemvoteshare is compared on a 0-1 scale, so the bounds are written as
# 0.48 and 0.52 (both comparisons are strict).
lmb_subset <- lmb_data %>%
  filter(lagdemvoteshare > 0.48 & lagdemvoteshare < 0.52)

ADD tabular analyses here!!! (see Mixtape code snippets 2-6)

In RDD contexts, nonparametric methods do not assume a functional form for the relationship between the outcome variable (Y) and the running variable (X).
The model would be something like this: Y = f(X) + ε

In [None]:
# Aggregate the data: mean score within each of the 100 intervals that cut()
# forms over lagdemvoteshare.
# `categories` is a copy of the running variable; it is not referenced again
# in this section.
categories <- lmb_data$lagdemvoteshare

demmeans <- unlist(
  lapply(
    split(lmb_data$score, cut(lmb_data$lagdemvoteshare, 100)),
    mean
  )
)

# One row per interval, labelled 0.01, 0.02, ..., 1.
# NOTE(review): this labelling presumes lagdemvoteshare spans roughly 0 to 1
# -- confirm against the data.
agg_lmb_data <- data.frame(
  score = demmeans,
  lagdemvoteshare = seq(0.01, 1, by = 0.01)
)

# Plotting: gg_group is 1 where lagdemvoteshare > 0.5 and 0 otherwise, so the
# smoothers below are fitted separately for the two groups.
lmb_data <- lmb_data %>%
  mutate(
    gg_group = case_when(
      lagdemvoteshare > 0.5 ~ 1,
      TRUE ~ 0
    )
  )

# Layer shared by both figures: the binned means drawn as points.
binned_means_plot <- ggplot(lmb_data, aes(lagdemvoteshare, score)) +
  geom_point(aes(x = lagdemvoteshare, y = score), data = agg_lmb_data)

# Figure 1: loess fit within each group, vertical line at 0.5.
binned_means_plot +
  stat_smooth(aes(lagdemvoteshare, score, group = gg_group), method = "loess") +
  xlim(0, 1) +
  ylim(0, 100) +
  geom_vline(xintercept = 0.5)

# Figure 2: linear fit within each group, vertical line at 0.5.
binned_means_plot +
  stat_smooth(aes(lagdemvoteshare, score, group = gg_group), method = "lm") +
  xlim(0, 1) +
  ylim(0, 100) +
  geom_vline(xintercept = 0.5)

McCrary density test (local polynomial density estimation) to check continuity assumption
Do you see a sign that there was manipulation in the running variable at the cutoff?

In [None]:
# set the cutoff correctly
# The cutoff arguments below are intentionally left empty; fill them in before
# running this cell.
# NOTE(review): DCdensity(), rddensity() and rdplotdensity() do not appear to
# come from any package attached at the top of this file -- presumably the
# `rdd` and `rddensity` packages must be loaded first; confirm.

# Density test on demvoteshare at the chosen cutpoint.
DCdensity(lmb_data$demvoteshare, cutpoint = ) # insert the cutoff here (format: 0.x)

# Density estimate on demvoteshare at the chosen cutoff, stored in `density`
# and passed, together with the same variable, to rdplotdensity().
density <- rddensity(lmb_data$demvoteshare, c = ) # insert the cutoff here (format: 0.x)
rdplotdensity(density, lmb_data$demvoteshare)

Local linear nonparametric regressions (give more importance to observations closest to the center)
This method will be sensitive to the size of the bandwidth chosen

In [None]:
# set a suitable bandwidth
# Box-kernel smooth of score on demvoteshare, computed separately for
# democrat == 0 and democrat == 1. The `bandwidth = ` arguments are left empty
# on purpose and must be filled in (format: 0.x).

# Rows with democrat == 0.
# NOTE(review): unlike the democrat == 1 pipeline below, no na.omit() is
# applied here -- confirm that this subset has no missing values.
smooth_dem0 <- lmb_data %>%
  filter(democrat == 0) %>%
  select(score, demvoteshare) %>%
  # insert the bandwidth here (format: 0.x)
  with(ksmooth(demvoteshare, score, kernel = "box", bandwidth = )) %>%
  as_tibble()

# Rows with democrat == 1; incomplete rows are dropped before smoothing.
smooth_dem1 <- lmb_data %>%
  filter(democrat == 1) %>%
  select(score, demvoteshare) %>%
  na.omit() %>%
  # insert the bandwidth here (format: 0.x)
  with(ksmooth(demvoteshare, score, kernel = "box", bandwidth = )) %>%
  as_tibble()

# ksmooth() returns components x and y; draw both smoothed series with a
# vertical line at 0.5.
ggplot() +
  geom_smooth(data = smooth_dem0, mapping = aes(x, y)) +
  geom_smooth(data = smooth_dem1, mapping = aes(x, y)) +
  geom_vline(xintercept = 0.5)