Load required packages

In [None]:
library(tidyverse)
library(haven)
library(estimatr)
library(texreg)
library(latex2exp)
library(stats)
library(rdrobust)
library(rddensity)
library(rdd)

Load the data from GitHub and create a subset near the cutpoint

In [None]:
# Download one data set from the Mixtape GitHub repository.
#
# df: file name of the Stata data set (e.g. "lmb-data.dta"); it is appended
#     to the repository's base URL.
# Returns the object produced by read_dta() for the resulting URL.
read_data <- function(df) {
  base_url <- "https://raw.github.com/scunning1975/mixtape/master/"
  read_dta(paste0(base_url, df))
}

# Download the Lee, Moretti, and Butler (LMB) data set with read_data().
lmb_data <- read_data("lmb-data.dta")

# Exercise: reduce the data set to observations whose lagged Democratic vote
# share is larger than 48% and lower than 52%.
# Both `0.` values below are placeholders for the lower and upper bound; as
# written, the two conditions cannot hold at the same time.
lmb_subset <- lmb_data %>% 
  filter(lagdemvoteshare>0. & lagdemvoteshare<0.) # insert the vote share bounds here (format: 0.xx)

# Aggregate the data: cut the lagged Democratic vote share into 100 bins and
# take the mean of `democrat` within each bin.
demmeans <- unlist(
  lapply(
    split(lmb_data$democrat, cut(lmb_data$lagdemvoteshare, 100)),
    mean
  )
)

# New data frame for plotting: one row per bin, with the bin mean placed on a
# 0.01, 0.02, ..., 1 grid of vote shares.
agg_lmb_data <- data.frame(
  democrat = demmeans,
  lagdemvoteshare = seq(0.01, 1, by = 0.01)
)

Main variables of interest

In [None]:
# put Moritz main_variables <- snippet here

First, we analyze the relationship between the ADA score (how liberal the voting record is) and the vote share of the Democratic candidate, the so-called party affiliation effect.

To do so, we use a local linear nonparametric regression, which gives more importance to observations close to the cutpoint. Please note that this method is sensitive to the size of the chosen bandwidth.

In [None]:
# Exercise: set a suitable bandwidth in both ksmooth() calls below.
#
# Observations with democrat == 0: keep the ADA score and the Democratic vote
# share.
# NOTE(review): unlike the democrat == 1 branch below, no na.omit() is applied
# here -- confirm that this subset contains no missing values.
smooth_dem0 <- lmb_data %>% 
  filter(democrat == 0) %>% 
  select(score, demvoteshare)
# Kernel-smooth score (y) over demvoteshare (x) with a box kernel; the result
# is stored as a tibble whose x and y columns are plotted further down.
smooth_dem0 <- as_tibble(ksmooth(smooth_dem0$demvoteshare, smooth_dem0$score, 
                                 kernel = "box", bandwidth = )) # insert the bandwidth here (format: 0.x)


# Observations with democrat == 1: same columns, with rows containing missing
# values dropped before smoothing.
smooth_dem1 <- lmb_data %>% 
  filter(democrat == 1) %>% 
  select(score, demvoteshare) %>% 
  na.omit()
# Same box-kernel smoother as above, applied to the democrat == 1 subset.
smooth_dem1 <- as_tibble(ksmooth(smooth_dem1$demvoteshare, smooth_dem1$score, 
                                 kernel = "box", bandwidth = )) # insert the bandwidth here (format: 0.x)

# Draw the two smoothed series (democrat == 0 and democrat == 1) in one plot
# and mark the 50% vote-share cutoff with a vertical line.
ggplot() +
  geom_smooth(data = smooth_dem0, mapping = aes(x = x, y = y)) +
  geom_smooth(data = smooth_dem1, mapping = aes(x = x, y = y)) +
  geom_vline(xintercept = 0.5) +
  labs(
    x = "Democrat Vote Share, time t",
    y = "ADA score, time t"
  )

Next, we want to see whether we can find evidence of an incumbent effect: does a candidate have a higher chance of being re-elected in the next election period if she won the previous election?

To do so, we use a quadratic formulation to describe the relationship between the probability of winning the next election and the Democrat vote share of the most recent election.

In [None]:
# Flag the side of the 50% cutoff each observation lies on (1 = lagged
# Democratic vote share above 0.5, 0 = otherwise), so that a regression layer
# added to the plot can be fitted separately on each side.
lmb_data <- lmb_data %>%
  mutate(gg_group = if_else(lagdemvoteshare > 0.5, 1, 0))

# Base plot: binned means of `democrat` (from agg_lmb_data) against the lagged
# Democratic vote share, with a vertical line at the cutoff.
gg_srd <- ggplot(data = lmb_data, aes(lagdemvoteshare, democrat)) +
  geom_point(aes(x = lagdemvoteshare, y = democrat), data = agg_lmb_data) +
  xlim(0, 1) +
  # Limits and breaks go into one y scale: adding scale_y_continuous() after
  # ylim() would replace the scale created by ylim() and drop its limits.
  scale_y_continuous(limits = c(-0.1, 1.1), breaks = seq(0, 1, 0.2)) +
  geom_vline(xintercept = 0.5) +
  xlab("Democrat Vote Share, time t") +
  ylab("Probability of Democrat Win, time t+1") +
  # Plain `P` in both terms; the former `\\P` was a stray LaTeX command.
  ggtitle(TeX("Effect of Initial Win on Winning Next Election: $P^D_{t+1} - P^R_{t+1}$"))
# Exercise: overlay a polynomial regression on the base plot gg_srd, fitted
# separately for each value of gg_group (i.e. on each side of the cutoff).
# The exponent in I(x^ ) is intentionally left blank; this statement does not
# parse until an order has been filled in.
gg_srd + stat_smooth(aes(lagdemvoteshare, democrat, group = gg_group),
                     method = "lm", formula = y ~ x + I(x^)) # set the order of the polynomial regression (I(x^o))

TODO: ADD DESCRIPTION !!!!
In RDD contexts, nonparametric methods do not assume a functional form for the relationship between the outcome variable (Y) and the running variable (X).
The model would be something like this: Y = f(X) + ε

We plot the relationship between the candidate's second-period ADA score and the running variable, Democratic vote share. The discontinuity gap is defined as the combination of the 'Affect' and 'Elect' terms:
$$\gamma = \pi_0 \left(P_{t+1}^{*D} - P_{t+1}^{*R}\right) + \pi_1 \left(P_{t+1}^{*D} - P_{t+1}^{*R}\right)$$

In [None]:
# Aggregate the data.
# Copy of the running variable (lagged Democratic vote share).
categories <- lmb_data$lagdemvoteshare

# Mean ADA score within each of 100 bins of the lagged Democratic vote share.
demmeans <- unlist(
  lapply(
    split(lmb_data$score, cut(lmb_data$lagdemvoteshare, 100)),
    mean
  )
)

# One row per bin, with the bin mean placed on a 0.01, 0.02, ..., 1 grid.
agg_lmb_data <- data.frame(
  score = demmeans,
  lagdemvoteshare = seq(0.01, 1, by = 0.01)
)

# Plotting.
# Group indicator: 1 when the lagged vote share exceeds 0.5, 0 in every other
# case (the TRUE ~ 0 fallback also catches missing vote shares).
lmb_data <- mutate(
  lmb_data,
  gg_group = case_when(lagdemvoteshare > 0.5 ~ 1, TRUE ~ 0)
)

# Binned means plus a linear fit on each side of the cutoff.
ggplot(lmb_data, aes(lagdemvoteshare, score)) +
  geom_point(
    data = agg_lmb_data,
    mapping = aes(x = lagdemvoteshare, y = score)
  ) +
  stat_smooth(
    mapping = aes(lagdemvoteshare, score, group = gg_group),
    method = "lm"
  ) +
  geom_vline(xintercept = 0.5) +
  lims(x = c(0, 1), y = c(0, 100)) +
  labs(
    x = "Democrat Vote Share, time t",
    y = "ADA Score, time t+1"
  )

# Exercise: same plot as the linear-fit version above (binned ADA score means
# plus one fit per gg_group), but the fitted curves should be smoother.
# Add the missing `formula` argument to stat_smooth() at the marked position.
ggplot(lmb_data, aes(lagdemvoteshare, score)) +
  xlab("Democrat Vote Share, time t") +
  ylab("ADA Score, time t+1") +
  geom_point(aes(x = lagdemvoteshare, y = score), data = agg_lmb_data) +
  stat_smooth(aes(lagdemvoteshare, score, group = gg_group), method = "lm",
              # formula = [...]  <- what needs to be added here to make the
              # regression smoother? Hint: order of the polynomial regression.
             ) + 
  xlim(0,1) + ylim(0,100) +
  geom_vline(xintercept = 0.5)

TODO: Moritz
ADD tabular analysis here !!! (if needed see Mixtape code snippets 2-6)

In [None]:
## Replication of results of Lee, Moretti, and Butler (2004)
# Restrict the data to observations with a Democrat vote share between
# 48 percent and 52 percent.
# `lagdemvoteshare` is the Dem. vote share of the t-1 period
lmb_subset <- lmb_data %>%
  filter(lagdemvoteshare > 0.48 & lagdemvoteshare < 0.52)

# Three regressions on the restricted sample, each with HC1 standard errors.
# E[ADA_{t+1}|D_t] = \gamma
lm_1 <- lm_robust(score ~ lagdemocrat, data = lmb_subset, se_type = "HC1")
# E[ADA_{t}|D_t] = \pi_1
lm_2 <- lm_robust(score ~ democrat, data = lmb_subset, se_type = "HC1")
# E[D_{t+1}|D_t] = P_{t+1}^D - P_{t+1}^R
lm_3 <- lm_robust(democrat ~ lagdemocrat, data = lmb_subset, se_type = "HC1")

# Print the three models side by side. Logical options are spelled out as
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
screenreg(l = list(lm_1, lm_2, lm_3),
          digits = 2,
          # caption = 'title',
          custom.model.names = c("ADA_t+1", "ADA_t", "DEM_t+1"),
          include.ci = FALSE,
          include.rsquared = FALSE, include.adjrs = FALSE, include.nobs = TRUE,
          include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
          # select coefficients to report
          custom.coef.map = list("lagdemocrat" = "lagdemocrat",
                                 "democrat" = "democrat"),
          stars = numeric(0))

McCrary density test (local polynomial density estimation) to check continuity assumption
Do you see a sign that there was manipulation in the running variable at the cutoff?

In [None]:
# Exercise: set the cutoff correctly in both calls below; the `cutpoint` and
# `c` arguments are intentionally left blank.

# Density test on the Democratic vote share using DCdensity().
DCdensity(lmb_data$demvoteshare, cutpoint = ) # insert the cutoff here (format: 0.x)

# Density estimation around the cutoff with rddensity(); the fitted object is
# then plotted together with the running variable by rdplotdensity().
density <- rddensity(lmb_data$demvoteshare, c = ) # insert the cutoff here (format: 0.x)
rdplotdensity(density, lmb_data$demvoteshare)