In [None]:
library(MASS)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ascii)


In [None]:
# lubridate had to be installed from the R console rather than from the notebook:
# install.packages("lubridate", dependencies = TRUE)

In [None]:
library(splines)


In [None]:
library(mgcv)

In [None]:
# Install lubridate only when it is missing, so re-running the notebook does
# not reinstall the package (and need network access) every time.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}

In [None]:
library(lubridate)

In [None]:
# Directory that the data files read below are resolved against.
PSDS_PATH <- file.path("~")

In [None]:
# Lung data; its PEFR and Exposure columns are used by the plots and the
# regression below.
lung <- read.csv(file = file.path(PSDS_PATH, "LungDisease.csv"))

# County_Zhvi_AllHomes.csv table: keep row 13 only, drop its first five
# columns, and flatten the rest into a named vector (one element per column).
zhvi <- read.csv(file = file.path(PSDS_PATH, "County_Zhvi_AllHomes.csv"))
zhvi <- unlist(zhvi[13, -seq_len(5)])

In [None]:
# Build one date per element of zhvi: characters 2-8 of each element name are
# taken as the year/month (presumably names look like "X1996.04" -- confirm
# against the CSV), ".01" is appended, and the result is parsed as Ymd.
dates <- parse_date_time(
  paste(substr(names(zhvi), start = 2, stop = 8), "01", sep = "."),
  "Ymd"
)

# Replace the vector by a data frame of dates and values. The index column is
# computed from the zhvi_px column itself (each value divided by the last one).
# Referring to `zhvi` here would not name a column; it would only resolve
# through the global vector of that name, which this statement overwrites.
zhvi <- data.frame(ym = dates, zhvi_px = zhvi, row.names = NULL) %>%
  mutate(zhvi_idx = zhvi_px / last(zhvi_px))

# house_sales.csv is read as a tab-separated file.
house <- read.csv(file.path(PSDS_PATH, "house_sales.csv"), sep = "\t")

In [None]:
# Display the house data frame read from house_sales.csv.
house

In [None]:
# Scatter plot of PEFR against Exposure, then close the graphics device.
with(lung, plot(Exposure, PEFR, xlab = "Exposure", ylab = "PEFR"))
dev.off()

In [None]:
# Simple linear regression of PEFR on Exposure; the bare name on the last line
# displays the fitted model.
model <- lm(formula = PEFR ~ Exposure, data = lung)
model

In [None]:
# Schematic of the fitted line: empty axes (type = "n"), the regression line,
# and annotations for the intercept b0 and the slope b1 = Delta Y / Delta X.
plot(
  lung$Exposure, lung$PEFR,
  xlab = "Exposure", ylab = "PEFR",
  ylim = c(300, 450), type = "n", xaxs = "i"
)
abline(coef = coef(model), col = "blue", lwd = 2)
# Intercept label, placed at x = 0.3 at the height of the intercept.
text(x = 0.3, y = coef(model)[1], labels = expression("b"[0]), adj = 0, cex = 1.5)

# Two Exposure values and their predicted PEFR anchor the two dashed red
# segments (one horizontal, one vertical) drawn against the line.
x <- c(7.5, 17.5)
y <- predict(model, newdata = data.frame(Exposure = x))
segments(
  x0 = c(x[1], x[1]), y0 = c(y[2], y[1]),
  x1 = c(x[2], x[1]), y1 = c(y[2], y[2]),
  col = "red", lwd = 2, lty = 2
)
text(x = x[1], y = mean(y), labels = expression(Delta ~ Y), pos = 2, cex = 1.5)
text(x = mean(x), y = y[2], labels = expression(Delta ~ X), pos = 1, cex = 1.5)
text(
  x = mean(x), y = 400,
  labels = expression(b[1] == frac(Delta ~ Y, Delta ~ X)),
  cex = 1.5
)
dev.off()

In [None]:
# Model predictions for the data the model was fitted on.
fitted <- predict(model)

In [None]:
# Display the fitted values.
fitted

In [None]:
# Display the lung data frame.
lung

In [None]:
# Set plot margins (bottom, left, top, right), each widened by 0.1 lines.
par(mar = c(4, 4, 0, 0) + 0.1)

# For each Exposure level, split the observations into those above the
# regression line and those at or below it, and keep the most extreme PEFR on
# each side (max above, min below) together with the fitted value for that
# level. The fitted values are taken straight from the model, so this cell does
# not depend on a separately created global `fitted` (a name that also masks
# stats::fitted) having been defined first.
lung1 <- lung %>%
  mutate(
    Fitted = predict(model),
    positive = PEFR > Fitted
  ) %>%
  group_by(Exposure, positive) %>%
  summarize(
    PEFR_max = max(PEFR),
    PEFR_min = min(PEFR),
    Fitted = first(Fitted)
  ) %>%
  ungroup() %>%
  mutate(PEFR = ifelse(positive, PEFR_max, PEFR_min)) %>%
  arrange(Exposure)

# Scatter plot with the regression line, plus a dotted red segment from each
# retained extreme point to its fitted value on the line.
plot(lung$Exposure, lung$PEFR, xlab = "Exposure", ylab = "PEFR")
abline(a = coef(model)[1], b = coef(model)[2], col = "blue", lwd = 2)
segments(lung1$Exposure, lung1$PEFR, lung1$Exposure, lung1$Fitted,
         col = "red", lty = 3)

In [None]:
# Display the lung1 summary table.
lung1

In [None]:
# Residuals of the fitted regression model.
rsid <- residuals(model)

In [None]:
# Display the residuals: how far each data point was from its predicted value.
rsid