-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCapstone Modeling.Rmd
116 lines (91 loc) · 3.17 KB
/
Capstone Modeling.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
---
title: "Capstone Modeling"
author: "Qiaojuan Tu"
date: "11/3/2021"
output: pdf_document
---
```{r}
# Load packages
# NOTE: the workspace is deliberately NOT cleared here. `rm(list = ls())`
# deletes every object in the environment this chunk runs in, which destroys
# the caller's workspace when the document is rendered from a live session
# and does nothing useful when it is knitted in a fresh one.
library(data.table) # fwrite()
library(Metrics)
library(readxl) # read_excel()
library(ggplot2)
```
```{r}
# Load Data
# Read the three Excel workbooks (paths are relative to the working directory).
Stimulus_by_states <- read_excel(path = "Checks_Amount.xlsx")
CPI <- read_excel(path = "CPIAUCSL.xlsx") # national CPI
CPI_by_States <- read_excel(path = "CPI_by_Region.xlsx")

# Drop the first three rows of the national CPI sheet.
# NOTE(review): presumably non-data header rows -- confirm against the workbook.
CPI <- CPI[-seq_len(3), ]

# Drop four three-row groups starting at rows 1, 19, 37 and 55
# (i.e. rows 1:3, 19:21, 37:39 and 55:57).
# NOTE(review): presumably one header group per region -- confirm.
CPI_by_States <- CPI_by_States[-(rep(c(1, 19, 37, 55), each = 3) + 0:2), ]

# Merge national CPI with the stimulus table on whatever columns they share.
# NOTE(review): `master` (lower-case) is not referenced again in the visible
# file; the later chunks build and use `Master` instead.
master <- merge(x = CPI, y = Stimulus_by_states)
```
```{r}
# manipulate the data

# Average stimulus per state for each stimulus total column of one region.
#
# region_stimulus  Rows of the stimulus table belonging to a single region.
# n_states         Divisor applied to each regional column total.
# rounds           Names of the total columns to average.
#
# Returns a numeric vector named after `rounds`.
average_stimulus <- function(region_stimulus, n_states,
                             rounds = c("Total_r1", "Total_r2", "Total_r3")) {
  vapply(
    rounds,
    function(total_col) sum(region_stimulus[[total_col]]) / n_states,
    numeric(1)
  )
}

# Add a `Stimulus` column to a regional CPI table.
#
# region_cpi  CPI rows for a single region.
# round_avg   Named numeric vector as returned by average_stimulus().
# round_rows  Named list: for each entry of `round_avg`, the row positions of
#             `region_cpi` that receive that value.
#
# Returns the selected rows of `region_cpi`, in `round_rows` order, with the
# extra `Stimulus` column.
add_round_stimulus <- function(region_cpi, round_avg,
                               round_rows = list(Total_r1 = 1:9,
                                                 Total_r2 = 10:11,
                                                 Total_r3 = 12:15)) {
  # Fail loudly instead of producing NA-filled or misaligned rows when the
  # regional table is shorter than the hard-coded row ranges assume.
  stopifnot(
    nrow(region_cpi) >= max(unlist(round_rows)),
    all(names(round_rows) %in% names(round_avg))
  )
  pieces <- lapply(names(round_rows), function(round_name) {
    piece <- region_cpi[round_rows[[round_name]], ]
    piece$Stimulus <- round_avg[[round_name]]
    piece
  })
  do.call(rbind, pieces)
}

# Split both tables by region.
South <- subset(Stimulus_by_states, Region == "South")
West <- subset(Stimulus_by_states, Region == "West")
Midwest <- subset(Stimulus_by_states, Region == "Midwest")
Northeast <- subset(Stimulus_by_states, Region == "Northeast")
South_CPI <- subset(CPI_by_States, Region == "South")
West_CPI <- subset(CPI_by_States, Region == "West")
Midwest_CPI <- subset(CPI_by_States, Region == "Midwest")
Northeast_CPI <- subset(CPI_by_States, Region == "Northeast")

# NOTE(review): the divisors below are hard-coded; presumably each equals the
# number of rows in the matching regional stimulus table -- confirm.

# For South Region, total of 17 states
South_avg <- average_stimulus(South, n_states = 17)
South_avg # shown in the rendered output
South_Final <- add_round_stimulus(South_CPI, South_avg)

# For West Region, total of 13 states
West_avg <- average_stimulus(West, n_states = 13)
West_avg
West_Final <- add_round_stimulus(West_CPI, West_avg)

# For Midwest Region
Midwest_avg <- average_stimulus(Midwest, n_states = 12)
Midwest_avg
Midwest_Final <- add_round_stimulus(Midwest_CPI, Midwest_avg)

# For Northeast Region
Northeast_avg <- average_stimulus(Northeast, n_states = 9)
Northeast_avg
Northeast_Final <- add_round_stimulus(Northeast_CPI, Northeast_avg)

# Put together
Master <- rbind(South_Final, West_Final, Midwest_Final, Northeast_Final)
# Only open the data viewer in an interactive session; View() has no place in
# a non-interactive knit.
if (interactive()) {
  View(Master)
}
fwrite(Master, "./Master.csv")
```
```{r}
# Fit a Gaussian GLM (a linear model): CPI explained by the average stimulus
# amount plus a Region effect.
# NOTE(review): assumes Master$CPI is numeric -- confirm, since the CPI table
# was read from Excel and had rows removed by position.
fit <- glm(CPI~Stimulus + Region, data = Master, family = "gaussian")
# Draw the diagnostic plots for the fitted model.
plot(fit)
# Second Gaussian GLM with Date in place of Region. This is still a linear
# model, not a logistic regression: the family is "gaussian".
# NOTE(review): how Date enters the model depends on its column type
# (a character column would be treated as a factor) -- confirm.
fit2 <- glm(CPI~ Stimulus + Date, data = Master, family = "gaussian")
plot(fit2)
```