/
Section13.R
131 lines (78 loc) · 4.16 KB
/
Section13.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Section 13-Choosing Regressors
require(Rlab)
# First we will make are dataset for the class.
Midterm.Grade=runif(1100,50,100)
Paper.Grade.1=Midterm.Grade+rnorm(1100,0,5)
Paper.Grade.2=mean(Midterm.Grade+Paper.Grade.1)+rnorm(1100,0,5)
Participation.Grade=mean(Paper.Grade.1+Paper.Grade.1)+rnorm(1100,0,10)
Homework.Average=mean(Paper.Grade.2+Participation.Grade)+rnorm(1100,0,10)
Gender=rbern(1100,0.5)
Attendance=rbeta(1100,12,2)*Midterm.Grade/100
Year.Of.College=sample(1:4,1100,replace=TRUE)
Presentation=rbern(1100,0.2*Midterm.Grade/100)
Extra.Credit=rbern(1100,Midterm.Grade/100)
Appearance.in.Office.Hours=rbern(1100,0.4*Midterm.Grade/100)
Poli.Sci.Major=rbern(1100,0.8*Midterm.Grade/100)
Double.Major=rbern(1100,0.7*(100-Midterm.Grade)/100)
Athlete=rbern(1100,(100-Midterm.Grade)/100)
Sits.In.Front=rbern(1100,0.5*Midterm.Grade/100)
Hat.In.Class=rbern(1100,0.1*(100-Midterm.Grade)/100)
Texts.In.Class=rbern(1100,(100-Midterm.Grade)/100)
Facebook.In.Class=rbern(1100,(100-Midterm.Grade)/100)
Sharp.Dresser=rbern(1100,0.1*Midterm.Grade/100)
Questions.In.Lecture=rbern(1100,0.3*Midterm.Grade/100)
Score=0.10*Midterm.Grade+0.5*Paper.Grade.1+0.15*Paper.Grade.2+0.01*Participation.Grade*Paper.Grade.2+0.01*Homework.Average*
Attendance+5*Year.Of.College+3*Extra.Credit+10*Appearance.in.Office.Hours+5*Poli.Sci.Major-5*Double.Major-5*Athlete+10*
Sits.In.Front-5*Hat.In.Class-5*Texts.In.Class-5*Facebook.In.Class+0.1*Sharp.Dresser+5*Questions.In.Lecture+rnorm(1100,0,25)
Final.Grade=Score/max(Score)*100
data=data.frame(Midterm.Grade, Paper.Grade.1, Paper.Grade.2, Participation.Grade, Homework.Average, Gender, Attendance,
Year.Of.College, Presentation, Extra.Credit, Appearance.in.Office.Hours, Poli.Sci.Major, Double.Major, Athlete,
Sits.In.Front, Hat.In.Class, Texts.In.Class, Facebook.In.Class, Sharp.Dresser, Questions.In.Lecture, Final.Grade)
Last.Year=data[1:100,]
This.Year=data[101:1100,]
# So for OLS with all the predictors
model=lm(Final.Grade ~ Midterm.Grade + Paper.Grade.1 + Paper.Grade.2 + Participation.Grade + Homework.Average + Gender +
Attendance + Year.Of.College + Presentation + Extra.Credit + Appearance.in.Office.Hours + Poli.Sci.Major + Double.Major +
Athlete + Sits.In.Front + Hat.In.Class + Texts.In.Class + Facebook.In.Class + Sharp.Dresser + Questions.In.Lecture, Last.Year)
predictions=predict(model,This.Year[,1:20])
MSE=mean((This.Year$Final.Grade-predictions)^2)
MSE
# Best-Subset Selection
require(leaps)
leaps.out=leaps(x=This.Year[,1:20],y=This.Year$Final.Grade,nbest=1)
# If we set the number of covariates k at 10, then the ones we should use are
leaps.out$which[10,]
# or
which(leaps.out$which[10,]==TRUE)
# So our model would be
combo=paste(colnames(Last.Year[,which(leaps.out$which[10,]==TRUE)]),collapse="+")
line=paste(c("Final.Grade ~ ",combo),collapse=" ")
line
model=lm(noquote(line), Last.Year)
predictions=predict(model,This.Year[,which(leaps.out$which[10,]==TRUE)])
MSE=mean((This.Year$Final.Grade-predictions)^2)
MSE
# Ridge Regression
require(ridge)
model=linearRidge(Final.Grade ~ Midterm.Grade + Paper.Grade.1 + Paper.Grade.2 + Participation.Grade + Homework.Average
+ Gender + Attendance + Year.Of.College + Presentation + Extra.Credit + Appearance.in.Office.Hours + Poli.Sci.Major
+ Double.Major + Athlete + Sits.In.Front + Hat.In.Class + Texts.In.Class + Facebook.In.Class + Sharp.Dresser
+ Questions.In.Lecture, Last.Year, lambda="automatic")
predictions=predict(model,This.Year[,1:20])
MSE=mean((This.Year$Final.Grade-predictions)^2)
MSE
LASSO
require(lars)
model=lars(x=as.matrix(Last.Year[,1:20]),y=Last.Year$Final.Grade,type="lasso")
model[13]
# So 16 has the smallest loss
predictions=predict(model,This.Year[,1:20])[[4]][,16]
MSE=mean((This.Year$Final.Grade-predictions)^2)
MSE
# Now to use OLS with the variables selected by LASSO
model=lm(Final.Grade ~ Paper.Grade.1 + Paper.Grade.2 + Participation.Grade + Homework.Average + Gender + Year.Of.College
+ Extra.Credit + Appearance.in.Office.Hours + Poli.Sci.Major + Double.Major + Athlete + Sits.In.Front + Hat.In.Class
+ Facebook.In.Class + Sharp.Dresser + Questions.In.Lecture, Last.Year)
predictions=predict(model,This.Year[,1:20])
MSE=mean((This.Year$Final.Grade-predictions)^2)
MSE