# In [None]:
# Comparison of Least Squares, Lasso and Ridge regressions with PCR and PLS
library(ISLR)
library(glmnet)
library(MXM)
library(pls) 

# Take the College data set as the working data and preview its first rows.
data = College
head(data)

# Create the predictor and response variables.
# Column 2 is standardised and used as the response; column 1 is kept
# unscaled and columns 3 to 18 are standardised.
# NOTE(review): column 1 is labelled 'Private' further down; if it is a
# factor, cbind() stores its integer level codes rather than a 0/1 dummy -
# confirm this encoding is intended.
# NOTE(review): scale() runs on the full data set before the split, so the
# test rows contribute to the centring/scaling statistics - confirm this is
# acceptable for the comparison.
y = scale(data[, 2])
X = cbind(data[, 1], scale(data[, 3:18]))

# Split the data set into train (~70%) and test (~30%) by drawing one
# TRUE/FALSE flag per row.
set.seed(10)
n = nrow(data)  # one flag per row of the data set
ind = sample(c(TRUE, FALSE), n, replace = TRUE, prob = c(0.7, 0.3))
# drop = FALSE keeps the subsets as matrices even if a single row is selected.
X_train = X[ind, , drop = FALSE]
X_test = X[!ind, , drop = FALSE]
y_train = y[ind]
y_test = y[!ind]

# Sanity check: dimensions of both predictor matrices and response lengths.
print(c(dim(X_train), dim(X_test), length(y_train), length(y_test)))

# Data frames with the 17 predictors in columns 1:17 and the response in
# column 18.
df_train = as.data.frame(cbind(X_train, y_train))
df_test = as.data.frame(cbind(X_test, y_test))

# Names of the 17 predictor columns, shared by both data frames.
predictor_names = c('Private', 'Accept', 'Enroll', 'Top10perc', 'Top25perc',
                    'F.Undergrad', 'P.Undergrad', 'Outstate', 'Room.Board',
                    'Books', 'Personal', 'PhD', 'Terminal', 'S.F.Ratio',
                    'perc.alumni', 'Expend', 'Grad.Rate')

# The response column of df_test is also named 'y_train' (it holds y_test);
# the name is kept unchanged so existing references keep working.
colnames(df_train) = c(predictor_names, 'y_train')
colnames(df_test) = c(predictor_names, 'y_train')

# Root-mean-squared error between observed values and predictions.
#   observed:  numeric vector of observed responses.
#   predicted: predictions of the same length; a matrix or array is flattened
#              with as.numeric() before the comparison.
# Stops with an error if the lengths differ, so a mismatch cannot be hidden
# by silent recycling.
rmse = function(observed, predicted) {
  predicted = as.numeric(predicted)
  stopifnot(length(observed) == length(predicted))
  sqrt(mean((observed - predicted)^2))
}

# Shared inputs for all models: predictors are columns 1:17 and the response
# is column 18 of df_train / df_test.
x_train_mat = data.matrix(df_train[1:17])
x_test_mat = data.matrix(df_test[1:17])
y_train_vec = as.numeric(unlist(df_train[18]))
y_test_obs = df_test[, 18]

# Least-squares regression
model = lm(y_train ~ Private+Accept+Enroll+Top10perc+Top25perc+F.Undergrad+P.Undergrad+Outstate+Room.Board+Books+Personal+PhD+Terminal+S.F.Ratio+perc.alumni+Expend+Grad.Rate,
   data = df_train)
print("RMSE mínimos cuadrados")
rmse(y_test_obs, predict(model, newdata = df_test[, 1:17]))

# Candidate penalty values, from 10^10 down to 10^-3, shared by ridge and lasso.
grid = 10^seq(10, -3, length.out = 100)

# Ridge regression (alpha = 0): pick lambda by cross-validation over the grid,
# then refit at that lambda and predict on the test set.
# NOTE(review): the glmnet documentation advises against fitting with a single
# lambda value; the single-lambda fit is kept so ridge.mod keeps its shape -
# confirm whether a path fit over `grid` would be preferable.
lambda_opt = cv.glmnet(x_train_mat, y_train_vec, alpha = 0, lambda = grid)$lambda.min
ridge.mod = glmnet(x_train_mat, y_train_vec, alpha = 0, lambda = lambda_opt)
ridge.pred = predict(ridge.mod, s = lambda_opt, newx = x_test_mat)
print("RMSE Ridge")
rmse(y_test_obs, ridge.pred)

# Lasso regression (alpha = 1): same procedure as ridge. lambda_opt is
# overwritten with the lasso value.
lambda_opt = cv.glmnet(x_train_mat, y_train_vec, alpha = 1, lambda = grid)$lambda.min
lasso.mod = glmnet(x_train_mat, y_train_vec, alpha = 1, lambda = lambda_opt)
lasso.pred = predict(lasso.mod, s = lambda_opt, newx = x_test_mat)
print("RMSE Lasso")
rmse(y_test_obs, lasso.pred)

# Principal components regression with cross-validation
pcr.mod = pcr(y_train~., data = df_train, validation = "CV")
validationplot(pcr.mod, val.type = "MSEP")
# Hence we choose M = 15 components (hard-coded; presumably read off the
# validation plot above - re-check if the data or seed change).
print("RMSE PCR")
rmse(y_test_obs, predict(pcr.mod, newdata = df_test[, 1:17], ncomp = 15))

# Partial least squares regression with cross-validation
plsr.mod = plsr(y_train~., data = df_train, validation = "CV")
validationplot(plsr.mod, val.type = "MSEP")
# Hence we choose M = 6 components (hard-coded; presumably read off the
# validation plot above - re-check if the data or seed change).
print("RMSE PLS")
rmse(y_test_obs, predict(plsr.mod, newdata = df_test[, 1:17], ncomp = 6))