In [12]:
library(class)
library(MASS)
library(pls)
library(lars)
library(lattice)

In [23]:
# Switch to the data directory so the relative file name below resolves,
# read oecd.csv into a data frame, and record which Country.y value the
# rest of the notebook filters on.
setwd("/home/matt/MSOR/ISYE7406/ML-Ex-Rates/Data")
ex_rate <- read.csv(file = "oecd.csv")
country <- "europe"

In [24]:
# Keep only the rows whose Country.y equals the selected country.
df <- subset(ex_rate, Country.y == country)
# Drop the listed columns, then discard every row that still contains an NA.
df_set <- subset(df, select = -c(X, Country.x, Country.y, Date, Yield_f,
                                 PrimeRate_f, FDI_f, Exchange_fut))
df_set <- na.omit(df_set)
# NOTE(review): an earlier note placed Exchange, Exchange_fut and Pct_Change
# in cols 17 - 19, but Exchange_fut is dropped above, so positions in df_set
# are shifted. Prefer addressing Pct_Chg by name - TODO confirm the layout.

# Split into training and testing sets
n <- nrow(df_set); n
n1 <- round(n / 5); n1
set.seed(19890211)
# Random hold-out indices for the alternative split that is disabled below.
# The draw is kept so the random-number state seen by later cells does not
# change; seq_len() avoids the c(1, 0) surprise of 1:n when n is 0.
flag <- sort(sample(seq_len(n), n1))
#train <- df_set[-flag,]; test <- df_set[flag,]
# Ordered split: the first n - n1 rows train, the last n1 rows test.
# head()/tail() return zero rows for a zero count, whereas (n-n1+1):n turns
# into a reversed range when n1 is 0.
train <- head(df_set, n - n1); test <- tail(df_set, n1)

In [25]:
# Linear Regression with all predictors

# Least-squares fit of Pct_Chg on every other column of `train`.
linreg <- lm(Pct_Chg ~ ., data = train)
summary(linreg)
# Use the predict() generic instead of calling the lm method directly.
# Predictors are looked up by name, so the whole test frame can be passed
# without dropping the response by column position.
linreg.test <- predict(linreg, newdata = test)
# Test error: mean squared difference between observed and predicted values.
# The response is addressed by name rather than by the magic index 18.
linreg.TE <- mean((test[["Pct_Chg"]] - linreg.test)^2)
linreg.TE


Call:
lm(formula = Pct_Chg ~ ., data = train)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.122388 -0.033051  0.000114  0.030453  0.137337 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.320e-01  2.417e-01   0.960 0.338684    
Infl_d       6.596e-02  8.925e-03   7.390 1.42e-11 ***
BOT_d        6.430e-06  1.935e-06   3.323 0.001149 ** 
Yield_d     -1.219e-02  1.957e-02  -0.623 0.534505    
FER_d       -4.187e-07  1.107e-06  -0.378 0.705766    
Int_d        8.322e-04  3.949e-02   0.021 0.983218    
PrimeRate_d  7.941e-03  4.467e-02   0.178 0.859164    
X1Y_Yield_d  5.762e-03  2.908e-03   1.981 0.049641 *  
GDPG_d      -2.472e-06  6.178e-07  -4.002 0.000104 ***
CA_d        -7.702e-07  2.648e-07  -2.908 0.004255 ** 
FDI_d       -9.570e-02  2.104e-02  -4.549 1.19e-05 ***
Infl_f      -8.213e-02  1.668e-02  -4.924 2.45e-06 ***
BOT_f        1.765e-06  1.038e-06   1.700 0.091396 .  
FER_f        5.285e-08  6.432e-07   0.082 0.934633    

In [26]:
# Stepwise variable selection
# Forward search with step(): start from the intercept-only model and add
# terms, never going beyond the formula of the full model.
regmodel <- lm(Pct_Chg ~ ., data = train)
min.model <- lm(Pct_Chg ~ 1, data = train)
max_model <- formula(regmodel)
linreg_AIC <- step(min.model, direction = 'forward',
                   scope = max_model, trace = FALSE)
# In the run whose output is recorded with this notebook, the variables
# kept by stepwise selection were:
# BOT_d, CA_d, FDI_d, CA_f, X1Y_Yield_d, Infl_f, Infl_d, GDPG_f, GDPG_d,
#  Exchange, X1Y_Yield_f, BOT_f
# (a different country or split can select a different set).
summary(linreg_AIC)
# predict() finds predictors by name, so the full test frame is passed and
# the response is read by name instead of by column position 18.
linreg_AIC.test <- predict(linreg_AIC, newdata = test)
linreg_AIC.TE <- mean((test[["Pct_Chg"]] - linreg_AIC.test)^2)
linreg_AIC.TE


Call:
lm(formula = Pct_Chg ~ BOT_d + CA_d + FDI_d + CA_f + X1Y_Yield_d + 
    Infl_f + Infl_d + GDPG_f + GDPG_d + Exchange + X1Y_Yield_f + 
    BOT_f, data = train)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.118844 -0.033133  0.000387  0.032253  0.141217 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.686e-01  7.808e-02   2.160 0.032478 *  
BOT_d        6.103e-06  1.872e-06   3.260 0.001400 ** 
CA_d        -7.866e-07  2.119e-07  -3.712 0.000296 ***
FDI_d       -8.758e-02  6.214e-03 -14.095  < 2e-16 ***
CA_f         9.633e-02  6.929e-03  13.902  < 2e-16 ***
X1Y_Yield_d  5.277e-03  2.468e-03   2.138 0.034237 *  
Infl_f      -7.969e-02  1.377e-02  -5.785 4.54e-08 ***
Infl_d       6.318e-02  7.709e-03   8.195 1.43e-13 ***
GDPG_f       4.071e-03  8.933e-04   4.557 1.12e-05 ***
GDPG_d      -2.469e-06  5.059e-07  -4.880 2.85e-06 ***
Exchange    -2.699e-01  6.967e-02  -3.873 0.000164 ***
X1Y_Yield_f  3.397e-02  9.808e-03   3.464 0.0

In [27]:
# Ridge Regression
# Predictors of the ridge model. This single vector builds both the model
# formula and the test design matrix, so the column order of the matrix
# always matches the order of the fitted coefficients.
# An earlier variant, left disabled, used Infl_d, Exchange, CA_d, CA_f,
# FDI_d, X1Y_Yield_f, GDPG_f, Yield_d, BOT_d, FER_f, FER_d, Infl_f, BOT_f,
# Int_f with lambda = seq(0,200,0.01).
ridge_vars <- c('Exchange', 'Yield_d', 'X1Y_Yield_f', 'Int_f',
                'PrimeRate_d', 'FDI_d', 'Infl_f', 'Infl_d')
ridgereg <- lm.ridge(reformulate(ridge_vars, response = 'Pct_Chg'),
                     data = train, lambda = seq(0, 10, 0.001))

# Position (within the lambda grid) of the smallest GCV value; this is an
# index into the grid, not the lambda value itself.
lambdaopt <- which.min(ridgereg$GCV)
lambdaopt
# Coefficient row for that lambda: intercept first, then one entry per
# predictor in formula order.
ridge_coef <- coef(ridgereg)[lambdaopt, ]
# Response plus the ridge predictors from the hold-out rows.
test_ridge <- test[, c('Pct_Chg', ridge_vars)]
# Design matrix: a leading column of ones for the intercept, then predictors.
ridge_testmat <- as.matrix(cbind(1, test_ridge[, ridge_vars]))
ridge.test <- ridge_testmat %*% ridge_coef
ridge.TE <- mean((test_ridge[['Pct_Chg']] - ridge.test)^2)
ridge.TE

In [28]:
# LASSO Regression
# Predictor columns for the LASSO fit, listed once so the training and test
# matrices are guaranteed to have the same columns in the same order.
lasso_vars <- c('Infl_d', 'Exchange', 'CA_d', 'CA_f', 'FDI_d',
                'X1Y_Yield_f', 'GDPG_f', 'Yield_d', 'BOT_d',
                'FER_f', 'FER_d', 'Infl_f', 'BOT_f', 'Int_f')
lassotrain <- train[, lasso_vars]
lassotest <- test[, lasso_vars]
# Response is taken by name rather than by column position 18.
lassoreg <- lars(x = as.matrix(lassotrain),
                 y = train[['Pct_Chg']], type = 'lasso')
# Pick the point on the path with the smallest Mallows Cp.
Cp1 <- summary(lassoreg)$Cp
index1 <- which.min(Cp1)
# coef() on a lars fit is a matrix with one row per path point; select the
# whole row (a single index would return just one matrix element).
lasso_coef <- coef(lassoreg)[index1, ]
# lars keeps one lambda per step, one fewer than the rows of the Cp/coef
# path. The final row is the unpenalised fit, so pad with 0; otherwise
# lambda would be NA whenever Cp is minimised at the last row.
lasso_lambda <- c(lassoreg$lambda, 0)[index1]
lassoreg.fit <- predict(lassoreg, as.matrix(lassotest),
                        s = lasso_lambda, type = 'fit', mode = 'lambda')
lassoreg.test <- lassoreg.fit$fit
lassoreg.TE <- mean((test[['Pct_Chg']] - lassoreg.test)^2)
lassoreg.TE

In [29]:
# Principal Component regression
# Fit PCR on all predictors with the package's default cross-validation.
# NOTE(review): predictors are not standardised (scale = TRUE is not set);
# confirm that is intended given their very different magnitudes.
pcreg <- pcr(Pct_Chg ~ ., data = train, validation = 'CV')
# Choose the number of components with the smallest cross-validated PRESS.
# validation$adj is only the bias-adjustment term used by MSEP(), not the
# CV error, so it must not be minimised directly.
pc.ncompopt <- which.min(pcreg$validation$PRESS)
# Predictors are matched by name, so the whole test frame can be passed.
pcreg.test <- predict(pcreg, ncomp = pc.ncompopt,
                      newdata = test)
# Response addressed by name rather than by column position 18.
pcreg.TE <- mean((test[['Pct_Chg']] - pcreg.test)^2)
pcreg.TE

In [30]:
# Partial Least Squares
# Fit PLS on all predictors with the package's default cross-validation.
plsreg <- plsr(Pct_Chg ~ ., data = train, validation = 'CV')
# Choose the number of components with the smallest cross-validated PRESS.
# plsreg$ncomp is simply the number of components that were fitted, so
# using it would ignore the cross-validation that was just run.
pls.ncompopt <- which.min(plsreg$validation$PRESS)
# Predictors are matched by name, so the whole test frame can be passed.
plsreg.test <- predict(plsreg, ncomp = pls.ncompopt,
                       newdata = test)
# Response addressed by name rather than by column position 18.
plsreg.TE <- mean((test[['Pct_Chg']] - plsreg.test)^2)
plsreg.TE

In [31]:
# Naive
# Baseline that always predicts zero: its test error is the mean of the
# squared observed Pct_Chg values. The response is read by name rather
# than by column position 18.
naive.TE <- mean(test[['Pct_Chg']]^2)
naive.TE