In [2]:
setwd('/home/matt/MSOR/ISYE7406/ML-Ex-Rates/Results')
library(class)
library(MASS)
library(pls)
library(lars)
library(lattice)

In [3]:
#setwd('/home/matt/MSOR/ISYE7406/ML-Ex-Rates/Data')
#ex_rate <- read.csv('oecd.csv')
#ex_rate <- subset(ex_rate, ex_rate$Country.y == 'canada' |
#                  ex_rate$Country.y == 'europe' |
#                  ex_rate$Country.y == 'mexico' |
#                  ex_rate$Country.y == 'japan' |
#                  ex_rate$Country.y == 'korea')
#setwd('/home/matt/MSOR/ISYE7406/ML-Ex-Rates/Results')
#write.csv(ex_rate, file = 'topfive.csv')

In [5]:
test_lin_models <- function(country, B){
    
        ex_rates <- read.csv('topfive.csv')
        ex_rates <- subset(ex_rates, ex_rates$Country.y==country)
    
        full <- subset(ex_rates, select = c(Pct_Chg, Exchange, X1Y_Yield_d, 
                      X1Y_Yield_f, Int_d, Int_f,  Infl_d, Infl_f, GDPG_d,
                      GDPG_f, BOT_f, BOT_d, FER_f))
        full <- na.omit(full)
        rownames(full) <- 1:nrow(full)
        struct <- subset(full, select = c(Pct_Chg, Exchange, X1Y_Yield_d, 
                      X1Y_Yield_f, Int_d, Int_f,  Infl_d, Infl_f))

        # Split into training and testing sets
        n <- dim(full)[1]
        n1 <- round(n/10)
        B <- 100
    
        # Test Full Model

        # Linear Regression -- Full Model
        set.seed(19890211)
        linreg.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- full[-flag,]; test <- full[flag,]
    
            regmodel <- lm(Pct_Chg ~ ., data = train)
            linreg.test <- predict.lm(regmodel,test[,-18])
            linreg.TE <- mean( (test[,1]-linreg.test)^2)
            linreg.TE_list <- c(linreg.TE_list, linreg.TE)
    
        }    
    
        # Ridge Regression -- Full Model
        set.seed(19890211)
        ridge.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- full[-flag,]; test <- full[flag,]
    
            ridgereg <- lm.ridge(Pct_Chg ~ ., data = train, 
                        lambda = seq(0,10,0.001))
            lambdaopt <- which.min(ridgereg$GCV)
            ridge_coef <- coef(ridgereg)[lambdaopt,]
            ridge_testmat <- as.matrix(cbind(1,test[,-1]))
            ridge.test <- as.matrix(ridge_testmat %*% ridge_coef)
            ridge.TE <- mean( (test[,1] - ridge.test)^2)
            ridge.TE_list <- c(ridge.TE_list, ridge.TE)
    
        }
    
        # Lasso Regression -- Full Model
        set.seed(19890211)
        lasso.TE_list <- vector(length=0)
        lasso_lambda <- 0
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- full[-flag,]; test <- full[flag,]
    
            lassoreg <- lars( x= as.matrix(train[,-1]),
                            y = train[,1], type = 'lasso')
            Cp1 <- summary(lassoreg)$Cp
            index1 <- which.min(Cp1)
            lasso_coef <- coef(lassoreg)[index1,]
            if (is.na(lassoreg$lambda[index1])==FALSE){
                lasso_lambda <- lassoreg$lambda[index1]
            }
            lassoreg.fit <- predict(lassoreg, as.matrix(test[,-1]),
                                   s = lasso_lambda, type = 'fit',
                                   mode = 'lambda')
            lassoreg.test <- lassoreg.fit$fit
            lassoreg.TE <- mean( (test[,1]-lassoreg.test)^2)
            lasso.TE_list <- c(lasso.TE_list, lassoreg.TE)
                
        }
    
        # PCR -- Full Model
        set.seed(19890211)
        pcreg.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- full[-flag,]; test <- full[flag,]
            
            pcreg <- pcr(Pct_Chg ~ ., data = train,
                        validation = 'CV')
            comps <- dim(train)[2]-1
            pcreg.test <- predict(pcreg, ncomp = comps,
                                 newdata = test[,-1])
            pcreg.TE <- mean((test[,1]-pcreg.test)^2)
            pcreg.TE_list <- c(pcreg.TE_list, pcreg.TE)
        }
    
        # Test Structural Model

        # Linear Regression -- Structural Model
        set.seed(19890211)
        linreg2.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- struct[-flag,]; test <- struct[flag,]
    
            regmodel <- lm(Pct_Chg ~ ., data = train)
            linreg.test <- predict.lm(regmodel,test[,-18])
            linreg.TE <- mean( (test[,1]-linreg.test)^2)
            linreg2.TE_list <- c(linreg2.TE_list, linreg.TE)
    
        }    
    
        # Ridge Regression -- Structural Model
        set.seed(19890211)
        ridge2.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- struct[-flag,]; test <- struct[flag,]
    
            ridgereg <- lm.ridge(Pct_Chg ~ ., data = train, 
                        lambda = seq(0,10,0.001))
            lambdaopt <- which.min(ridgereg$GCV)
            ridge_coef <- coef(ridgereg)[lambdaopt,]
            ridge_testmat <- as.matrix(cbind(1,test[,-1]))
            ridge.test <- as.matrix(ridge_testmat %*% ridge_coef)
            ridge.TE <- mean( (test[,1] - ridge.test)^2)
            ridge2.TE_list <- c(ridge2.TE_list, ridge.TE)
    
        }
    
        # Lasso Regression -- Structural Model
        set.seed(19890211)
        lasso2.TE_list <- vector(length=0)
        lasso_lambda <- 0
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- struct[-flag,]; test <- struct[flag,]
    
            lassoreg <- lars( x= as.matrix(train[,-1]),
                            y = train[,1], type = 'lasso')
            Cp1 <- summary(lassoreg)$Cp
            index1 <- which.min(Cp1)
            lasso_coef <- coef(lassoreg)[index1,]
            if (is.na(lassoreg$lambda[index1])==FALSE){
                lasso_lambda <- lassoreg$lambda[index1]
            }
            lassoreg.fit <- predict(lassoreg, as.matrix(test[,-1]),
                                   s = lasso_lambda, type = 'fit',
                                   mode = 'lambda')
            lassoreg.test <- lassoreg.fit$fit
            lassoreg.TE <- mean( (test[,1]-lassoreg.test)^2)
            lasso2.TE_list <- c(lasso2.TE_list, lassoreg.TE)
                
        }
    
        # PCR -- Structural Model
        set.seed(19890211)
        pcreg2.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- struct[-flag,]; test <- struct[flag,]
            
            pcreg <- pcr(Pct_Chg ~ ., data = train,
                        validation = 'CV')
            comps <- dim(train)[2]-1
            pcreg.test <- predict(pcreg, ncomp = comps,
                                 newdata = test[,-1])
            pcreg.TE <- mean((test[,1]-pcreg.test)^2)
            pcreg2.TE_list <- c(pcreg2.TE_list, pcreg.TE)
        }
    
        # Naive
        set.seed(19890211)
        naive.TE_list <- vector(length=0)
        for(i in 1:B){
            start <- sample(1:(n-n1-1),1)
            flag <- (start):(start+n1)
            train <- full[-flag,]; test <- full[flag,]
            naive.TE <- mean ( (test[,1])^2)
            naive.TE_list <- c(naive.TE_list,naive.TE)
        }
    
        output1 <- c('Full','Linear','Ridge',
                     'Lasso','PCR','Naive')
        output1 <- rbind(output1, c('Mean', mean(linreg.TE_list),
                             mean(ridge.TE_list),mean(lasso.TE_list),
                             mean(pcreg.TE_list), mean(naive.TE_list)))
        output1 <- rbind(output1, c('Variance', var(linreg.TE_list),
                             var(ridge.TE_list), var(lasso.TE_list),
                             var(pcreg.TE_list),var(naive.TE_list)))
    
        output2 <- c('Struct','Linear','Ridge',
                     'Lasso','PCR','Naive')
        output2 <- rbind(output2, c('Mean', mean(linreg2.TE_list),
                             mean(ridge2.TE_list),mean(lasso2.TE_list),
                              mean(pcreg2.TE_list), mean(naive.TE_list)))
        output2 <- rbind(output2, c('Variance', var(linreg2.TE_list),
                             var(ridge2.TE_list), var(lasso2.TE_list),
                              var(pcreg2.TE_list),var(naive.TE_list)))
        output <- rbind(output1,output2)
        return(output)
}

In [6]:
test_lin_models('canada',100)
test_lin_models('europe',100)
test_lin_models('mexico',100)
test_lin_models('japan',100)
test_lin_models('korea',100)

0,1,2,3,4,5,6
output1,Full,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.00483999638751131,0.00488048295637994,0.00478805865957031,0.00520934705777391,0.00610769499132334
,Variance,2.46917859646307e-05,2.36241847748025e-05,2.25393891711527e-05,2.27018392468807e-05,4.10189829401097e-05
output2,Struct,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.00342551510986978,0.00344462130175362,0.00350206587263368,0.00410120021802052,0.00610769499132334
,Variance,1.03645109050545e-05,1.05889317393333e-05,1.09894166907585e-05,1.67618012121903e-05,4.10189829401097e-05


0,1,2,3,4,5,6
output1,Full,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.0131555212544343,0.013196111875579,0.0132277883907457,0.0130694490594495,0.010847548742416
,Variance,0.000399926112532473,0.000401886557343513,0.000393967018801521,0.000320121225181655,5.9948989743761e-05
output2,Struct,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.011276730540153,0.011295924777219,0.0115382949487511,0.0114198852108727,0.010847548742416
,Variance,0.000100764427003758,0.000100018208719794,9.78323719934165e-05,8.98508214174915e-05,5.9948989743761e-05


0,1,2,3,4,5,6
output1,Full,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.00904790946187816,0.00926301642066344,0.00884656978597254,0.00753829397260489,0.0093382107222342
,Variance,0.000185067353126297,0.000201065764077398,0.000176340563313824,0.000114694996308384,0.000152102795732646
output2,Struct,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.0061014664401741,0.00610700173069355,0.00606512454783022,0.00502023561831004,0.0093382107222342
,Variance,5.93080475205169e-05,5.86128025004666e-05,5.98854220253644e-05,3.91998586371782e-05,0.000152102795732646


0,1,2,3,4,5,6
output1,Full,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.0328712458205239,0.0328283164632264,0.0316950803531264,0.0683321645075422,0.0202634373884816
,Variance,0.000599846251257682,0.000588609682175689,0.000495313994505817,0.134534319571927,0.000274684212923327
output2,Struct,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.0260151811200916,0.025771405740637,0.0253633996121158,0.0512960074108224,0.0202634373884816
,Variance,0.000278627608826537,0.000272923764434986,0.000262500915782667,0.0660408281370806,0.000274684212923327


0,1,2,3,4,5,6
output1,Full,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.00526956889435899,0.00520408444531798,0.00523524112961232,0.00502767285847297,0.0106720652856092
,Variance,2.17030410727279e-05,2.12359222617513e-05,2.23757325932911e-05,2.07725764479649e-05,0.00016953089360253
output2,Struct,Linear,Ridge,Lasso,PCR,Naive
,Mean,0.00796752053216236,0.00764697851114573,0.00764439148678298,0.00863724350878538,0.0106720652856092
,Variance,4.16888939610318e-05,3.91042070697416e-05,3.80333655479199e-05,4.83255889051608e-05,0.00016953089360253
