In [2]:
library(class)
library(MASS)
library(pls)
library(lars)
library(lattice)

In [14]:
# Switch the session's working directory to the data folder so that the
# relative file name below resolves. This affects every later relative path
# used in the session.
setwd('/home/matt/MSOR/ISYE7406/ML-Ex-Rates/Data')
# Load the CSV into the global data frame `ex_rate`; test_lin_models() reads
# this object directly rather than taking it as an argument.
ex_rate <- read.csv('oecd.csv')

In [24]:
# Run the model comparison once per country, passing B = 100 as the number of
# resampled train/test splits. Each call is a top-level expression, so the
# notebook displays the returned summary table after the cell.
# test_lin_models() and `ex_rate` must already exist in the session (the
# function definition appears in a later cell of this file).
test_lin_models('canada',100)
test_lin_models('europe',100)
test_lin_models('mexico',100)
test_lin_models('japan',100)
test_lin_models('korea',100)

0,1,2,3,4
output,Measure,Step,Ridge,Naive
,Mean,0.0095003070684963,0.00557885150813592,0.00711638519785159
,Variance,6.89783560612541e-05,1.08304149737592e-05,3.57907347521221e-05


0,1,2,3,4
output,Measure,Step,Ridge,Naive
,Mean,0.0249765370753851,0.0190941820562045,0.0122329760423686
,Variance,0.00066397207948591,0.000236784208550773,2.42574617624061e-05


0,1,2,3,4
output,Measure,Step,Ridge,Naive
,Mean,0.0187883382445681,0.00680272279598375,0.00842874833881592
,Variance,0.000690708462255487,5.36639395900579e-05,5.50111180456639e-05


0,1,2,3,4
output,Measure,Step,Ridge,Naive
,Mean,0.0232411186352456,0.0257441401238366,0.0193478540247889
,Variance,0.000639879995321904,0.000314330790447216,7.16137923150832e-05


0,1,2,3,4
output,Measure,Step,Ridge,Naive
,Mean,0.00589473611065569,0.00907550059905802,0.0123134120094243
,Variance,6.02811624300079e-06,4.57667420403888e-05,9.30619198954257e-05


In [22]:
# Compare the out-of-sample squared error of three predictors of `Pct_Chg`
# for a single country:
#   * Step  - forward stepwise linear regression selected by AIC,
#   * Ridge - ridge regression on a fixed set of seven predictors, with the
#             penalty chosen by generalised cross-validation (GCV),
#   * Naive - always predicting a change of zero.
#
# Each model is scored on B random splits. A split holds out one contiguous
# block of n1 + 1 rows (n1 = round(n / 5)) as the test set and trains on all
# remaining rows.
#
# Arguments:
#   country  Value matched against `ex_rate$Country.y` to pick the rows.
#   B        Number of random splits evaluated per model (default 100).
#
# Returns a 3 x 4 character matrix: a header row
# ('Measure', 'Step', 'Ridge', 'Naive'), then the mean and the variance of
# the B test errors for each model.
#
# Reads the global data frame `ex_rate`. Calls set.seed(), so the result is
# reproducible but the caller's random-number stream is reset on every call.
#
# NOTE(review): the three models draw their splits one after another from
# the same stream, so they are not scored on identical splits. That is kept
# as-is so existing results stay reproducible.
test_lin_models <- function(country, B = 100) {
  if (!is.numeric(B) || length(B) != 1L || is.na(B) || B < 1) {
    stop("'B' must be a single number >= 1", call. = FALSE)
  }

  # Columns are addressed by name rather than by position so the function
  # does not depend on the column order of the input file.
  response <- "Pct_Chg"
  dropped <- c("X", "Country.x", "Country.y", "Date", "Yield_f",
               "PrimeRate_f", "FDI_f", "Exchange_fut")
  ridge_vars <- c("Exchange", "X1Y_Yield_d", "X1Y_Yield_f", "Int_d",
                  "Int_f", "Infl_d", "Infl_f")

  df <- subset(ex_rate, ex_rate$Country.y == country)
  df_set <- na.omit(df[, setdiff(names(df), dropped), drop = FALSE])

  n <- nrow(df_set)
  n1 <- round(n / 5)
  n_starts <- n - n1 - 1  # number of admissible first rows for a test block
  if (n1 < 1 || n_starts < 1) {
    stop("not enough complete rows for country '", country, "'",
         call. = FALSE)
  }

  set.seed(19890211)

  # Draw one random split: a contiguous test block and the remaining rows.
  draw_split <- function() {
    start <- sample.int(n_starts, 1)
    held_out <- start:(start + n1)
    list(train = df_set[-held_out, , drop = FALSE],
         test = df_set[held_out, , drop = FALSE])
  }

  # Test MSE of the forward AIC stepwise model on one fresh split.
  # `train` must be a plain local variable: step() re-evaluates the lm()
  # call in this frame and looks the data up by that name.
  step_error <- function(i) {
    parts <- draw_split()
    train <- parts$train
    test <- parts$test
    full_model <- lm(Pct_Chg ~ ., data = train)
    null_model <- lm(Pct_Chg ~ 1, data = train)
    chosen <- step(null_model, direction = "forward",
                   scope = formula(full_model), trace = FALSE)
    mean((test[[response]] - predict(chosen, newdata = test))^2)
  }

  # Test MSE of the GCV-tuned ridge regression on one fresh split.
  ridge_error <- function(i) {
    parts <- draw_split()
    train <- parts$train
    test <- parts$test
    ridge_fit <- lm.ridge(Pct_Chg ~ Exchange + X1Y_Yield_d +
                            X1Y_Yield_f + Int_d + Int_f + Infl_d + Infl_f,
                          data = train, lambda = seq(0, 10, 0.001))
    best <- which.min(ridge_fit$GCV)
    beta <- coef(ridge_fit)[best, ]
    # coef() puts the intercept first, so prepend a column of ones.
    design <- as.matrix(cbind(1, test[, ridge_vars]))
    mean((test[[response]] - design %*% beta)^2)
  }

  # Test MSE of the zero-change forecast on one fresh split.
  naive_error <- function(i) {
    test <- draw_split()$test
    mean(test[[response]]^2)
  }

  # Evaluate the models one after another (Step, Ridge, Naive) so the random
  # draws are consumed in the same order as before.
  replicates <- seq_len(B)
  step_mse <- vapply(replicates, step_error, numeric(1))
  ridge_mse <- vapply(replicates, ridge_error, numeric(1))
  naive_mse <- vapply(replicates, naive_error, numeric(1))

  output <- c('Measure', 'Step', 'Ridge', 'Naive')
  output <- rbind(output, c('Mean', mean(step_mse),
                            mean(ridge_mse), mean(naive_mse)))
  output <- rbind(output, c('Variance', var(step_mse),
                            var(ridge_mse), var(naive_mse)))
  output
}