In [14]:
library(ggplot2)
library(gridExtra)
library(MASS)
library(e1071)
library(class)

In [15]:
#' Compare classifiers for the direction of a percentage change
#'
#' Reads `<country>_full.csv` from the working directory, derives a logical
#' `Direction` response (`Pct_Chg >= 0`) and estimates the hold-out
#' misclassification rate of LDA, QDA, naive Bayes and logistic regression
#' on two predictor sets:
#'   * "Full"   -- every column left in the file after `Pct_Chg` is dropped;
#'   * "Struct" -- Exchange, X1Y_Yield_d, X1Y_Yield_f, Int_d, Int_f,
#'                 Infl_d and Infl_f only.
#' A fair-coin guess ("Naive") is evaluated once, on the full data, and its
#' figures are reported in both halves of the result.
#'
#' Every round holds out one contiguous window of `round(n / 10) + 1` rows
#' and trains on all remaining rows.
#'
#' @param country Character prefix of the input file; the file read is
#'   `paste0(country, "_full.csv")`. It must contain a `Pct_Chg` column and
#'   the "Struct" columns listed above.
#' @param B Number of resampling rounds per classifier (a single number
#'   >= 1).
#' @return A 6 x 6 character matrix with row names
#'   `c("output1", "", "", "output2", "", "")`. Rows 1-3 are the header,
#'   mean and variance of the per-round error rates for the "Full" models;
#'   rows 4-6 are the same for the "Struct" models. Numbers are stored as
#'   character strings.
test_lin_class <- function(country, B){
    if (!is.numeric(B) || length(B) != 1 || is.na(B) || B < 1) {
        # `1:B` would silently run two rounds for B = 0; reject instead.
        stop("B must be a single number >= 1", call. = FALSE)
    }

    full <- read.csv(paste0(country, "_full.csv"))
    if (!"Pct_Chg" %in% names(full)) {
        stop("column 'Pct_Chg' not found in ", country, "_full.csv",
             call. = FALSE)
    }

    # Column 1 is overwritten with the response.
    # NOTE(review): presumably column 1 is a disposable row-index column
    # written by an earlier export step -- confirm against the CSV files.
    full[, 1] <- I(full$Pct_Chg >= 0)
    full <- subset(full, select = -c(Pct_Chg))
    colnames(full)[1] <- "Direction"
    rownames(full) <- seq_len(nrow(full))

    struct_cols <- c("Direction", "Exchange", "X1Y_Yield_d", "X1Y_Yield_f",
                     "Int_d", "Int_f", "Infl_d", "Infl_f")
    struct <- full[, struct_cols]

    n <- nrow(full)
    n1 <- round(n / 10)
    # Largest admissible first row of a hold-out window.
    # NOTE(review): the window is start:(start + n1), so its last row is at
    # most n - 1 and row n is never held out -- kept as in the original.
    max_start <- n - n1 - 1
    if (max_start < 1) {
        stop("not enough rows in ", country, "_full.csv to form a ",
             "hold-out window", call. = FALSE)
    }

    # Each entry fits on `train` and returns predicted labels for `test`.
    # Column 1 of both data frames is the response.
    classifiers <- list(
        lda = function(train, test) {
            predict(lda(train[, -1], train[, 1]), test[, -1])$class
        },
        qda = function(train, test) {
            predict(qda(train[, -1], train[, 1]), test[, -1])$class
        },
        bayes = function(train, test) {
            fit <- naiveBayes(train[, -1], as.factor(train[, 1]))
            predict(fit, test[, -1])
        },
        logreg = function(train, test) {
            fit <- glm(Direction ~ ., family = binomial(link = "logit"),
                       data = train)
            prob <- predict.glm(fit, newdata = test[, -1],
                                type = "response")
            as.vector(prob) >= .50
        }
    )

    # Baseline: an independent fair coin flip for every held-out row.
    coin_flip <- function(train, test) {
        sample(c(TRUE, FALSE), size = nrow(test),
               replace = TRUE, prob = c(0.5, 0.5))
    }

    # Run B hold-out rounds of `predict_fn` on `data` and return the B
    # misclassification rates. The seed is reset on every call so that
    # classifiers which draw no random numbers of their own are all scored
    # on the same sequence of windows (the coin flip consumes extra draws,
    # so its windows differ).
    resample_error <- function(data, predict_fn) {
        set.seed(19890211)
        vapply(seq_len(B), function(i) {
            start <- sample.int(max_start, 1)
            held_out <- start:(start + n1)
            test <- data[held_out, ]
            predicted <- predict_fn(data[-held_out, ], test)
            mean(predicted != test[, 1])
        }, numeric(1))
    }

    # Header / mean / variance rows for one predictor set. Names are
    # stripped so the resulting matrix carries no column names.
    summary_block <- function(label, errs) {
        rbind(
            c(label, "LDA", "QDA", "Naive Bayes", "Logistic", "Naive"),
            c("Mean", vapply(errs, mean, numeric(1), USE.NAMES = FALSE)),
            c("Variance", vapply(errs, var, numeric(1), USE.NAMES = FALSE))
        )
    }

    full_errs <- lapply(classifiers,
                        function(fn) resample_error(full, fn))
    struct_errs <- lapply(classifiers,
                          function(fn) resample_error(struct, fn))
    naive_errs <- resample_error(full, coin_flip)

    output <- rbind(
        summary_block("Full", c(unname(full_errs), list(naive_errs))),
        summary_block("Struct", c(unname(struct_errs), list(naive_errs)))
    )
    # Same row labels the original incremental rbind() calls produced.
    rownames(output) <- c("output1", "", "", "output2", "", "")
    output
}

In [16]:
# Evaluate every country with B = 100 resampling rounds per classifier.
# Each call reads '<country>_full.csv' relative to the working directory.
# The calls are kept as separate top-level expressions so that each returned
# summary matrix is displayed as cell output.
test_lin_class('canada',100)
test_lin_class('europe',100)
test_lin_class('mexico',100)
test_lin_class('japan',100)
test_lin_class('korea',100)

: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.152,0.306666666666667,0.371333333333333,0.272,0.525333333333333
,Variance,0.0231721661054994,0.0767676767676768,0.109562738496072,0.0469297418630752,0.0202271604938272
output2,Struct,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.295333333333333,0.381333333333333,0.318,0.316666666666667,0.525333333333333
,Variance,0.084781593714927,0.105327048260382,0.10557620650954,0.0833557800224467,0.0202271604938272


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.335,0.39,0.551,0.3225,0.4945
,Variance,0.0200252525252525,0.033989898989899,0.0560090909090909,0.0273421717171717,0.0121159090909091
output2,Struct,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.416,0.3675,0.53,0.418,0.4945
,Variance,0.0587313131313131,0.0350189393939394,0.0495959595959596,0.0601777777777778,0.0121159090909091


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.399166666666667,0.54,0.561666666666667,0.440833333333333,0.4975
,Variance,0.0421289281705948,0.0709147025813692,0.0748625140291807,0.035956088664422,0.019283810325477
output2,Struct,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.4475,0.475,0.5325,0.205833333333333,0.4975
,Variance,0.0722018799102132,0.0476290684624018,0.0538573232323232,0.0307877384960718,0.019283810325477


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.337173913043478,0.394565217391304,0.454782608695652,0.343260869565217,0.496086956521739
,Variance,0.0302218307842126,0.0464033052643639,0.0480006110251857,0.0359100933722862,0.00567470546676596
output2,Struct,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.396521739130435,0.445869565217391,0.440869565217391,0.393478260869565,0.496086956521739
,Variance,0.0428970231616734,0.0266320578182582,0.0367291058028298,0.0447432739493231,0.00567470546676596


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.2935,0.326,0.2975,0.272,0.485
,Variance,0.054275505050505,0.071589898989899,0.0489078282828283,0.0342585858585859,0.0114393939393939
output2,Struct,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.4055,0.4145,0.282,0.461,0.485
,Variance,0.0707017676767677,0.0618330808080808,0.0471474747474747,0.0887161616161616,0.0114393939393939
