In [1]:
library(ggplot2)
library(gridExtra)
library(MASS)
library(e1071)
library(class)

In [2]:
#' Compare hold-out misclassification rates of several direction classifiers
#'
#' Reads the exchange-rate CSV, keeps the rows whose `Country.y` equals
#' `country`, and labels each row with `Direction = (Pct_Chg >= 0)`. Two
#' predictor sets are evaluated after dropping incomplete rows:
#'   * full:       Exchange, X1Y_Yield_d/f, Int_d/f, Infl_d/f, GDPG_d/f,
#'                 BOT_f, BOT_d, FER_f
#'   * structural: Exchange, X1Y_Yield_d/f, Int_d/f, Infl_d/f
#' For each classifier (LDA, QDA, naive Bayes, logistic regression) and a
#' coin-flip baseline, `B` random contiguous windows of `round(n/10) + 1`
#' rows are held out, the model is fitted on the remaining rows, and the
#' misclassification rate on the window is recorded.
#'
#' Requires `lda`/`qda` (MASS) and `naiveBayes` (e1071) to be attached.
#'
#' @param country Value compared against the `Country.y` column.
#' @param B Number of random hold-out windows per classifier (>= 1).
#' @param csv_path Path of the CSV file to read. Defaults to the file name
#'   that used to be hard-coded.
#' @param seed RNG seed re-applied before every classifier's resampling
#'   loop. Defaults to the value that used to be hard-coded.
#' @return A 6 x 6 character matrix: a header row, a 'Mean' row and a
#'   'Variance' row for the full model (header row named "output1"),
#'   followed by the same three rows for the structural model (header row
#'   named "output2"). The 'Naive' column is the coin-flip baseline; it is
#'   computed once on the full data and repeated in both tables.
test_lin_class <- function(country, B, csv_path = 'topfive.csv',
                           seed = 19890211){
    if (!is.numeric(B) || length(B) != 1 || is.na(B) || B < 1) {
        stop('B must be a single number >= 1', call. = FALSE)
    }
    n_rep <- floor(B)

    ex_rates <- read.csv(csv_path)
    ex_rates <- subset(ex_rates, ex_rates$Country.y==country)
    ex_rates$Direction <- ex_rates$Pct_Chg >= 0

    full <- subset(ex_rates, select = c(Direction, Exchange, X1Y_Yield_d,
                X1Y_Yield_f, Int_d, Int_f,  Infl_d, Infl_f, GDPG_d,
                GDPG_f, BOT_f, BOT_d, FER_f))
    full <- na.omit(full)

    n <- nrow(full)
    n1 <- round(n/10)
    # Largest admissible first row of a hold-out window start:(start + n1).
    max_start <- n - n1 - 1
    if (max_start < 1) {
        stop('not enough complete rows for country "', country,
             '" (found ', n, ')', call. = FALSE)
    }

    # Renumber rows so that integer positions and row names coincide.
    rownames(full) <- seq_len(n)
    struct <- subset(full, select = c(Direction, Exchange, X1Y_Yield_d,
                X1Y_Yield_f, Int_d, Int_f,  Infl_d, Infl_f))

    # Each predictor takes (train, test) data frames whose first column is
    # Direction and returns one predicted label per test row.
    predict_lda <- function(train, test){
        predict(lda(train[,-1], train[,1]), test[,-1])$class
    }
    predict_qda <- function(train, test){
        predict(qda(train[,-1], train[,1]), test[,-1])$class
    }
    predict_bayes <- function(train, test){
        predict(naiveBayes(train[,-1], as.factor(train[,1])), test[,-1])
    }
    predict_logit <- function(train, test){
        fit <- glm(Direction ~ ., family = binomial(link='logit'),
                   data = train)
        prob <- predict(fit, newdata = test[,-1], type = 'response')
        as.vector(prob) >= .50
    }
    # Baseline: an independent fair coin flip per test row. The explicit
    # `prob` argument is kept so the random-number stream is unchanged.
    predict_coin <- function(train, test){
        sample(c(TRUE,FALSE), size = nrow(test),
               replace = TRUE, prob = c(0.5,0.5))
    }

    # Hold-out error of one classifier over n_rep random windows. The seed
    # is reset on entry, and the window start is drawn before the predictor
    # runs, so the draw order matches a plain per-classifier loop.
    holdout_errors <- function(data, predict_fold){
        set.seed(seed)
        errors <- numeric(n_rep)
        for(i in seq_len(n_rep)){
            start <- sample.int(max_start, 1)
            flag <- start:(start + n1)
            train <- data[-flag,]
            test <- data[flag,]
            errors[i] <- mean(predict_fold(train, test) != test[,1])
        }
        errors
    }

    # Character table: header row, then mean and variance of each error
    # vector. `row_name` labels the header row (kept for compatibility with
    # the row names callers have seen so far); columns stay unnamed.
    summary_table <- function(label, row_name, error_lists){
        tab <- rbind(
            c(label, names(error_lists)),
            c('Mean', vapply(error_lists, mean, numeric(1),
                             USE.NAMES = FALSE)),
            c('Variance', vapply(error_lists, var, numeric(1),
                                 USE.NAMES = FALSE)))
        dimnames(tab) <- list(c(row_name, '', ''), NULL)
        tab
    }

    naive_errors <- holdout_errors(full, predict_coin)

    full_errors <- list(
        'LDA' = holdout_errors(full, predict_lda),
        'QDA' = holdout_errors(full, predict_qda),
        'Naive Bayes' = holdout_errors(full, predict_bayes),
        'Logistic' = holdout_errors(full, predict_logit),
        'Naive' = naive_errors)

    struct_errors <- list(
        'LDA' = holdout_errors(struct, predict_lda),
        'QDA' = holdout_errors(struct, predict_qda),
        'Naive Bayes' = holdout_errors(struct, predict_bayes),
        'Logistic' = holdout_errors(struct, predict_logit),
        'Naive' = naive_errors)

    # The second table used to be mislabelled 'Full' as well.
    rbind(summary_table('Full', 'output1', full_errors),
          summary_table('Structural', 'output2', struct_errors))
}

In [3]:
# Evaluate every country with 100 random hold-out windows per classifier.
# The calls stay as separate top-level expressions so that each returned
# table is displayed on its own.
test_lin_class(country = "canada", B = 100)
test_lin_class(country = "europe", B = 100)
test_lin_class(country = "mexico", B = 100)
test_lin_class(country = "japan", B = 100)
test_lin_class(country = "korea", B = 100)

: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.288125,0.300625,0.30875,0.29625,0.495625
,Variance,0.0703042140151515,0.0580567392676768,0.0985495580808081,0.0753330176767677,0.0150927241161616
output2,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.311875,0.39375,0.30125,0.304375,0.495625
,Variance,0.0811627998737374,0.0995501893939394,0.089676452020202,0.0824380523989899,0.0150927241161616


0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.351,0.309,0.467,0.324,0.4835
,Variance,0.0386858585858586,0.0281,0.0537484848484849,0.0336606060606061,0.0127805555555556
output2,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.408,0.366,0.527,0.4075,0.4835
,Variance,0.0537737373737374,0.031610101010101,0.0473949494949495,0.0530997474747475,0.0127805555555556


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.515384615384615,0.524615384615385,0.533076923076923,0.284615384615385,0.480769230769231
,Variance,0.0747713824636902,0.0648353356045664,0.0788064072679457,0.0398661167891937,0.0188123841969996
output2,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.443846153846154,0.527692307692308,0.533846153846154,0.219230769230769,0.480769230769231
,Variance,0.0668047337278107,0.0402868925945849,0.0535317673779212,0.0302880879803957,0.0188123841969996


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.408936170212766,0.408510638297872,0.462765957446808,0.412765957446808,0.498297872340426
,Variance,0.039074127421796,0.0548902332514827,0.0483913375493276,0.0453608058859304,0.00579978142676196
output2,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.395531914893617,0.475744680851064,0.466595744680851,0.399148936170213,0.498297872340426
,Variance,0.0397830271936202,0.0288216707591991,0.0422811638339026,0.04301155511658,0.00579978142676196


: glm.fit: fitted probabilities numerically 0 or 1 occurred

0,1,2,3,4,5,6
output1,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.315238095238095,0.337619047619048,0.314761904761905,0.278571428571429,0.496666666666667
,Variance,0.0589926475640761,0.0563501225405987,0.057852676424105,0.0411999816761721,0.0150830298449346
output2,Full,LDA,QDA,Naive Bayes,Logistic,Naive
,Mean,0.404761904761905,0.437142857142857,0.274285714285714,0.458571428571429,0.496666666666667
,Variance,0.0697908793146888,0.0586994663185139,0.0452195423623995,0.086564282278568,0.0150830298449346
