In [None]:
## Importing packages

# This R environment comes with all of CRAN and many other helpful packages preinstalled.
# You can see which packages are installed by checking out the kaggle/rstats docker image: 
# https://github.com/kaggle/docker-rstats

library(tidyverse) # metapackage with lots of helpful functions

## Running code

# In a notebook, you can run a single code cell by clicking in the cell and then hitting 
# the blue arrow to the left, or by clicking in the cell and pressing Shift+Enter. In a script, 
# you can run code by highlighting the code you want to run and then clicking the blue arrow
# at the bottom of this window.

## Reading in files

# You can access files from datasets you've added to this kernel in the "../input/" directory.
# You can see the files added to this kernel by running the code below. 

# Show what is available in the kernel's input directory.
list.files("../input")

## Saving data

# If you save any files or images, these will be put in the "output" directory. You 
# can see the output directory by committing and running your kernel (using the 
# Commit & Run button) and then checking out the compiled version of your kernel.

In [None]:
# Read the digits CSV from the kernel's input directory.
Data <- read_csv(file = "../input/digits-dataset/digits.csv")

In [None]:
#' Neighbourhood Components Analysis fitted by per-sample gradient ascent
#'
#' Learns a linear transformation `A` by repeatedly visiting every
#' observation, computing its leave-one-out softmax neighbour probabilities
#' in the transformed space, and stepping `A` along the resulting gradient.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   observation per row.
#' @param labels Vector of class labels, one per row of `x`.
#' @param A_init Starting transformation with `ncol(x)` columns. Defaults to
#'   the identity.
#' @param N_iter Number of full passes over the observations.
#' @param learning_rate Step size applied to every per-observation update.
#' @return A list with `A` (final transformation), `p` (per-observation
#'   probability of picking a same-label neighbour, from the last pass),
#'   `A_norm` (`A` divided by `A[1, 1]`) and `p_cum` (sum of `p` after each
#'   pass).
nca <- function(x,
                labels,
                A_init = diag(ncol(x)),
                N_iter = 1e2,
                learning_rate = 0.01) {
  x <- as.matrix(x)
  #labels <- as.factor(labels)

  A <- A_init

  N <- nrow(x)
  stopifnot(NROW(x) == length(labels))

  p <- numeric(N)
  p_cum <- numeric(N_iter)
  for (it in seq_len(N_iter)) {
    for (i in seq_len(N)) {
      # Leave-one-out softmax over squared distances in the transformed
      # space. A changes after every observation, so the projection has to
      # be recomputed here.
      D <- tcrossprod(A, x)         # (dA, N)
      D <- D - as.numeric(D[, i])
      p_ik <- exp(-colSums(D * D))  # (N)

      p_ik[i] <- 0
      softmax <- sum(p_ik)
      # Only normalise when the total has not underflowed to (almost) zero.
      if (softmax > .Machine$double.eps) {
        p_ik <- p_ik / softmax      # (N)
      }

      # Neighbours that predict the correct label.
      correct_label <- labels == labels[i]  # (N)

      p[i] <- sum(p_ik[correct_label])
      d <- t(t(x) - as.numeric(x[i, ]))     # (N, dx)
      pd <- p_ik * d                        # (N, dx)

      # drop = FALSE keeps the subsets as matrices. Without it a class with
      # a single member collapses to a vector, crossprod() returns a 1 x 1
      # matrix and the subtraction is non-conformable.
      g <- p[i] * crossprod(d, pd) -
        crossprod(
          d[correct_label, , drop = FALSE],
          pd[correct_label, , drop = FALSE]
        )                                   # (dx, dx)
      A <- A + learning_rate * (A %*% g)    # (dA, dx)
      # Alternative scalar update, kept for reference:
      # d  <- t(x) - as.numeric(x[i,])  # (dx, N)
      # d2 <- p_ik * colSums(d * d) # (N)
      #
      # A <- A + learning_rate * A * (p[i]*sum(d2) - sum(d2[correct_label]))
    }
    p_cum[it] <- sum(p)
  }

  list(
    A = A,
    p = p,
    A_norm = A / A[1, 1],
    p_cum = p_cum
  )
}

#' Diagonal min-max scaling matrix
#'
#' Builds a diagonal matrix whose entries are the reciprocals of the
#' per-column ranges (max - min) of `x`.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   feature per column.
#' @return A square matrix with `ncol(x)` rows and `1 / range` of each
#'   column on its diagonal. Constant columns get a factor of 1 so the
#'   result stays finite.
scaling <- function(x) {
  x <- as.matrix(x)
  # Per-column range. The minima are column statistics, so they must be
  # removed column-wise rather than swept across rows.
  col_range <- apply(x, 2, max) - apply(x, 2, min)
  # A constant column has zero range; dividing by it would give Inf.
  col_range[!is.na(col_range) & col_range == 0] <- 1
  # nrow is given explicitly so a single column still yields a 1 x 1 matrix.
  diag(1 / col_range, nrow = length(col_range))
}

#x <- iris[1:4]
#x <- as.matrix(x)
#labels <- iris[[5]]
#A <- diag(ncol(x))
#A <- scaling(x)
#pca <- prcomp(x)
#A <- t(pca$rotation[,1:2])
#A <- matrix(runif(4*2), ncol=4, nrow=4)
#res <- nca(x=x, labels = labels, A_init = A, N_iter = 200, learning_rate = 1e-2)
#res$A_norm
#
# # 2d projection
#x_2d <- t(tcrossprod(res$A, x))
#x_2d <- as.data.frame(x_2d)
#
#plot(x_2d, col=iris$Species)

# Features: the first 64 columns of Data.
x <- as.matrix(Data[1:64])
# Labels are read from column 65; column 1 is one of the feature columns
# selected above, so it cannot serve as the class label.
# NOTE(review): assumes column 65 holds the digit class -- confirm against
# the CSV header.
labels <- Data[[65]]
# Start from the diagonal scaling matrix.
A <- scaling(x)
#pca <- prcomp(x)
#A <- t(pca$rotation[,1:2])
#A <- matrix(runif(4*2), ncol=4, nrow=4)
res <- nca(x = x, labels = labels, A_init = A, N_iter = 200, learning_rate = 1e-2)
res$A_norm

# Apply the learned transformation to every observation.
x_2d <- as.data.frame(t(tcrossprod(res$A, x)))

# `col` needs a vector of colours, not a one-column tibble; map each label
# to an integer colour index.
plot(x_2d, col = as.integer(as.factor(labels)))


In [None]:
# Show column 65 of Data, the column the NCA run below reads its labels from.
Data[65]

In [None]:
#' Neighbourhood Components Analysis fitted by per-sample gradient ascent
#'
#' Learns a linear transformation `A` by repeatedly visiting every
#' observation, computing its leave-one-out softmax neighbour probabilities
#' in the transformed space, and stepping `A` along the resulting gradient.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   observation per row.
#' @param labels Vector of class labels, one per row of `x`; converted to a
#'   factor internally.
#' @param A_init Starting transformation with `ncol(x)` columns. Defaults to
#'   the identity.
#' @param N_iter Number of full passes over the observations.
#' @param learning_rate Step size applied to every per-observation update.
#' @return A list with `A` (final transformation), `p` (per-observation
#'   probability of picking a same-label neighbour, from the last pass),
#'   `A_norm` (`A` divided by `A[1, 1]`) and `p_cum` (sum of `p` after each
#'   pass).
nca <- function(x,
                labels,
                A_init = diag(ncol(x)),
                N_iter = 1e2,
                learning_rate = 0.01) {
  x <- as.matrix(x)
  labels <- as.factor(labels)

  A <- A_init

  N <- nrow(x)
  stopifnot(NROW(x) == length(labels))

  p <- numeric(N)
  p_cum <- numeric(N_iter)
  for (it in seq_len(N_iter)) {
    for (i in seq_len(N)) {
      # Leave-one-out softmax over squared distances in the transformed
      # space. A changes after every observation, so the projection has to
      # be recomputed here.
      D <- tcrossprod(A, x)         # (dA, N)
      D <- D - as.numeric(D[, i])
      p_ik <- exp(-colSums(D * D))  # (N)

      p_ik[i] <- 0
      softmax <- sum(p_ik)
      # Only normalise when the total has not underflowed to (almost) zero.
      if (softmax > .Machine$double.eps) {
        p_ik <- p_ik / softmax      # (N)
      }

      # Neighbours that predict the correct label.
      correct_label <- labels == labels[i]  # (N)

      p[i] <- sum(p_ik[correct_label])
      d <- t(t(x) - as.numeric(x[i, ]))     # (N, dx)
      pd <- p_ik * d                        # (N, dx)

      # drop = FALSE keeps the subsets as matrices. Without it a class with
      # a single member collapses to a vector, crossprod() returns a 1 x 1
      # matrix and the subtraction is non-conformable.
      g <- p[i] * crossprod(d, pd) -
        crossprod(
          d[correct_label, , drop = FALSE],
          pd[correct_label, , drop = FALSE]
        )                                   # (dx, dx)
      A <- A + learning_rate * (A %*% g)    # (dA, dx)
      # Alternative scalar update, kept for reference:
      # d  <- t(x) - as.numeric(x[i,])  # (dx, N)
      # d2 <- p_ik * colSums(d * d) # (N)
      #
      # A <- A + learning_rate * A * (p[i]*sum(d2) - sum(d2[correct_label]))
    }
    p_cum[it] <- sum(p)
  }

  list(
    A = A,
    p = p,
    A_norm = A / A[1, 1],
    p_cum = p_cum
  )
}

#' Diagonal min-max scaling matrix
#'
#' Builds a diagonal matrix whose entries are the reciprocals of the
#' per-column ranges (max - min) of `x`.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   feature per column.
#' @return A square matrix with `ncol(x)` rows and `1 / range` of each
#'   column on its diagonal. Constant columns get a factor of 1 so the
#'   result stays finite.
scaling <- function(x) {
  x <- as.matrix(x)
  # Per-column range. The minima are column statistics, so they must be
  # removed column-wise rather than swept across rows.
  col_range <- apply(x, 2, max) - apply(x, 2, min)
  # A constant column has zero range; dividing by it would give Inf.
  col_range[!is.na(col_range) & col_range == 0] <- 1
  # nrow is given explicitly so a single column still yields a 1 x 1 matrix.
  diag(1 / col_range, nrow = length(col_range))
}

#x <- iris[1:4]
#x <- as.matrix(x)
#labels <- iris[[5]]
#A <- diag(ncol(x))
#A <- scaling(x)
#pca <- prcomp(x)
#A <- t(pca$rotation[,1:2])
#A <- matrix(runif(4*2), ncol=4, nrow=4)
#res <- nca(x=x, labels = labels, A_init = A, N_iter = 200, learning_rate = 1e-2)
#res$A_norm
#
# # 2d projection
#x_2d <- t(tcrossprod(res$A, x))
#x_2d <- as.data.frame(x_2d)
#
#plot(x_2d, col=iris$Species)

# Features: the first 64 columns of Data; labels: column 65.
x <- as.matrix(Data[1:64])
labels <- Data[[65]]
# Start from the diagonal scaling matrix.
A <- scaling(x)
#pca <- prcomp(x)
#A <- t(pca$rotation[,1:2])
#A <- matrix(runif(4*2), ncol=4, nrow=4)
res <- nca(x = x, labels = labels, A_init = A, N_iter = 200, learning_rate = 1e-2)
res$A_norm

# Apply the learned transformation to every observation.
x_2d <- as.data.frame(t(tcrossprod(res$A, x)))

# `col` needs a vector of colours, not a one-column tibble; map each label
# to an integer colour index.
plot(x_2d, col = as.integer(as.factor(labels)))


In [None]:
#' Neighbourhood Components Analysis fitted by per-sample gradient ascent
#'
#' Learns a linear transformation `A` by repeatedly visiting every
#' observation, computing its leave-one-out softmax neighbour probabilities
#' in the transformed space, and stepping `A` along the resulting gradient.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   observation per row.
#' @param labels Vector of class labels, one per row of `x`.
#' @param A_init Starting transformation with `ncol(x)` columns. Defaults to
#'   the identity.
#' @param N_iter Number of full passes over the observations.
#' @param learning_rate Step size applied to every per-observation update.
#' @return A list with `A` (final transformation), `p` (per-observation
#'   probability of picking a same-label neighbour, from the last pass),
#'   `A_norm` (`A` divided by `A[1, 1]`) and `p_cum` (sum of `p` after each
#'   pass).
nca <- function(x,
                labels,
                A_init = diag(ncol(x)),
                N_iter = 1e2,
                learning_rate = 0.01) {
  x <- as.matrix(x)
  #labels <- as.factor(labels)

  A <- A_init

  N <- nrow(x)
  stopifnot(NROW(x) == length(labels))

  p <- numeric(N)
  p_cum <- numeric(N_iter)
  for (it in seq_len(N_iter)) {
    for (i in seq_len(N)) {
      # Leave-one-out softmax over squared distances in the transformed
      # space. A changes after every observation, so the projection has to
      # be recomputed here.
      D <- tcrossprod(A, x)         # (dA, N)
      D <- D - as.numeric(D[, i])
      p_ik <- exp(-colSums(D * D))  # (N)

      p_ik[i] <- 0
      softmax <- sum(p_ik)
      # Only normalise when the total has not underflowed to (almost) zero.
      if (softmax > .Machine$double.eps) {
        p_ik <- p_ik / softmax      # (N)
      }

      # Neighbours that predict the correct label.
      correct_label <- labels == labels[i]  # (N)

      p[i] <- sum(p_ik[correct_label])
      d <- t(t(x) - as.numeric(x[i, ]))     # (N, dx)
      pd <- p_ik * d                        # (N, dx)

      # drop = FALSE keeps the subsets as matrices. Without it a class with
      # a single member collapses to a vector, crossprod() returns a 1 x 1
      # matrix and the subtraction is non-conformable.
      g <- p[i] * crossprod(d, pd) -
        crossprod(
          d[correct_label, , drop = FALSE],
          pd[correct_label, , drop = FALSE]
        )                                   # (dx, dx)
      A <- A + learning_rate * (A %*% g)    # (dA, dx)
      # Alternative scalar update, kept for reference:
      # d  <- t(x) - as.numeric(x[i,])  # (dx, N)
      # d2 <- p_ik * colSums(d * d) # (N)
      #
      # A <- A + learning_rate * A * (p[i]*sum(d2) - sum(d2[correct_label]))
    }
    p_cum[it] <- sum(p)
  }

  list(
    A = A,
    p = p,
    A_norm = A / A[1, 1],
    p_cum = p_cum
  )
}

#' Diagonal min-max scaling matrix
#'
#' Builds a diagonal matrix whose entries are the reciprocals of the
#' per-column ranges (max - min) of `x`.
#'
#' @param x Numeric matrix (or anything `as.matrix()` accepts) with one
#'   feature per column.
#' @return A square matrix with `ncol(x)` rows and `1 / range` of each
#'   column on its diagonal. Constant columns get a factor of 1 so the
#'   result stays finite.
scaling <- function(x) {
  x <- as.matrix(x)
  # Per-column range. The minima are column statistics, so they must be
  # removed column-wise rather than swept across rows.
  col_range <- apply(x, 2, max) - apply(x, 2, min)
  # A constant column has zero range; dividing by it would give Inf.
  col_range[!is.na(col_range) & col_range == 0] <- 1
  # nrow is given explicitly so a single column still yields a 1 x 1 matrix.
  diag(1 / col_range, nrow = length(col_range))
}

# Fit NCA on the four iris measurements, with the species column as labels.
x <- as.matrix(iris[1:4])
labels <- iris[[5]]
# Initial transformation: the diagonal scaling matrix.
A <- scaling(x)
#pca <- prcomp(x)
#A <- t(pca$rotation[,1:2])
#A <- matrix(runif(4*2), ncol=4, nrow=4)
res <- nca(
  x = x,
  labels = labels,
  A_init = A,
  N_iter = 200,
  learning_rate = 1e-2
)
res$A_norm

# Apply the learned transformation to every observation and plot the
# result, coloured by species.
x_2d <- as.data.frame(t(tcrossprod(res$A, x)))
plot(x_2d, col = iris$Species)



In [None]:
# Put the species column next to the transformed coordinates.
Species <- iris[5]
new <- cbind(Species, x_2d)

In [None]:
# Display the combined data frame (Species plus the transformed coordinates).
new

In [None]:
# For NCA: split the transformed iris data into training and test sets.
# Install caTools only when it is missing instead of on every run.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools)

set.seed(123)
# sample.split() takes the vector of outcome labels and returns one flag per
# observation. Passing the whole data frame yields one flag per column,
# which subset() would then silently recycle over the rows.
split <- sample.split(new$Species, SplitRatio = 0.75)

training_set <- subset(new, split == TRUE)
test_set <- subset(new, split == FALSE)

In [None]:
# Display the training subset.
training_set

In [None]:
# Training data keeps Species (column 1) plus the four transformed
# coordinates; test data keeps only the coordinates, with the test labels
# held separately in y.
train.data <- training_set[, 1:5]
test.data <- test_set[, 2:5]
y <- test_set[1]

In [None]:
# Display the training data that is passed to the classifier below.
train.data

In [None]:
# Install rpart only when it is missing instead of on every run.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)

# Classification tree predicting Species from all other columns.
rpart.model <- rpart(Species ~ ., data = train.data, method = "class")
rpart.model


In [None]:
# Predicted class for every row of the test data.
rpart.prediction <- predict(
  rpart.model,
  newdata = test.data,
  type = "class"
)


In [None]:
# Confusion matrix: rows are the test-set species, columns the predictions.
confMat <- table(test_set[["Species"]], rpart.prediction)
confMat

In [None]:
# Overall accuracy: diagonal (matching) counts of the confusion matrix
# divided by the total count.
sum(diag(confMat))/sum(confMat)
