In [1]:
library(e1071)
library(caret)
library(MASS)

Loading required package: lattice
Loading required package: ggplot2
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang


In [2]:
Quantity <- c("medium", "bad", "medium", "bad", "good", "good", "bad", "bad", "bad", "medium", "good", "bad", "bad", "good", "medium", "medium", "medium", "bad", "medium", "good", "medium", "good", "medium", "good", "medium", "good", "medium", "bad", "good", "good", "bad", "good", "bad", "bad")
TP  <- c(3064, 3000, 3155, 3085, 3245, 3267, 3080, 2974, 3038, 3318, 3317, 3182, 2998, 3221, 3019, 3022, 3094, 3009, 3227, 3308, 3212, 3361, 3061, 3478, 3126, 3458, 3252, 3052, 3270, 3198, 2904, 3247, 3083, 3043)
Sun  <- c(1201, 1053, 1133, 970, 1258, 1386, 966, 1189, 1103, 1310, 1362, 1171, 1102, 1424, 1230, 1285, 1329, 1210, 1331, 1366, 1289, 1444, 1175, 1317, 1248, 1508, 1361, 1186, 1399, 1259, 1164, 1277,1195, 1208)
Heat  <- c(10, 11, 19, 4, 36, 35, 13, 12, 14, 29, 25, 28, 9, 21, 16, 9, 11, 15, 21, 24, 17, 25, 12, 42, 11, 43, 26, 14, 24, 20, 6, 19, 5, 14)
Rain <- c(361, 338, 393, 467, 294, 225, 417, 488, 677, 427, 326, 326, 349, 382, 275, 303, 339, 536, 414, 282, 302, 253, 261, 259, 315, 286, 346, 443, 306, 367, 311, 375, 441, 371)

VinQualite  <- data.frame(Quantity, TP, Sun, Heat, Rain)

In [3]:
BreastTissue <- read.csv('BreastTissue.csv')
BreastTissue <- BreastTissue[,2:11]

In [4]:
Prematures <- read.csv('prematures.csv')
Prematures <- Prematures[,2:15]

In [5]:
d2 <- function(mean, DF)
{
    mean <- (mean-colMeans(DF)) %*% t(mean-colMeans(DF))
    
    return(mean)
}

In [6]:
AFD <- function(df, First = TRUE)
{    
    if (First)
    { 
        F <- 1
        
        DF <- df[2:ncol(df)]
        
        Zij <- scale(df[2:ncol(df)], center = TRUE, scale = FALSE) 
    }     
    else 
    {
        F <- ncol(df)
        
        DF <- df[1:(ncol(df)-1)]
        
        Zij <- scale(df[1:(ncol(df)-1)], center = TRUE, scale = FALSE)
    }
    
    P <- ncol(df)-1

    N <- nrow(df)
    
    Class <- df[,F]
    
    names(df)[F] <- 'Class'

    K <- nlevels(Class)
    
    n <- c()
    
    mean <- c()
    
    cov <- c()
    
    x <- 0

    for (facteur in levels(Class))
    {
        x <- x+1
        
        n <- append(n, nrow(DF[Class == facteur,]))
    
        mean <- append(mean, apply(DF[Class == facteur,], 2, mean))
    
        cov <- append(cov, cov(DF[Class == facteur,])*((n[x]-1)/n[x]))
    }
    
    IW <- 0
    
    for (j in 1:K)
    {
       IW <- IW + n[j]*cov[(1+((P**2)*(j-1))):((P**2)+((P**2)*(j-1)))]
    }

    IW <- 1/N*IW
    IW <- matrix(data = IW, nrow = P, ncol = P)

    IB <- 0

    for (j in 1:K)
    {    
       IB <- IB + n[j]*d2(mean[(1+(P*(j-1))):(P+(P*(j-1)))], DF)
    }

    IB <- 1/N*IB
    
    Itot <- IW+IB
    
    IWp <- IW*N/(N-K)
    
    IBp <- IB*N/(K-1)

    rapp <- IBp %*% solve(IWp)
    
    eigen <- eigen(rapp)
    
    val_prop <- eigen$values[1:(K-1)]
    
    vec_prop <- sqrt(diag(t(eigen$vectors)%*% IW %*% eigen$vectors))
    
    vec_prop_norm <- sweep(x = eigen$vectors, MARGIN = 2, STATS = vec_prop, FUN = "/")
    
    vec_prop_norm <- vec_prop_norm[,1:(K-1)]
    
    I <- c()
    
    Ni <- c()
    
    Yi <- c()
    
    Ai <- c()
    
    Ei <-c()
    
    for (j in 1:(K-1))
    {
        I <- append(I, val_prop[j]/sum(val_prop))
        
        Ni <- append(Ni, sqrt(val_prop[j]/(1+val_prop[j])))
        
        Yi <- append(Yi, val_prop[j]/(1+val_prop[j]))
        
        Ai <- append(Ai, 1-Ni[j]**2)
        
        Ei <- append(Ei, -(N-((P+K)/2)-1)*log(Ai[j]))
    }
    
    Ui <- data.frame(val_prop, I, Ni, Ai, Ei)
    
    Axe <- data.frame(Class)

    if (K == 2)
    { 
        Axe <- cbind(Axe, Zij %*% vec_prop_norm)
    }
    else
    {
        for (j in 1:(K-1))
        {
            Axe <- cbind(Axe, Zij %*% vec_prop_norm[,j])
        }
    }
  
    Gravity <- data.frame(levels(Class))
    
    for (j in 1:(K-1))
    {
        Gravity <- cbind(Gravity, tapply(Axe[,(j+1)], Axe[,1], mean))
    }

    ClassPred <- c()
    
    for (k in 1:N)
    {
        Dist <- c()
        for (i in 1:K)
        {
            d <- 0
            for (j in 2:K)
            {
                d <- d+(Axe[k,j]-Gravity[i,j])**2
            }
            d <- Re(sqrt(d))
            
            Dist <- append(Dist, d)
        }

        DistMin <- min(Dist)
    
        Index <- which(Dist == DistMin)
        
        Value <- levels(Gravity[,1])[Index]

        ClassPred <- append(ClassPred, Value)
        
    }
    
    ConfMatrAFD <-confusionMatrix(table(ClassPred, Class))
    
    Prop <- table(Class)/N
    
    formula <- Class~.
    
    LDA <- lda(formula=formula, data=df, prior=Prop, CV=TRUE)
    
    LDAClass <- LDA$class
    
    ConfMatrLDA <-confusionMatrix(table(LDAClass, Class))
    
    ret <- list(ConfMatrAFD, ConfMatrLDA)
    
    return(ret)

}

In [7]:
AFD(VinQualite)

[[1]]
Confusion Matrix and Statistics

         Class
ClassPred bad good medium
   bad     11    0      1
   good     0   10      3
   medium   1    1      7

Overall Statistics
                                          
               Accuracy : 0.8235          
                 95% CI : (0.6547, 0.9324)
    No Information Rate : 0.3529          
    P-Value [Acc > NIR] : 2.403e-08       
                                          
                  Kappa : 0.7351          
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: bad Class: good Class: medium
Sensitivity              0.9167      0.9091        0.6364
Specificity              0.9545      0.8696        0.9130
Pos Pred Value           0.9167      0.7692        0.7778
Neg Pred Value           0.9545      0.9524        0.8400
Prevalence               0.3529      0.3235        0.3235
Detection Rate           0.3235      0.2941        0.2059
Detect

In [8]:
AFD(BreastTissue)

[[1]]
Confusion Matrix and Statistics

         Class
ClassPred adi car con fad gla mas
      adi  10   0   0   0   0   0
      car   8  10   6   0   0   0
      con   3   4   3   0   0   1
      fad   0   0   0   2   0   1
      gla   0   0   3   8  14   9
      mas   1   7   2   5   2   7

Overall Statistics
                                         
               Accuracy : 0.434          
                 95% CI : (0.338, 0.5337)
    No Information Rate : 0.2075         
    P-Value [Acc > NIR] : 1.251e-07      
                                         
                  Kappa : 0.3188         
                                         
 Mcnemar's Test P-Value : NA             

Statistics by Class:

                     Class: adi Class: car Class: con Class: fad Class: gla
Sensitivity             0.45455    0.47619     0.2143    0.13333     0.8750
Specificity             1.00000    0.83529     0.9130    0.98901     0.7778
Pos Pred Value          1.00000    0.41667     0.2727    0.

In [9]:
AFD(Prematures, FALSE)

[[1]]
Confusion Matrix and Statistics

         Class
ClassPred negatif positif
  negatif      76      83
  positif      48     183
                                          
               Accuracy : 0.6641          
                 95% CI : (0.6148, 0.7109)
    No Information Rate : 0.6821          
    P-Value [Acc > NIR] : 0.793240        
                                          
                  Kappa : 0.2798          
                                          
 Mcnemar's Test P-Value : 0.002972        
                                          
            Sensitivity : 0.6129          
            Specificity : 0.6880          
         Pos Pred Value : 0.4780          
         Neg Pred Value : 0.7922          
             Prevalence : 0.3179          
         Detection Rate : 0.1949          
   Detection Prevalence : 0.4077          
      Balanced Accuracy : 0.6504          
                                          
       'Positive' Class : negatif         
        