In [1]:
%use kotlin-statistics, krangl, lets-plot, numpy

In [2]:
/**
 * Reads this column through `asDoubles()` and returns its values as a list,
 * with `null` entries removed.
 */
fun DataCol.toDoubleList(): List<Double> =
    asDoubles().toList().filterNotNull()

/**
 * Reads this column through `asInts()` and returns its values as a list,
 * with `null` entries removed.
 */
fun DataCol.toIntList(): List<Int> =
    asInts().toList().filterNotNull()

/**
 * Returns the values of this row as doubles, in the row's own value order.
 *
 * Every value is cast with `as Double`, so a `null` or non-Double cell makes
 * the call throw instead of being skipped.
 */
fun DataFrameRow.toDoubleList(): List<Double> =
    values.map { cell -> cell as Double }

/**
 * Returns the values of this row as ints, in the row's own value order.
 *
 * Every value is cast with `as Int`, so a `null` or non-Int cell makes the
 * call throw instead of being skipped.
 */
fun DataFrameRow.toIntList(): List<Int> =
    values.map { cell -> cell as Int }

In [3]:
 /**
  * Distance metrics between two feature vectors.
  *
  * Coordinates are paired with `zip`, so when the vectors differ in length the
  * surplus coordinates of the longer one are ignored. Every metric returns 0.0
  * for a pair of empty vectors.
  *
  * @property itemName lower-case label of the metric.
  * @property calc function computing the distance between vectors `a` and `b`.
  */
 enum class Distance(val itemName: String, val calc: (a: List<Double>, b: List<Double>) -> Double) {
     /** Sum of absolute coordinate differences. */
     MANHATTAN("manhattan", { a, b -> a.zip(b).map { (x, y) -> abs(x - y) }.sum() }),

     /** Square root of the sum of squared coordinate differences. */
     EUCLIDEAN(
         "euclidean",
         { a, b -> sqrt(a.zip(b).map { (x, y) -> (x - y) * (x - y) }.sum()) }
     ),

     /**
      * Largest absolute coordinate difference.
      *
      * Folded with `maxOf` starting from 0.0 (absolute differences are never
      * negative) rather than `max()!!`: the fold does not depend on how the
      * running Kotlin version declares `max()`, and it gives 0.0 instead of
      * throwing when there are no coordinates. NaN still propagates.
      */
     CHEBYSHEV("chebyshev", { a, b -> a.zip(b).fold(0.0) { widest, (x, y) -> maxOf(widest, abs(x - y)) } })
 }

/**
 * Smoothing kernels applied to a scaled distance `u`.
 *
 * The compactly supported kernels return 0.0 unless `abs(u) < 1.0`;
 * GAUSSIAN, LOGISTIC and SIGMOID are evaluated for every `u`.
 *
 * @property itemName lower-case label of the kernel.
 * @property calc the kernel function itself.
 */
enum class Kernel(val itemName: String, val calc: (u: Double) -> Double) {
    UNIFORM(
        "uniform",
        { x -> if (abs(x) < 1.0) 0.5 else 0.0 }
    ),
    TRIANGULAR(
        "triangular",
        { x -> if (abs(x) < 1.0) 1.0 - abs(x) else 0.0 }
    ),
    EPANECHNIKOV(
        "epanechnikov",
        { x -> if (abs(x) < 1.0) 0.75 * (1 - abs(x * x)) else 0.0 }
    ),
    QUARTIC(
        "quartic",
        { x -> if (abs(x) < 1.0) 0.9375 * (1 - x * x) * (1 - x * x) else 0.0 }
    ),
    TRIWEIGHT(
        "triweight",
        { x -> if (abs(x) < 1.0) 1.09375 * (1 - x * x).pow(3) else 0.0 }
    ),
    TRICUBE(
        "tricube",
        { x -> if (abs(x) < 1.0) 70.0 / 81 * (1 - abs(x).pow(3)).pow(3) else 0.0 }
    ),
    GAUSSIAN(
        "gaussian",
        { x -> 1.0 / sqrt(2.0 * PI) * E.pow(-0.5 * x * x) }
    ),
    COSINE(
        "cosine",
        { x -> if (abs(x) < 1.0) PI / 4.0 * cos(PI / 2 * x) else 0.0 }
    ),
    LOGISTIC(
        "logistic",
        { x -> 1.0 / (E.pow(x) + 2 + E.pow(-x)) }
    ),
    SIGMOID(
        "sigmoid",
        { x -> 2.0 / PI / (E.pow(x) + E.pow(-x)) }
    )
}

/**
 * Kind of smoothing window used when scaling distances.
 *
 * @property itemName lower-case label of the window kind.
 */
enum class Window(val itemName: String) {
    FIXED(itemName = "fixed"),
    VARIABLE(itemName = "variable")
}

/** How class labels are combined into a prediction. */
enum class Transform {
    NAIVE,
    ONEHOT
}

/**
 * Mutable per-class accumulator for confusion-matrix statistics.
 * Every field starts at zero.
 *
 * @property tp true-positive count.
 * @property fp false-positive count.
 * @property fn false-negative count.
 * @property tn true-negative count.
 * @property cnt number of objects attributed to the class.
 * @property prec precision.
 * @property recall recall.
 * @property fSc F-score.
 */
class ConfInfo(
    var tp: Int = 0,
    var fp: Int = 0,
    var fn: Int = 0,
    var tn: Int = 0,
    var cnt: Int = 0,
    var prec: Double = 0.0,
    var recall: Double = 0.0,
    var fSc: Double = 0.0
)

In [4]:
// Load the data set, then split it into the feature columns and the labels.
val df = DataFrame.readCSV("res/data3.csv")
val features = df.remove("Class")
// Labels are shifted down by one so the smallest label in the file (1) becomes 0.
val classes = df["Class"].toIntList().map { label -> label - 1 }
df.head()

V1,V2,V3,V4,V5,V6,V7,Class
15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [5]:
// One row per distinct "Class" value in the count table, so its length is the number of classes.
val classCount = df.count("Class")["Class"].length
df.count("Class")

Class,n
1,70
2,70
3,70


In [6]:
// Per-column mean and standard deviation of the feature columns, in column order.
// NOTE(review): both appear to be nullable (hence the `!!` below) — confirm when they can be null.
val means = features.cols.map {it.mean()}
val sds = features.cols.map {it.sd()}

// Standardise every feature column: subtract its mean and divide by its standard deviation.
// Each formula reuses the source column's name; the cell output shows only V1..V7,
// so the standardised values presumably replace the originals — TODO confirm against krangl docs.
val dataNorm = features.addColumns(
    *features.cols.mapIndexed { i, c -> 
        c.name to {(c - means[i]!!) / sds[i]!! }
    }.toTypedArray()
)
// Row-major matrix of the standardised features: one List<Double> per data-frame row.
val dataMatrix = dataNorm.rows.map {it.toDoubleList()}.toList()
dataNorm.head()

V1,V2,V3,V4,V5,V6,V7
0.1420977691752887,0.2154624368105019,6.060179184631368e-05,0.3042181993173967,0.1417018231277318,-0.9861517446709448,-0.3835774230126888
0.0111880256958591,0.0082237571301695,0.4285152701377202,-0.1686246637763319,0.1974322289061418,-1.7881662018206577,-0.9220134866112044
-0.1920665760222036,-0.3602005623015335,1.4423832477086511,-0.7636374532196373,0.2080475442925054,-0.6674793336006519,-1.1891919878665298
-0.3470912722478441,-0.4753331621239409,1.039381331937777,-0.6889780537837865,0.319508355849323,-0.960817954669541,-1.2299825987452062
0.445257175127653,0.3305950366329093,1.374509240841976,0.0666655647487776,0.8051590347754584,-1.5634954852292584,-0.4753562974897086


In [7]:
/**
 * Predicts the class of [target] by kernel-weighted voting over the training objects.
 *
 * Training objects are ordered by their distance to [target]; each distance is divided
 * by the window width and passed through the kernel to obtain a weight.
 *
 * Differences from a plain transcription of the formula:
 *  - for [Window.VARIABLE] the neighbour index is clamped to the available range, so a
 *    `k` that is not smaller than the training size no longer throws;
 *  - a zero window width combined with a zero distance is treated as scaled distance 0
 *    (instead of 0/0 = NaN), so coincident points keep their weight and NaN never
 *    reaches `roundToInt()`;
 *  - empty training data is rejected up front with a descriptive message.
 *
 * @param distanceParam metric used to measure the distance from each training object to [target].
 * @param kernelParam kernel turning a scaled distance into a weight.
 * @param windowParam [Window.FIXED]: [windowParamValue] is the window width itself;
 *   [Window.VARIABLE]: the width is the distance to the neighbour at 0-based sorted
 *   index `windowParamValue.toInt()`.
 * @param windowParamValue window width or neighbour index, depending on [windowParam].
 * @param transformParam [Transform.NAIVE]: rounded weighted mean of the numeric labels;
 *   [Transform.ONEHOT]: label with the largest summed weight (lowest label wins ties).
 * @param train training feature vectors.
 * @param classes labels of [train], paired by position (surplus entries on either side
 *   are ignored by `zip`). For ONEHOT they must lie in `0 until classCount`.
 * @param target feature vector to classify.
 * @return the predicted label. When all weights sum to exactly 0.0 this is the rounded
 *   mean of all training labels.
 * @throws IllegalArgumentException if [train] is empty.
 */
fun predictClass(
    distanceParam: Distance,
    kernelParam: Kernel,
    windowParam: Window,
    windowParamValue: Double,
    transformParam: Transform,
    train: List<List<Double>>,
    classes: List<Int>,
    target: List<Double>
): Int {
    require(train.isNotEmpty()) { "train must contain at least one object" }

    // (distance to target, label) pairs, nearest first.
    val neighbours = train
        .map { distanceParam.calc(it, target) }
        .zip(classes)
        .sortedBy { it.first }

    val bandwidth = when (windowParam) {
        Window.VARIABLE -> neighbours[windowParamValue.toInt().coerceIn(0, neighbours.lastIndex)].first
        Window.FIXED -> windowParamValue
    }

    val weights = neighbours.map { (dist, _) ->
        // 0 / 0 would be NaN; a point at distance 0 sits in the window centre.
        val scaled = if (dist == 0.0 && bandwidth == 0.0) 0.0 else dist / bandwidth
        kernelParam.calc(scaled)
    }
    val totalWeight = weights.sum()

    if (totalWeight == 0.0) {
        // No neighbour carries any weight: fall back to the mean label.
        return neighbours.map { it.second }.average().roundToInt()
    }

    return when (transformParam) {
        Transform.NAIVE ->
            (neighbours.zip(weights).map { it.first.second * it.second }.sum() / totalWeight).roundToInt()
        Transform.ONEHOT -> {
            // classCount is the notebook-level number of classes.
            val votes = DoubleArray(classCount)
            neighbours.forEachIndexed { i, (_, label) -> votes[label] += weights[i] }
            // Strict '>' keeps the first (lowest) label among equal maxima.
            var best = 0
            for (label in votes.indices) {
                if (votes[label] > votes[best]) best = label
            }
            best
        }
    }
}

/**
 * Builds a leave-one-out confusion matrix over the notebook-level `dataMatrix` / `classes`.
 *
 * Each object in turn is removed from the data, predicted from the remaining objects
 * with [predictClass], and counted in cell `[actual class][predicted class]`.
 *
 * @return a `classCount` x `classCount` matrix of counts.
 */
fun getConfMatrix(
    distanceParam: Distance,
    kernelParam: Kernel,
    windowParam: Window,
    windowParamValue: Double,
    transformParam: Transform
): Array<IntArray> {
    val confusion = Array(classCount) { IntArray(classCount) }
    dataMatrix.forEachIndexed { heldOut, targetData ->
        val actual = classes[heldOut]
        // Everything except the held-out object forms the training set.
        val trainData = dataMatrix.filterIndexed { idx, _ -> idx != heldOut }
        val trainClasses = classes.filterIndexed { idx, _ -> idx != heldOut }
        val predicted = predictClass(
            distanceParam,
            kernelParam,
            windowParam,
            windowParamValue,
            transformParam,
            trainData,
            trainClasses,
            targetData
        )
        confusion[actual][predicted] += 1
    }
    return confusion
}

/**
 * Computes an F-score from a square confusion matrix laid out as
 * `confMatrix[actual][predicted]`.
 *
 * Per-class precision (`tp / column sum`) and recall (`tp / row sum`) are averaged with
 * weights equal to the number of objects that actually belong to each class (row sum);
 * the result is the harmonic mean of those two averages.
 *
 * @param confMatrix `k` x `k` matrix of counts; only the first `k` columns of each row are read.
 * @return the F-score, or 0.0 when the matrix is empty, contains no objects, or both
 *   averaged precision and recall are zero.
 */
fun getFScore(confMatrix: Array<IntArray>): Double {
    val k = confMatrix.size
    val truePositives = IntArray(k)
    val actualCounts = IntArray(k)     // row sums: objects that belong to the class
    val predictedCounts = IntArray(k)  // column sums: objects assigned to the class
    var total = 0
    for (actual in 0 until k) {
        for (predicted in 0 until k) {
            val cell = confMatrix[actual][predicted]
            total += cell
            actualCounts[actual] += cell
            predictedCounts[predicted] += cell
            if (actual == predicted) truePositives[actual] = cell
        }
    }
    // Without any objects every ratio below would be 0/0.
    if (total == 0) return 0.0

    var precisionSum = 0.0
    var recallSum = 0.0
    for (c in 0 until k) {
        val tp = truePositives[c].toDouble()
        val precision = if (predictedCounts[c] != 0) tp / predictedCounts[c] else 0.0
        val recall = if (actualCounts[c] != 0) tp / actualCounts[c] else 0.0
        precisionSum += precision * actualCounts[c]
        recallSum += recall * actualCounts[c]
    }
    val weightedPrecision = precisionSum / total
    val weightedRecall = recallSum / total

    return if (weightedPrecision + weightedRecall != 0.0) {
        2 * weightedPrecision * weightedRecall / (weightedPrecision + weightedRecall)
    } else {
        0.0
    }
}

// Candidate neighbour indices for the variable window: 1.0, 2.0, ..., 100.0.
val variableWindowPoints = List(100) { (it + 1).toDouble() }
// Candidate widths for the fixed window: 0.1, 0.2, ..., 10.0.
val fixedWindowPoints = List(100) { (it + 1).toDouble() / 10.0 }

In [8]:
/**
 * Scores one parameter combination with the NAIVE transform and, if its F-score is
 * strictly greater than the best seen so far, stores it in the `bestNaive*` variables.
 */
fun processNaive(
    distanceParam: Distance,
    kernelParam: Kernel,
    windowParam: Window,
    windowParamValue: Double
) {
    val fScore = getFScore(
        getConfMatrix(distanceParam, kernelParam, windowParam, windowParamValue, Transform.NAIVE)
    )
    // Negated '>' (not '<=') so a NaN score is skipped as well.
    if (!(fScore > bestNaiveFScore)) return

    bestNaiveFScore = fScore
    bestNaiveDistance = distanceParam
    bestNaiveKernel = kernelParam
    bestNaiveWindow = windowParam
    bestNaiveWindowValue = windowParamValue
}

// Best NAIVE configuration found so far; -1.0 guarantees the first real score wins.
var bestNaiveDistance: Distance? = null
var bestNaiveKernel: Kernel? = null
var bestNaiveWindow: Window? = null
var bestNaiveWindowValue: Double? = null
var bestNaiveFScore: Double = -1.0

// Exhaustive search: every distance x kernel, first all variable windows, then all fixed ones.
Distance.values().forEach { distance ->
    Kernel.values().forEach { kernel ->
        variableWindowPoints.forEach { k -> processNaive(distance, kernel, Window.VARIABLE, k) }
        fixedWindowPoints.forEach { k -> processNaive(distance, kernel, Window.FIXED, k) }
    }
}

listOf(
    bestNaiveDistance,
    bestNaiveKernel,
    bestNaiveWindow,
    bestNaiveWindowValue,
    bestNaiveFScore
).forEach { println(it) }

EUCLIDEAN
UNIFORM
VARIABLE
1.0
0.9385535656861208


In [9]:
// Sweep the window size for the best NAIVE distance/kernel/window kind and plot the F-score.
val windowValues = if (bestNaiveWindow == Window.FIXED) fixedWindowPoints else variableWindowPoints
val fScores = windowValues.mapTo(ArrayList<Double>()) { v ->
    getFScore(
        getConfMatrix(bestNaiveDistance!!, bestNaiveKernel!!, bestNaiveWindow!!, v, Transform.NAIVE)
    )
}
val plotData = mapOf<String, Any>("fScore" to fScores, "window_size" to windowValues)
lets_plot(plotData) { x = "window_size"; y = "fScore" } + geom_path()

In [10]:
/**
 * Scores one parameter combination with the ONEHOT transform and, if its F-score is
 * strictly greater than the best seen so far, stores it in the `bestOneHot*` variables.
 */
fun processOneHot(
    distanceParam: Distance,
    kernelParam: Kernel,
    windowParam: Window,
    windowParamValue: Double
) {
    val fScore = getFScore(
        getConfMatrix(distanceParam, kernelParam, windowParam, windowParamValue, Transform.ONEHOT)
    )
    // Negated '>' (not '<=') so a NaN score is skipped as well.
    if (!(fScore > bestOneHotFScore)) return

    bestOneHotFScore = fScore
    bestOneHotDistance = distanceParam
    bestOneHotKernel = kernelParam
    bestOneHotWindow = windowParam
    bestOneHotWindowValue = windowParamValue
}

// Best ONEHOT configuration found so far; -1.0 guarantees the first real score wins.
var bestOneHotDistance: Distance? = null
var bestOneHotKernel: Kernel? = null
var bestOneHotWindow: Window? = null
var bestOneHotWindowValue: Double? = null
var bestOneHotFScore: Double = -1.0

// Exhaustive search: every distance x kernel, first all variable windows, then all fixed ones.
Distance.values().forEach { distance ->
    Kernel.values().forEach { kernel ->
        variableWindowPoints.forEach { k -> processOneHot(distance, kernel, Window.VARIABLE, k) }
        fixedWindowPoints.forEach { k -> processOneHot(distance, kernel, Window.FIXED, k) }
    }
}

listOf(
    bestOneHotDistance,
    bestOneHotKernel,
    bestOneHotWindow,
    bestOneHotWindowValue,
    bestOneHotFScore
).forEach { println(it) }

MANHATTAN
LOGISTIC
FIXED
0.3
0.9433150970083531


In [11]:
// Sweep the window size for the best ONEHOT distance/kernel/window kind and plot the F-score.
val windowValues = if (bestOneHotWindow == Window.FIXED) fixedWindowPoints else variableWindowPoints
val fScores = windowValues.mapTo(ArrayList<Double>()) { v ->
    getFScore(
        getConfMatrix(bestOneHotDistance!!, bestOneHotKernel!!, bestOneHotWindow!!, v, Transform.ONEHOT)
    )
}
val plotData = mapOf<String, Any>("fScore" to fScores, "window_size" to windowValues)
lets_plot(plotData) { x = "window_size"; y = "fScore" } + geom_path()