In [1]:
%use krangl

In [2]:
// Load the wine dataset from a CSV file located next to the notebook.
val df = DataFrame.readCSV("./dataset_wine.csv")
// Last expression of the cell: shows the first rows of the frame.
df.head()

class,Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280%2FOD315_of_diluted_wines,Proline
1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [3]:
// Show the frame's schema (the cell output lists Name, Type and Values).
df.schema()

Name,Type,Values


In [4]:
// Class labels, parsed from the first column of the frame.
val classes: Array<Int> = df.cols[0].values().map { it.toString().toInt() }.toTypedArray()
// Labels are used as 1-based ids elsewhere in this notebook (see `oneHot`),
// so the largest label doubles as the number of classes.
// maxOrNull() returns Int?; resolve it here so sizeClasses is a plain Int that can be
// passed to the Int parameters of oneHot(...) and NadarayWatson(...).
val sizeClasses: Int = classes.maxOrNull() ?: error("dataset contains no class labels")
// Last expression of the cell: shows the number of classes.
sizeClasses

3

In [5]:
// Number of rows in the loaded frame, i.e. the number of objects in the sample.
val sizeDf = df.nrow
// Last expression of the cell: shows the row count.
sizeDf

178

In [6]:
// Feature-only frame: the "class" label column is removed.
val dfObjects = df.remove("class")
// Last expression of the cell: shows the first rows of the feature frame.
dfObjects.head()

Alcohol,Malic_acid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,OD280%2FOD315_of_diluted_wines,Proline
14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [10]:
// Per-column (min, max) of the feature frame, used for the min-max scaling below.
// min()/max() are nullable; fail with a readable message instead of a bare NPE from `!!`.
val minMax: List<Pair<Double, Double>> = dfObjects.cols.mapIndexed { colIdx, col ->
    Pair(
        requireNotNull(col.min()) { "feature column #$colIdx has no minimum" },
        requireNotNull(col.max()) { "feature column #$colIdx has no maximum" }
    )
}

// Every row as a feature vector scaled to [0, 1] per column: (value - min) / (max - min).
val objects = Array<List<Double>>(sizeDf) { rowIdx ->
    dfObjects.row(rowIdx).values.mapIndexed { colIdx, cell ->
        val (lo, hi) = minMax[colIdx]
        val range = hi - lo
        // A constant column has zero range; map it to 0.0 rather than producing NaN (0/0),
        // which would otherwise turn every distance involving this feature into NaN.
        if (range == 0.0) 0.0 else (cell.toString().toDouble() - lo) / range
    }
}

In [11]:
// Names accepted by metric(...).
val metrics: List<String> = listOf(
    "manhattan",
    "euclidean",
    "chebyshev",
)

// Names accepted by core(...).
val cores: List<String> = listOf(
    "uniform",
    "triangular",
    "epanechnikov",
    "quartic",
)

// Window kinds understood by NadarayWatson.
val windowTypes: List<String> = listOf(
    "fixed",
    "variable",
)

/**
 * Returns the distance function registered under [name].
 *
 * Supported names: `"manhattan"` (sum of absolute coordinate differences),
 * `"euclidean"` (square root of the sum of squared differences) and
 * `"chebyshev"` (largest absolute coordinate difference).
 *
 * Vectors are paired with `zip`, so if the two lists differ in length the extra
 * coordinates of the longer one are ignored. Two empty vectors are at distance 0.0.
 *
 * @throws RuntimeException if [name] is not one of the supported names.
 */
fun metric(name: String): (List<Double>, List<Double>) -> Double = when (name) {
    // Folds are seeded with 0.0 so an empty pair of vectors yields 0.0 instead of
    // throwing, while non-empty input accumulates in the same left-to-right order.
    "manhattan" -> { a, b -> (a zip b).fold(0.0) { acc, (p, q) -> acc + abs(p - q) } }
    "euclidean" -> { a, b ->
        sqrt((a zip b).fold(0.0) { acc, (p, q) -> acc + (p - q) * (p - q) })
    }
    // Absolute differences are non-negative, so 0.0 is a neutral seed for max.
    "chebyshev" -> { a, b -> (a zip b).fold(0.0) { acc, (p, q) -> max(acc, abs(p - q)) } }
    else -> throw RuntimeException("No data dist")
}

/**
 * Returns the kernel function registered under [name].
 *
 * Supported names: `"uniform"`, `"triangular"`, `"epanechnikov"` and `"quartic"`.
 * Every kernel is non-zero only for `|u| < 1` and symmetric in `u`; for the
 * non-negative arguments used in this notebook (distance / window width) the
 * values are the same as testing `u < 1`.
 *
 * @throws RuntimeException if [name] is not one of the supported names.
 */
fun core(name: String): (u: Double) -> Double = when (name) {
    "uniform" -> { u -> if (abs(u) < 1.0) 0.5 else 0.0 }
    "triangular" -> { u -> if (abs(u) < 1.0) 1.0 - abs(u) else 0.0 }
    "epanechnikov" -> { u -> if (abs(u) < 1.0) 0.75 * (1 - (u * u)) else 0.0 }
    "quartic" -> { u -> if (abs(u) < 1.0) (15.0 / 16.0) * (1.0 - u * u).pow(2) else 0.0 }
    // The message used to say "dist", copied from metric(); this function selects a kernel.
    else -> throw RuntimeException("No data core")
}

In [12]:
/**
 * Builds a one-hot encoding of 1-based class labels.
 *
 * @param sizeDf number of rows to encode (the first [sizeDf] entries of [classes] are read)
 * @param sizeCl number of columns, i.e. the number of classes
 * @param classes 1-based class label per row
 * @return a [sizeDf] x [sizeCl] matrix whose row `i` has a 1 in column `classes[i] - 1`
 *         and 0 elsewhere; a label outside `1..sizeCl` yields an all-zero row
 */
fun oneHot(sizeDf: Int, sizeCl: Int, classes: Array<Int>): Array<IntArray> {
    val encoded = Array(sizeDf) { IntArray(sizeCl) }
    for (row in 0 until sizeDf) {
        val hotColumn = classes[row] - 1
        if (hotColumn in 0 until sizeCl) {
            encoded[row][hotColumn] = 1
        }
    }
    return encoded
}

In [13]:
// One-hot matrix of the class labels: one row per object, one column per class.
val resOneHot = oneHot(sizeDf, sizeClasses, classes)

In [14]:
import java.lang.RuntimeException
import kotlin.math.abs
import kotlin.math.max
import kotlin.math.sqrt

/**
 * Leave-one-out evaluation of a kernel-weighted (Nadaraya–Watson style) classifier.
 *
 * For every object the class scores are computed from all *other* objects as
 * `sum(oneHot * K(dist / h)) / sum(K(dist / h))`, the class with the highest score is
 * taken as the prediction, and the predictions are summarised as a class-size-weighted
 * F-score.
 *
 * @param classes 1-based class label of every object
 * @param objects feature vector of every object
 * @param resOneHot one-hot encoding of [classes]: `resOneHot[i][c]` is 1 when object `i`
 *        belongs to class `c + 1`
 * @param metric distance between two feature vectors
 * @param core kernel applied to `distance / window width`
 * @param windowType `"fixed"` (the window is a distance) or `"variable"` (the window is a
 *        neighbour count); any other value makes [solve] and [localSolve] throw
 * @param sizeClasses number of classes
 */
class NadarayWatson(
    private val classes: Array<Int>,
    private val objects: Array<List<Double>>,
    private val resOneHot: Array<IntArray>,
    private val metric: (List<Double>, List<Double>) -> Double,
    private val core: (Double) -> Double,
    private val windowType: String,
    private val sizeClasses: Int
) {
    private val sizeDf = objects.size

    /** Largest distance between any two objects; upper bound for the fixed window width. */
    private val maxPairDistance: Double = run {
        var widest = 0.0
        for (i in objects.indices) {
            for (j in i + 1 until objects.size) {
                widest = max(widest, metric(objects[i], objects[j]))
            }
        }
        widest
    }

    /**
     * Tries a series of window parameters and returns the best F-score found.
     *
     * With `D = sqrt(number of objects)`: a fixed window tries widths `R/D, 2R/D, ...`
     * while they stay below the largest pairwise distance `R`; a variable window tries
     * neighbour counts `1..floor(D)`.
     *
     * @throws RuntimeException if the window type is neither `"fixed"` nor `"variable"`.
     */
    fun solve(): Double {
        val d = sqrt(sizeDf.toDouble())
        var best = 0.0

        when (windowType) {
            "fixed" -> {
                val step = maxPairDistance / d
                var h = step
                while (maxPairDistance - h > EPS) {
                    best = max(best, localSolve(h))
                    h += step
                }
            }
            "variable" -> {
                for (k in 1..d.toInt()) {
                    best = max(best, localSolve(k.toDouble()))
                }
            }
            else -> throw RuntimeException("No data window type")
        }
        return best
    }

    /**
     * Leave-one-out weighted F-score for a single window parameter.
     *
     * @param h window width for a fixed window; for a variable window the neighbour
     *        count, truncated to an integer
     */
    fun localSolve(h: Double): Double {
        val confusionMatrix = Array(sizeClasses) { DoubleArray(sizeClasses) }
        // One counter per class. (Sizing this by the number of objects overflowed
        // whenever there were fewer objects than classes.)
        val classesCount = DoubleArray(sizeClasses)

        for (i in 0 until sizeDf) {
            val predicted = getClass(leaveOneHot(i, h))
            val actual = classes[i] - 1
            confusionMatrix[actual][predicted - 1]++
            classesCount[actual]++
        }

        return weightedFScore(confusionMatrix, classesCount)
    }

    /** 1-based index of the largest score; the first one wins on ties. */
    private fun getClass(cl: Array<Double>): Int {
        var best = 0
        for (idx in cl.indices) {
            if (cl[idx] > cl[best]) best = idx
        }
        return best + 1
    }

    /**
     * Per-class scores for object [i], estimated from every other object.
     *
     * Distances, the window width and the kernel weights do not depend on the class,
     * so they are computed once per object and shared by all classes.
     */
    private fun leaveOneHot(i: Int, h: Double): Array<Double> {
        val distances = DoubleArray(sizeDf) { j -> metric(objects[j], objects[i]) }
        val bandwidth = when (windowType) {
            "fixed" -> h
            "variable" -> variableBandwidth(distances, h.toInt())
            else -> throw RuntimeException("No data window type")
        }
        // The held-out object itself never contributes.
        val weights = DoubleArray(sizeDf) { j -> if (j == i) 0.0 else core(distances[j] / bandwidth) }
        val denominator = weights.sum()

        return Array(sizeClasses) { c ->
            if (denominator < EPS) {
                // No neighbour carries weight: fall back to the share of the other
                // objects that belong to class c (divided by the full sample size).
                var others = 0
                for (j in 0 until sizeDf) if (j != i) others += resOneHot[j][c]
                others.toDouble() / sizeDf
            } else {
                var numerator = 0.0
                for (j in 0 until sizeDf) numerator += resOneHot[j][c] * weights[j]
                numerator / denominator
            }
        }
    }

    /**
     * Window width for a variable window: the distance to the (k+1)-th nearest
     * neighbour, so that exactly the k nearest ones fall strictly inside the window.
     *
     * [distances] includes the held-out object's distance to itself; the smallest entry
     * is skipped. When fewer than k+1 neighbours exist the farthest available distance
     * is used instead of indexing past the end.
     */
    private fun variableBandwidth(distances: DoubleArray, k: Int): Double {
        require(k >= 0) { "Neighbour count must be non-negative, got $k" }
        val sorted = distances.sorted()
        return sorted[minOf(k, sorted.lastIndex - 1) + 1]
    }

    private fun isZero(z: Double) = abs(z) < EPS

    private fun precision(tp: Double, fp: Double) = if (isZero(tp + fp)) 0.0 else tp / (tp + fp)

    private fun recall(tp: Double, fn: Double) = if (isZero(tp + fn)) 0.0 else tp / (tp + fn)

    private fun fScore(precision: Double, recall: Double): Double =
        if (isZero(precision + recall)) 0.0 else 2.0 * precision * recall / (precision + recall)

    /** False positives of class [i]: column [i] of the confusion matrix without the diagonal. */
    private fun fpCount(cm: Array<DoubleArray>, i: Int): Double {
        var res = 0.0
        for (j in cm.indices) if (j != i) res += cm[j][i]
        return res
    }

    /** False negatives of class [i], given row [i] of the confusion matrix. */
    private fun fnCount(cm: DoubleArray, i: Int): Double {
        var res = 0.0
        for (j in cm.indices) if (j != i) res += cm[j]
        return res
    }

    /** Per-class F-scores averaged with the class sizes as weights; 0.0 when nothing was counted. */
    private fun weightedFScore(confusionMatrix: Array<DoubleArray>, countClass: DoubleArray): Double {
        val total = countClass.sum()
        if (isZero(total)) return 0.0

        var weighted = 0.0
        for (c in 0 until sizeClasses) {
            val tp = confusionMatrix[c][c]
            val p = precision(tp, fpCount(confusionMatrix, c))
            val r = recall(tp, fnCount(confusionMatrix[c], c))
            weighted += fScore(p, r) * countClass[c]
        }
        return weighted / total
    }

    private companion object {
        /** Tolerance below which a quantity is treated as zero. */
        const val EPS = 1e-9
    }
}

In [15]:
// Exhaustive search over every (metric, kernel, window type) combination.
// `mx` keeps the best F-score seen so far; `ans` keeps the names that produced it
// in the order [metric, core, windowType].
var mx = 0.0
val ans = Array<String>(3) { "" }
// Loop variables are named *Name so they do not shadow the metric(...) and core(...)
// factory functions that are called with them.
for (metricName in metrics) {
    for (coreName in cores) {
        for (windowType in windowTypes) {
            println("metric: $metricName, core: $coreName, windowType: $windowType.")
            val fScore = NadarayWatson(
                classes,
                objects,
                resOneHot,
                metric(metricName),
                core(coreName),
                windowType,
                sizeClasses
            ).solve()
            println("max f-score = $fScore")
            // Strict comparison: on ties the earliest combination is kept.
            if (fScore > mx) {
                mx = fScore
                ans[0] = metricName
                ans[1] = coreName
                ans[2] = windowType
            }
        }
    }
}

println("The best parameters: metric: ${ans[0]}, core: ${ans[1]}, windowType: ${ans[2]}. Max f-score: $mx.")

metric: manhattan, core: uniform, windowType: fixed.
max f-score = 0.977410695592355
metric: manhattan, core: uniform, windowType: variable.
max f-score = 0.9831024479956075
metric: manhattan, core: triangular, windowType: fixed.
max f-score = 0.9831024479956075
metric: manhattan, core: triangular, windowType: variable.
max f-score = 0.9660316235538238
metric: manhattan, core: epanechnikov, windowType: fixed.
max f-score = 0.9831024479956075
metric: manhattan, core: epanechnikov, windowType: variable.
max f-score = 0.9660316235538238
metric: manhattan, core: quartic, windowType: fixed.
max f-score = 0.9774403017265568
metric: manhattan, core: quartic, windowType: variable.
max f-score = 0.9717211088202108
metric: euclidean, core: uniform, windowType: fixed.
max f-score = 0.9717874929756182
metric: euclidean, core: uniform, windowType: variable.
max f-score = 0.977410695592355
metric: euclidean, core: triangular, windowType: fixed.
max f-score = 0.9774612092027822
metric: euclidean, cor

In [16]:
// Points of the "F-score vs. neighbour count" curve: x holds the neighbour counts,
// y the leave-one-out F-score obtained for each of them.
val x: MutableList<Double> = mutableListOf()
val y: MutableList<Double> = mutableListOf()

// Upper bound of the neighbour counts tried: sqrt of the sample size.
val D = sqrt(sizeDf.toDouble())
val nw = NadarayWatson(
    classes = classes,
    objects = objects,
    resOneHot = resOneHot,
    metric = metric("manhattan"),
    core = core("uniform"),
    windowType = "variable",
    sizeClasses = sizeClasses
)
for (k in 1..D.toInt()) {
    val neighbours = k.toDouble()
    x += neighbours
    y += nw.localSolve(neighbours)
}

In [17]:
%use lets-plot

// Scatter plot of the collected series: the data map exposes the lists `x` and `y`
// under the keys "x" and "y", and the trailing lambda binds the plot's x and y
// aesthetics to those keys.
val p = lets_plot(mapOf("x" to x, "y" to y)) + ggsize(700, 400) + geomPoint(
        color = "dark-green",
        size = 4.0
    ) { x = "x"; y = "y" }
// Last expression of the cell: shows the plot.
p