In [1]:
%use kotlin-statistics, krangl, lets-plot, numpy(0.1.4)

@file:Repository("https://repo1.maven.org/maven2")
@file:DependsOn("de.sldk:kotbar:0.1.0")

In [2]:
import java.io.*
import java.util.StringTokenizer

/**
 * Whitespace-delimited token reader over a caller-supplied [BufferedReader].
 *
 * Assign [reader] before calling any of the read functions. A per-line
 * [StringTokenizer] is kept between calls so that several tokens on one line can be
 * consumed by successive calls to [next].
 */
object IO {
    private var tokenizer: StringTokenizer? = null

    /**
     * Source of input. Assigning a reader discards any tokens still buffered from the
     * previous one, so a new source never starts with stale tokens.
     */
    var reader: BufferedReader? = null
        set(value) {
            field = value
            tokenizer = null
        }

    /**
     * Returns the next whitespace-delimited token, reading further lines as needed.
     *
     * A completely empty line is reported as the empty string rather than skipped;
     * lines that contain only whitespace are skipped.
     *
     * @throws NullPointerException if [reader] is unset or the input is exhausted.
     */
    fun next(): String {
        while (true) {
            val pending = tokenizer
            if (pending != null && pending.hasMoreTokens()) {
                return pending.nextToken()
            }
            val line = reader?.readLine()
                ?: throw NullPointerException("IO.reader is not set or has no more input")
            if (line == "") {
                return ""
            }
            tokenizer = StringTokenizer(line)
        }
    }

    /**
     * Returns the next raw line straight from [reader]; tokens already buffered by
     * [next] are not consulted.
     */
    fun nextLine(): String {
        return reader!!.readLine()
    }

    /** Parses the next token as an [Int]. */
    fun nextInt(): Int = next().toInt()

    /** Parses the next token as a [Double]. */
    fun nextDouble(): Double = next().toDouble()

    /** Parses the next token as a [Long]. */
    fun nextLong(): Long = next().toLong()
}

/**
 * One data set split into a training part and a test part, as produced by `readData`.
 *
 * @property featureCount number of feature values per sample, taken from the first number in the file.
 * @property trainX training inputs, one row per sample; `readData` appends a constant 1 after the features of each row.
 * @property trainY training targets, one value per sample.
 * @property testX test inputs, laid out like [trainX].
 * @property testY test targets, one value per sample.
 */
data class DataF(
    val featureCount: Int,
    val trainX: KtNDArray<Double>,
    val trainY: KtNDArray<Int>,
    val testX: KtNDArray<Double>,
    val testY: KtNDArray<Int>
)

In [3]:
/**
 * Loads the data set stored in `res/<fileName>.txt`.
 *
 * The file is consumed as a stream of whitespace-separated integers: the feature
 * count, the number of training samples, the training samples, the number of test
 * samples and finally the test samples. Each sample is `featureCount` feature values
 * followed by its target value.
 *
 * The file is closed before returning, including when parsing fails part-way. The
 * (closed) reader stays assigned to [IO.reader] afterwards.
 *
 * @param fileName name of the file inside `res/`, without the `.txt` extension.
 * @return the parsed splits; every row of the X matrices has a constant 1 appended
 *   after the features.
 */
fun readData(fileName: String): DataF {
    // Reads a sample count followed by that many samples from IO.
    fun readSplit(featureCount: Int): Pair<KtNDArray<Double>, KtNDArray<Int>> {
        val sampleCount = IO.nextInt()
        val rows = ArrayList<ArrayList<Int>>()
        val targets = ArrayList<Int>()
        repeat(sampleCount) {
            val row = ArrayList<Int>()
            repeat(featureCount) {
                row.add(IO.nextInt())
            }
            // Constant column appended after the features.
            row.add(1)
            targets.add(IO.nextInt())
            rows.add(row)
        }
        return Pair(array<Double>(rows), array<Int>(targets))
    }

    // `use` closes the file even if a token is missing or malformed.
    return BufferedReader(FileReader("res/${fileName}.txt")).use { source ->
        IO.reader = source
        val featureCount = IO.nextInt()
        val (trainX, trainY) = readSplit(featureCount)
        val (testX, testY) = readSplit(featureCount)
        DataF(featureCount, trainX, trainY, testX, testY)
    }
}

In [4]:
import org.jetbrains.numkt.linalg.Linalg
import de.sldk.kotbar.Kotbar
import kotlin.random.Random

/**
 * Fits weights for `x` against `y` via [Linalg.lstsq].
 *
 * Returns the first component of the `lstsq` result, cast to a double array.
 * NOTE(review): presumably this is the least-squares solution vector, mirroring
 * numpy's `linalg.lstsq` — confirm against the kotlin-numpy API.
 */
fun rss(x: KtNDArray<Double>, y: KtNDArray<Int>): KtNDArray<Double> {
    val lstsqResult = Linalg.lstsq(x, y)
    return lstsqResult[0] as KtNDArray<Double>
}

/**
 * Normalised root-mean-square error of the linear predictions `w · xᵀ` against `y`.
 *
 * Computed as `sqrt(sum of squared residuals / x.shape[0])` divided by
 * `max(y) - min(y)`. There is no guard for the case where all targets are equal.
 *
 * @param x input matrix, one row per sample.
 * @param y target values.
 * @param w weight vector applied to each row of [x].
 */
fun nrmse(x: KtNDArray<Double>, y: KtNDArray<Int>, w: KtNDArray<Double>): Double {
    val residuals = w.dot(x.transpose()).minus(y)
    val squaredErrorSum = residuals.dot(residuals).scalar!!
    val rmse = Math.sqrt(squaredErrorSum / x.shape[0])
    val targetRange = y.max()!! - y.min()!!
    return rmse / targetRange
}

/**
 * Stochastic gradient descent for a linear model with squared-error loss.
 *
 * Starts from an all-zero weight vector of length `x.shape[1]`. Each step picks one
 * random row, computes the gradient `2 * row * (row · w - target)` and moves the
 * weights against it with step size `a / step`, where `step` counts from 1.
 *
 * @param x input matrix, one row per sample.
 * @param y target values, indexed like the rows of [x].
 * @param iters number of update steps to perform.
 * @param a base step-size factor.
 * @return the weights after [iters] updates.
 */
fun gradient(x: KtNDArray<Double>, y: KtNDArray<Int>, iters: Int, a: Double) : KtNDArray<Double> {
    val sampleCount = x.shape[0]
    val zeros = DoubleArray(x.shape[1]).toList()
    var weights = array<Double>(zeros)
    for (step in 1..iters) {
        val pick = Random.nextInt(sampleCount)
        val features = x.get(pick)
        val target = y.get(pick).scalar!!
        val residual = (features * weights).sum()!! - target
        val direction = 2.0 * features * residual
        // Step size shrinks as a / step.
        weights = weights - direction / step.toDouble() * a
    }
    return weights
}



In [15]:
// Grid search over the SGD step-size factor: try 10^-1 .. 10^-20 with 3000 steps each
// and keep the factor giving the lowest NRMSE on the test split of data set "1".
var bestGradParam = 0.0
var bestNRSME = Double.MAX_VALUE
val params = (1..20).map { 10.0.pow(-it) }
// The data set is the same for every candidate, so load it once instead of
// re-reading and re-parsing the file on each iteration.
val tuningData = readData("1")
for (a in params) {
    val weights = gradient(tuningData.trainX, tuningData.trainY, 3000, a)
    val score = nrmse(tuningData.testX, tuningData.testY, weights)
    if (score < bestNRSME) {
        bestNRSME = score
        bestGradParam = a
    }
}

bestGradParam

1.0E-15

In [16]:
// SGD iteration budgets to compare: the powers of two 2, 4, ..., 4096.
val iterations = (1..12).map { 1 shl it }

/**
 * Compares the least-squares fit with SGD on one data set and shows the results.
 *
 * Loads `fileName` through `readData`, fits weights with `rss`, then runs `gradient`
 * once for every budget in `iterations` using `bestGradParam` as the step-size
 * factor. NRMSE is measured on both the training and the test split, and four plots
 * (least squares test/train, SGD test/train) are shown in a 2x2 grid with a
 * log-scaled y axis.
 *
 * @param fileName data set name as accepted by `readData`.
 */
fun processDataFile(fileName: String) {
    val dataF = readData(fileName)

    val rssWeights = rss(dataF.trainX, dataF.trainY)
    val nrmseRssTrain = nrmse(dataF.trainX, dataF.trainY, rssWeights)
    val nrmseRssTest = nrmse(dataF.testX, dataF.testY, rssWeights)

    val gradientResTest = ArrayList<Double>()
    val gradientResTrain = ArrayList<Double>()
    for (i in iterations) {
        // Train once per budget and score the same weights on both splits. SGD is
        // randomised, so two separate runs would put unrelated models on the train
        // and test curves (and cost twice as much).
        val weights = gradient(dataF.trainX, dataF.trainY, i, bestGradParam)
        gradientResTrain.add(nrmse(dataF.trainX, dataF.trainY, weights))
        gradientResTest.add(nrmse(dataF.testX, dataF.testY, weights))
    }

    // The least-squares errors are constants, repeated so they plot as flat lines
    // over the same x axis as the SGD curves.
    val plotData = mapOf<String, Any>(
        "iters" to iterations,
        "nrmseRssTrain" to DoubleArray(iterations.size) { nrmseRssTrain },
        "nrmseRssTest" to DoubleArray(iterations.size) { nrmseRssTest },
        "nrmseGradTest" to gradientResTest,
        "nrmseGradTrain" to gradientResTrain
    )

    val p1 = lets_plot(plotData) { x = "iters" } + geom_path { y = "nrmseRssTest" } +
        ggtitle("rss test dataset") + scale_y_log10()
    val p2 = lets_plot(plotData) { x = "iters" } + geom_path { y = "nrmseRssTrain" } +
        ggtitle("rss train dataset") + scale_y_log10()

    val p3 = lets_plot(plotData) { x = "iters" } + geom_path { y = "nrmseGradTest" } +
        ggtitle("gradient test dataset") + scale_y_log10() +
        geom_point(shape = 21, fill = "red") { y = "nrmseGradTest" }
    val p4 = lets_plot(plotData) { x = "iters" } + geom_path { y = "nrmseGradTrain" } +
        ggtitle("gradient train dataset") + scale_y_log10() +
        geom_point(shape = 21, fill = "red") { y = "nrmseGradTrain" }

    GGBunch()
        .addPlot(p1, 0, 0)
        .addPlot(p2, 500, 0)
        .addPlot(p3, 0, 350)
        .addPlot(p4, 500, 350)
        .show()
}



In [17]:
// Run the least-squares vs. SGD comparison and show its plots for data set "1".
processDataFile("1")