In [1]:
@file:DependsOn("org.jetbrains.kotlinx:kotlin-deeplearning-api:0.3.0")

import org.jetbrains.kotlinx.dl.api.core.Sequential
import org.jetbrains.kotlinx.dl.api.core.activation.Activations
import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
import org.jetbrains.kotlinx.dl.api.core.layer.core.Input
import org.jetbrains.kotlinx.dl.api.core.loss.Losses
import org.jetbrains.kotlinx.dl.api.core.metric.Metrics
import org.jetbrains.kotlinx.dl.api.core.optimizer.Adam
import org.jetbrains.kotlinx.dl.api.core.summary.logSummary
import org.jetbrains.kotlinx.dl.dataset.OnHeapDataset

A min max scaler
Scale our number to a range we want

For example

Age from 0..100 sclaes to 0..1

[13, 56, 23] -> [0.13, 0.56, 0.23]

In [2]:
fun minMaxScaler(scaling: Pair<IntRange, IntRange>, input: Collection<Int>): List<Float> {
    val (original, target) = scaling

    val min = original.first
    val max = original.last

    val inputScale = max - min
    val rangeScale = target.last - target.first

    return input.map { (((it - min).toFloat() / inputScale) * rangeScale) + target.first }
}

An experimental drug was tested on individuals from ages 13 ro 100 in a clinical trial

The trail had 2100 participants. Half were under 65 years old, half were 65 years or older

Around 95% of patients 65 or older experienced side effects.

Around 95% of patients under 65 experienced **no** side effects.

In [3]:
data class DataGeneration(
    val ages: List<Float>,
    val result: List<Int>,
)

In [4]:
fun generateData(): DataGeneration {
    val ages = mutableListOf<Int>()
    val results = mutableListOf<Int>()

    val youngerAge = 13..64
    val olderAge = 65..100


    repeat(50) {
        // 5% of younger individuals who experienced side effects
        ages += youngerAge.random()
        results += 1

        //5% older individuals experienced no side effects
        ages += olderAge.random()
        results += 0
    }


    repeat(1000) {
        // 95% of younger individuals who did not experience side effects
        ages += youngerAge.random()
        results += 0

        // 95% of older individuals who experienced side effects
        ages += olderAge.random()
        results += 1
    }

    // shuffle to remove patterns when generating or ordering data
    // create a map and shuffle together
    val shuffled = ages.mapIndexed { index, age -> age to results[index] }.shuffled()

    return DataGeneration(minMaxScaler(13..100 to 0..1, shuffled.map { it.first }), shuffled.map { it.second })
}

Creating a sequential model

The first layer is the input layer, in this case the input is the age, which has only one dimension when it is converted to an array.

In [5]:
val model = Sequential.of(
    Input(1),
    Dense(outputSize = 16, activation = Activations.Relu),
    Dense(outputSize = 32, activation = Activations.Relu),
    // This is the final layer, and it is the output, since our input for experience side effects is 0 or 1 (false or true). It will have a result of array contains two element.
    // The first one is possibility of get 0(false, which is possibility of not experiencing side effects), and sencond element is possibility of getting 1(true, which will experience side effects)
    Dense(outputSize = 2, activation = Activations.Softmax)
)

Compile model

In [6]:
model.compile(
    optimizer = Adam(learningRate = 0.0001f),
    loss = Losses.SOFT_MAX_CROSS_ENTROPY_WITH_LOGITS,
    metric = Metrics.ACCURACY,
)

In [8]:
// temporary not support, https://github.com/JetBrains/KotlinDL/pull/251
model.logSummary()

In [10]:
val scaling = 13..100 to 0..1

Create DataSet from input ages and scale it.

In [13]:
fun createScaledDatasetForTest(vararg ages: Int): OnHeapDataset {
    val test = minMaxScaler(scaling, listOf(*ages.toTypedArray()))
        .map { floatArrayOf(it) }.toTypedArray()

    return OnHeapDataset.create(test, FloatArray(test.size) { 0f })
}

This generates random scaled DataSet for testing 

In [31]:
fun generateDataSet(): OnHeapDataset {
    val (ages, result) = generateData()

    // this converts all ages to an array
    val features = ages.map { floatArrayOf(it) }.toTypedArray()
    // this converts result to an array
    val label = FloatArray(result.size) { result[it].toFloat() }
    return OnHeapDataset.create(features, label)
}

Start training

In [17]:
model.fit(
        epochs = 30,
        trainingDataset = generateDataSet(),
        validationDataset = generateDataSet(),
        trainBatchSize = 10,
        validationBatchSize = 10,
)

org.jetbrains.kotlinx.dl.api.core.history.TrainingHistory@6719a5b8

In [19]:
fun evaluate(model: Sequential) {
    println("Accuracy: ${model.evaluate(generateDataSet(), batchSize = 10).metrics[Metrics.ACCURACY]}")
}

In [21]:
evaluate(model)

Accuracy: 0.9371426105499268


In [23]:
val testDataSet = createScaledDatasetForTest(75, 35, 45, 55, 63, 45, 23, 32, 38, 35)

In [25]:
println(model.predictSoftly(testDataSet, 10).contentDeepToString())

[[0.30628088, 0.6937191], [0.7303169, 0.2696831], [0.72259474, 0.27740523], [0.6353656, 0.36463442], [0.4780286, 0.52197134], [0.72259474, 0.27740523], [0.73101467, 0.26898536], [0.7306809, 0.2693191], [0.7296027, 0.2703973], [0.7303169, 0.2696831]]


this is the result                                                                            
the first element in returned array is the possibility of not experiencing side effect        
the second element is the possibility of experiencing side effect                                                                                         

for older people, we expect have higher possibility on experiencing side effect               
second element > first element    

In [27]:
 val predictOlderResult = model.predictSoftly(createScaledDatasetForTest(75), 1)[0]
println(predictOlderResult.contentToString())
check(predictOlderResult[1] > predictOlderResult[0]) { "Older people have lower possibility on experiencing side effect, it is not expected." }

[0.30628088, 0.6937191]


and for younger people, we expect have lower possibility on experiencing side effect                               
first element > second element                                                                                     

In [29]:
val predictYoungerResult = model.predictSoftly(createScaledDatasetForTest(35), 1)[0]
println(predictYoungerResult.contentToString())
check(predictYoungerResult[0] > predictYoungerResult[1]) { "Younger people have higher possibility on experiencing side effect, it is not expected." }

[0.7303169, 0.2696831]


In [30]:
model.close()