## Q-learning example on the Taxi-v3 environment

In [1]:
// Notebook dependency declaration: registers the Maven repositories and the
// kotlinrl artifacts that the following cells import from.
USE {
    repositories {
        mavenCentral()
        // Snapshot repository; every kotlinrl artifact below is a -SNAPSHOT version.
        maven("https://central.sonatype.com/repository/maven-snapshots/")
    }
    dependencies {
        // NOTE(review): the import cell also uses io.github.kotlinrl.core.*, which has no
        // explicit artifact here — presumably it is pulled in transitively; confirm.
        implementation("io.github.kotlinrl:integration:0.1.0-SNAPSHOT")
        implementation("io.github.kotlinrl:tabular:0.1.0-SNAPSHOT")
        implementation("io.github.kotlinrl:envs:0.1.0-SNAPSHOT")
        implementation("io.github.kotlinrl:rendering:0.1.0-SNAPSHOT")
    }
}

In [2]:
import io.github.kotlinrl.core.*
import io.github.kotlinrl.core.RecordVideo
import io.github.kotlinrl.core.api.*
import io.github.kotlinrl.core.wrapper.*
import io.github.kotlinrl.integration.gymnasium.*
import io.github.kotlinrl.integration.gymnasium.GymnasiumEnvs.*
import io.github.kotlinrl.rendering.*
import io.github.kotlinrl.tabular.*
import io.github.kotlinrl.tabular.td.classic.*
import org.jetbrains.kotlinx.kandy.letsplot.export.*
import org.jetbrains.kotlinx.multik.api.*
import org.jetbrains.kotlinx.multik.api.io.*
import org.jetbrains.kotlinx.multik.ndarray.data.*
import java.io.*


In [3]:
// --- Episode budget -------------------------------------------------------
// Step cap handed to both the training and the testing trainer.
val maxStepsPerEpisode: Int = 205
val trainingEpisodes: Int = 5_000
val testEpisodes: Int = 50

// --- Exploration (epsilon-greedy) -----------------------------------------
// Fed to ParameterSchedule.linearDecay; the decrement is invoked once per
// finished training episode.
// NOTE(review): if each decrement subtracts epsilonDecayRate, epsilon ends at
// about 0.3 - 5_000 * 0.000056 = 0.02 after training — confirm.
val initialEpsilon: Double = 0.3
val epsilonDecayRate: Double = 0.000056
val minEpsilon: Double = 0.0

// --- Q-learning update ----------------------------------------------------
val alpha: Double = 0.5   // learning rate, wrapped in a constant schedule by the training cell
val gamma: Double = 0.99  // discount factor

// --- Persistence ----------------------------------------------------------
// NPY file the trained Q-table is written to and later reloaded from.
val fileName: String = "TaxiQLearning.npy"


In [4]:
// Taxi_v3 environment created through the kotlinrl Gymnasium integration, with
// rendering switched on (presumably so the evaluation cell can record videos).
// NOTE(review): the type argument is CliffWalkingEnv although the environment id is
// Taxi_v3 — this looks like a copy-paste leftover; confirm whether a Taxi-specific
// environment type should be used instead.
val env = gymnasium.make<CliffWalkingEnv>(Taxi_v3, render = true)

// Zero-initialised 500 x 6 Q-table (Taxi-v3 has 500 discrete states and 6 actions).
// Declared `val`: the binding is never reassigned anywhere in the notebook; the
// training cell passes this same array to QLearning and writes it to disk afterwards.
val trainingQtable: QTable = mk.d2array(500, 6) { 0.0 }

// Linearly decaying exploration rate. The destructured pair yields the schedule
// consumed by the algorithm and a decrement function that the training cell calls
// at the end of every episode.
val (epsilonSchedule, epsilonDecrement) = ParameterSchedule.linearDecay(
    initialValue = initialEpsilon,
    minValue = minEpsilon,
    decayRate = epsilonDecayRate,
    // Progress log: report the current epsilon every 100th episode.
    callback = { episode, parameter ->
        if (episode % 100 == 0) {
            println("Episode: $episode, Epsilon: $parameter")
        }
    }
)


2025-09-18T00:25:16.782957Z Execution of code 'val env = gymnasium....' ERROR Log4j2 could not find a logging implementation. Please add log4j-core to the classpath. Using SimpleLogger to log to the console...


In [5]:
// Q-learning over the shared table, using the decaying epsilon schedule and a
// constant learning rate.
val trainingAlgorithm = QLearning(
    Q = trainingQtable,
    epsilon = epsilonSchedule,
    alpha = ParameterSchedule.constant(alpha),
    gamma = gamma,
)

// Agent that learns while it acts in the environment.
val trainingAgent = learningAgent(id = "training", algorithm = trainingAlgorithm)

// Episodic training loop: an episode counts as successfully terminated when the
// step result reports `done`; epsilon is decremented after every episode.
val trainer = episodicTrainer(
    env = env,
    agent = trainingAgent,
    maxStepsPerEpisode = maxStepsPerEpisode,
    successfulTermination = { outcome -> outcome.done },
    callbacks = listOf(
        printEpisodeStart(100),
        onEpisodeEnd { epsilonDecrement() }
    )
)

println("Starting training")
val training = trainer.train(maxEpisodes(trainingEpisodes))

// Persist the table so the evaluation cells can reload it from disk.
mk.writeNPY(fileName, trainingQtable)


Starting training
Episode 1 truncated=true
Episode 2 truncated=true
Episode 3 truncated=true
Episode 5 truncated=true
Episode 6 truncated=true
Episode 7 truncated=true
Episode 8 truncated=true
Episode 9 truncated=true
Episode 10 truncated=true
Episode 11 truncated=true
Episode 12 truncated=true
Episode 14 truncated=true
Episode 15 truncated=true
Episode 16 truncated=true
Episode 17 truncated=true
Episode 18 truncated=true
Episode 19 truncated=true
Episode 20 truncated=true
Episode 21 truncated=true
Episode 22 truncated=true
Episode 25 truncated=true
Episode 26 truncated=true
Episode 27 truncated=true
Episode 28 truncated=true
Episode 29 truncated=true
Episode 30 truncated=true
Episode 31 truncated=true
Episode 32 truncated=true
Episode 33 truncated=true
Episode 34 truncated=true
Episode 36 truncated=true
Episode 37 truncated=true
Episode 38 truncated=true
Episode 40 truncated=true
Episode 41 truncated=true
Episode 45 truncated=true
Episode 46 truncated=true
Episode 47 truncated=true
Ep

In [6]:
// Reload the Q-table written by the training cell and expose it as a 2-D array.
val savedQValues = mk.readNPY<Double, D2>(fileName)
val testingQtable = savedQValues.asD2Array()


In [7]:
// Wrap the environment so that episodes are recorded into the given folder.
// testEpisodes / 3 is integer division (50 / 3 = 16).
// NOTE(review): the meaning of this third argument is not visible here — presumably
// a recording interval or episode count; confirm against RecordVideo.
val recordEnv = RecordVideo(env = env, folder = "videos/taxi_q_learning", testEpisodes / 3)

// Purely greedy policy derived from the reloaded Q-table; no learning happens here.
val greedyPolicy = testingQtable.greedy()
val testingAgent = policyAgent(id = "testing", policy = greedyPolicy)

// Same episodic loop as in training, but driving the recording environment with
// the fixed-policy agent.
val tester = episodicTrainer(
    env = recordEnv,
    agent = testingAgent,
    maxStepsPerEpisode = maxStepsPerEpisode,
    successfulTermination = { outcome -> outcome.done },
    callbacks = listOf(
        printEpisodeStart(10)
    )
)

println("Starting testing")
val test = tester.train(maxEpisodes(testEpisodes))


Starting testing
Starting episode 10
Starting episode 20
Starting episode 30
Starting episode 40
Starting episode 50
Max episodes reached: 50


In [8]:
// Compare the average reward collected while learning with the one achieved by
// the greedy policy during evaluation.
val trainingAverage = training.totalAverageReward
val testAverage = test.totalAverageReward
println("Training average reward: $trainingAverage")
println("Test average reward: $testAverage")

// Keep this as the last expression so the recorded videos become the cell output.
displayVideos(recordEnv.folder)


Training average reward: -11.8408
Test average reward: 8.24
