In [5]:
%use lets-plot

In [8]:
import java.io.IOException
import java.io.UncheckedIOException
import java.nio.file.Files
import java.nio.file.LinkOption
import java.nio.file.Paths
import kotlin.io.path.fileSize
import kotlin.io.path.name
import kotlin.io.path.pathString

/** Sizes, in bytes, of the files collected by [searchFiles]; appended to as a side effect of each scan. */
val rawFileSizes = mutableListOf<Long>()

/**
 * Recursively scans the directory [folderName] and appends the size in bytes of every
 * regular file found beneath it to [rawFileSizes].
 *
 * The scan is deliberately best-effort: directories that cannot be listed and files whose
 * size cannot be read are skipped without reporting, so one inaccessible entry never
 * aborts the rest of the walk.
 *
 * Symbolic links are neither followed nor counted. Following them would count the same
 * file more than once and would re-walk the tree repeatedly when links form a cycle.
 *
 * @param folderName path of the directory to scan; a missing or unreadable directory
 *   contributes nothing.
 */
fun searchFiles(folderName: String) {
    val directory = Paths.get(folderName)

    try {
        Files.list(directory).use { entries ->
            entries.forEach { entry ->
                try {
                    when {
                        Files.isDirectory(entry, LinkOption.NOFOLLOW_LINKS) ->
                            searchFiles(entry.pathString)
                        Files.isRegularFile(entry, LinkOption.NOFOLLOW_LINKS) ->
                            rawFileSizes.add(entry.fileSize())
                        // Symbolic links and other special entries are ignored.
                        else -> Unit
                    }
                } catch (e: IOException) {
                    // The entry vanished or became unreadable between listing and sizing:
                    // skip just this entry and keep processing its siblings.
                }
            }
        }
    } catch (e: IOException) {
        // The directory could not be opened (missing, access denied, ...): skip it.
    } catch (e: UncheckedIOException) {
        // Files.list reports I/O errors that occur while iterating as UncheckedIOException;
        // the entries read so far are kept and the remainder of this directory is skipped.
    }
}

/**
 * Formats a byte count as a whole number followed by a binary unit (B, KB, MB, GB or TB).
 *
 * The largest unit that keeps the number below 1024 is chosen, capped at TB, and the
 * value is truncated by integer division (for example, 1536 bytes becomes "1 KB").
 *
 * @param fileSize size in bytes.
 * @return the formatted size, e.g. "512 B" or "3 MB".
 */
fun convertFileSize(fileSize: Long): String {
    var unitSize = 1L
    for (unitName in listOf("B", "KB", "MB", "GB")) {
        val nextUnitSize = unitSize * 1024L
        if (fileSize < nextUnitSize) {
            return "${fileSize / unitSize} $unitName"
        }
        unitSize = nextUnitSize
    }
    // Anything of 1024 GB or more is reported in terabytes.
    return "${fileSize / unitSize} TB"
}


/**
 * Extracts the leading digit of [sizeString].
 *
 * @param sizeString text expected to start with a number, e.g. "42 KB" or "1337".
 * @return the numeric value of the first character when it is a digit other than '0';
 *   `null` when the string is empty or starts with '0' or a non-digit character.
 */
fun getFirstDigit(sizeString: String): Int? =
    sizeString.firstOrNull()
        ?.takeIf { it != '0' && it.isDigit() }
        ?.digitToInt()

/**
 * Prints how often each distinct value occurs in [numbersList] and shows the same
 * counts as a pie chart with a central hole.
 *
 * When [numbersList] is empty only a "no data" message is printed and no chart is built.
 *
 * @param numbersList values to tally.
 * @param titleSuffix label included in the printed summary and in the chart title.
 */
fun showStatistics(numbersList: List<Int>, titleSuffix: String) {
    if (numbersList.isEmpty()) {
        println("Brak danych do wyświetlenia dla: $titleSuffix")
        return
    }

    // Occurrence count per value, ordered by value.
    val countsByValue = numbersList.groupingBy { it }.eachCount().toSortedMap()
    println("Rozkład liczbowy ($titleSuffix): $countsByValue")

    // Two parallel columns for the plot: the value as text and its count.
    val chartData = mapOf(
        "Etykieta" to countsByValue.keys.map { it.toString() },
        "Wartosc" to countsByValue.values.toList()
    )

    val hoverText = layerTooltips()
        .line("Cyfra|@Etykieta")
        .line("Ilość|@Wartosc")

    // stat = identity: the slice sizes are the counts computed above.
    val pieLayer = geomPie(size = 15, hole = 0.4, stat = Stat.identity, tooltips = hoverText) {
        fill = "Etykieta"
        slice = "Wartosc"
    }

    val chartTitle = ggtitle("Rozkład Benforda - $titleSuffix (Próbek: ${numbersList.size})")

    (letsPlot(chartData) + pieLayer + chartTitle).show()
}
// Start from an empty sample, then collect the sizes of all files under C:/.
rawFileSizes.clear()
searchFiles("C:/")

// Leading digit of every size after formatting it with convertFileSize (B/KB/MB/...).
val digitsWithConversion = rawFileSizes.mapNotNull { bytes -> getFirstDigit(convertFileSize(bytes)) }

// Leading digit of every size expressed as a plain byte count.
val digitsRawBytes = rawFileSizes.mapNotNull { bytes -> getFirstDigit(bytes.toString()) }

// Print and plot both distributions.
showStatistics(digitsWithConversion, "Z konwersją")
showStatistics(digitsRawBytes, "Surowe bajty")


Rozkład liczbowy (Z konwersją): {1=233658, 2=124578, 3=95967, 4=77698, 5=59287, 6=49685, 7=44229, 8=41929, 9=37054}


Rozkład liczbowy (Surowe bajty): {1=235675, 2=125588, 3=95712, 4=77377, 5=60316, 6=49597, 7=44740, 8=42303, 9=32777}
