Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local.properties
local.properties
.gradle
build/
!gradle/wrapper/gradle-wrapper.jar
Expand All @@ -20,4 +20,4 @@ out/
.vscode/

### Mac OS ###
.DS_Store
.DS_Store
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
plugins {
alias(libs.plugins.androidLibrary) apply false
alias(libs.plugins.kotlinMultiplatform) apply false
alias(libs.plugins.jetbrainsKotlinJvm) apply false
}
1 change: 1 addition & 0 deletions gguf/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/build
18 changes: 18 additions & 0 deletions gguf/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
plugins {
id("java-library")
alias(libs.plugins.jetbrainsKotlinJvm)
}

java {
sourceCompatibility = JavaVersion.VERSION_11
targetCompatibility = JavaVersion.VERSION_11
}

kotlin {
compilerOptions {
jvmTarget = org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_11
}
}
dependencies {
testImplementation(libs.testng)
}
106 changes: 106 additions & 0 deletions gguf/src/main/java/sk/ai/net/gguf/Constants.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package sk.ai.net.gguf

/**
* This is a kotlin gguf reader related logic interpreted from python code "gguf-py/gguf/constants.py"
* of github repo "https://github.com/ggerganov/llama.cpp"
*/

//TODO convert the rest of file from constants.py

const val GGUF_MAGIC = 0x46554747u
const val GGUF_VERSION = 3
const val GGUF_DEFAULT_ALIGNMENT = 32

enum class GGMLQuantizationType(val value: Int) {
F32(0),
F16(1),
Q4_0(2),
Q4_1(3),
Q5_0(6),
Q5_1(7),
Q8_0(8),
Q8_1(9),
Q2_K(10),
Q3_K(11),
Q4_K(12),
Q5_K(13),
Q6_K(14),
Q8_K(15),
IQ2_XXS(16),
IQ2_XS(17),
IQ3_XXS(18),
IQ1_S(19),
IQ4_NL(20),
IQ3_S(21),
IQ2_S(22),
IQ4_XS(23),
I8(24),
I16(25),
I32(26),
I64(27),
F64(28),
IQ1_M(29),
BF16(30),
TQ1_0(34),
TQ2_0(35);

companion object {
fun fromValue(value: Int): GGMLQuantizationType? {
return values().find { it.value == value }
}
}
}

// Block size constant
const val QK_K = 256

// Quantization type and corresponding sizes
val GGML_QUANT_SIZES: Map<GGMLQuantizationType, Pair<Int, Int>> = mapOf(
GGMLQuantizationType.F32 to (1 to 4),
GGMLQuantizationType.F16 to (1 to 2),
GGMLQuantizationType.Q4_0 to (32 to 2 + 16),
GGMLQuantizationType.Q4_1 to (32 to 2 + 2 + 16),
GGMLQuantizationType.Q5_0 to (32 to 2 + 4 + 16),
GGMLQuantizationType.Q5_1 to (32 to 2 + 2 + 4 + 16),
GGMLQuantizationType.Q8_0 to (32 to 2 + 32),
GGMLQuantizationType.Q8_1 to (32 to 4 + 4 + 32),
GGMLQuantizationType.Q2_K to (256 to 2 + 2 + QK_K / 16 + QK_K / 4),
GGMLQuantizationType.Q3_K to (256 to 2 + QK_K / 4 + QK_K / 8 + 12),
GGMLQuantizationType.Q4_K to (256 to 2 + 2 + QK_K / 2 + 12),
GGMLQuantizationType.Q5_K to (256 to 2 + 2 + QK_K / 2 + QK_K / 8 + 12),
GGMLQuantizationType.Q6_K to (256 to 2 + QK_K / 2 + QK_K / 4 + QK_K / 16),
GGMLQuantizationType.Q8_K to (256 to 4 + QK_K + QK_K / 8),
GGMLQuantizationType.IQ2_XXS to (256 to 2 + QK_K / 4),
GGMLQuantizationType.IQ2_XS to (256 to 2 + QK_K / 4 + QK_K / 32),
GGMLQuantizationType.IQ3_XXS to (256 to 2 + QK_K / 4 + QK_K / 8),
GGMLQuantizationType.IQ1_S to (256 to 2 + QK_K / 8 + QK_K / 16),
GGMLQuantizationType.IQ4_NL to (32 to 2 + 16),
GGMLQuantizationType.IQ3_S to (256 to 2 + QK_K / 4 + QK_K / 8 + QK_K / 32 + 4),
GGMLQuantizationType.IQ2_S to (256 to 2 + QK_K / 4 + QK_K / 16),
GGMLQuantizationType.IQ4_XS to (256 to 2 + 2 + QK_K / 2 + QK_K / 64),
GGMLQuantizationType.I8 to (1 to 1),
GGMLQuantizationType.I16 to (1 to 2),
GGMLQuantizationType.I32 to (1 to 4),
GGMLQuantizationType.I64 to (1 to 8),
GGMLQuantizationType.F64 to (1 to 8),
GGMLQuantizationType.IQ1_M to (256 to QK_K / 8 + QK_K / 16 + QK_K / 32),
GGMLQuantizationType.BF16 to (1 to 2),
GGMLQuantizationType.TQ1_0 to (256 to 2 + 4 * 13),
GGMLQuantizationType.TQ2_0 to (256 to 2 + 64)
)

enum class GGUFValueType(val value: Int) {
UINT8(0),
INT8(1),
UINT16(2),
INT16(3),
UINT32(4),
INT32(5),
FLOAT32(6),
BOOL(7),
STRING(8),
ARRAY(9),
UINT64(10),
INT64(11),
FLOAT64(12)
}
Loading