From 6f5d1e9091774877ccd7d5924b033ad9f5e6ce67 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 28 Jan 2025 21:35:39 +0100 Subject: [PATCH 01/46] Remove v1 code --- src/dalvik/dex/debug_info.rs | 131 ---- src/dalvik/dex/dtype.rs | 190 ------ src/dalvik/dex/encoded_value.rs | 406 ----------- src/dalvik/dex/header.rs | 279 -------- src/dalvik/dex/items.rs | 531 --------------- src/dalvik/dex/map_list.rs | 181 ----- src/dalvik/dex/mod.rs | 20 - src/dalvik/dex/types.rs | 366 ---------- src/dalvik/error.rs | 50 -- src/dalvik/file/annotation.rs | 98 --- src/dalvik/file/class_def.rs | 356 ---------- src/dalvik/file/debug.rs | 243 ------- src/dalvik/file/field.rs | 50 -- src/dalvik/file/lazy_file.rs | 375 ---------- src/dalvik/file/method.rs | 179 ----- src/dalvik/file/mod.rs | 35 - src/dalvik/file/value.rs | 65 -- src/dalvik/insns.rs | 1128 ------------------------------- src/dalvik/mod.rs | 4 - src/lib.rs | 3 - src/smali/io.rs | 506 -------------- src/smali/mod.rs | 2 - 22 files changed, 5198 deletions(-) delete mode 100644 src/dalvik/dex/debug_info.rs delete mode 100644 src/dalvik/dex/dtype.rs delete mode 100644 src/dalvik/dex/encoded_value.rs delete mode 100644 src/dalvik/dex/header.rs delete mode 100644 src/dalvik/dex/items.rs delete mode 100644 src/dalvik/dex/map_list.rs delete mode 100644 src/dalvik/dex/mod.rs delete mode 100644 src/dalvik/dex/types.rs delete mode 100644 src/dalvik/error.rs delete mode 100644 src/dalvik/file/annotation.rs delete mode 100644 src/dalvik/file/class_def.rs delete mode 100644 src/dalvik/file/debug.rs delete mode 100644 src/dalvik/file/field.rs delete mode 100644 src/dalvik/file/lazy_file.rs delete mode 100644 src/dalvik/file/method.rs delete mode 100644 src/dalvik/file/mod.rs delete mode 100644 src/dalvik/file/value.rs delete mode 100644 src/dalvik/insns.rs delete mode 100644 src/dalvik/mod.rs delete mode 100644 src/smali/io.rs delete mode 100644 src/smali/mod.rs diff --git 
a/src/dalvik/dex/debug_info.rs b/src/dalvik/dex/debug_info.rs deleted file mode 100644 index 2b299a7..0000000 --- a/src/dalvik/dex/debug_info.rs +++ /dev/null @@ -1,131 +0,0 @@ -use super::types::*; -use binrw::binrw; - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct DebugInfoItem { - /// the initial value for the state machine's line register. Does not represent - /// an actual positions entry. - pub line_start: ULeb128, - - /// the number of parameter names that are encoded. There should be one per - /// method parameter, excluding an instance method's this, if any. - #[bw(calc = ULeb128(parameter_names.len() as u32))] - pub parameters_size: ULeb128, - - /// string index of the method parameter name. An encoded value of `NO_INDEX` - /// indicates that no name is available for the associated parameter. The type - /// descriptor and signature are implied from the method descriptor and signature. - #[br(count = parameters_size.0)] - pub parameter_names: Vec, -} - -impl DebugInfoItem { - /// terminates a debug info sequence for a code_item - pub const DBG_END_SEQUENCE: UByte = 0x00; - - /// advances the address register without emitting a positions entry - /// - /// @format: [ULeb128] addr_diff - /// - /// @args: - /// - `addr_diff`: amount to add to address register - pub const DBG_ADVANCE_PC: UByte = 0x01; - - /// advances the line register without emitting a positions entry - /// - /// @format: [SLeb128] line_diff - /// - /// @args: - /// - `line_diff`: amount to add to line register - pub const DBG_ADVANCE_LINE: UByte = 0x02; - - /// introduces a local variable at the current address. Either name_idx or - /// type_idx may be NO_INDEX to indicate that that value is unknown. 
- /// - /// @format: - /// - [ULeb128] register_num - /// - [ULeb128p1] name_idx - /// - [ULeb128p1] type_idx - /// - /// @args: - /// - `register_num`: register number that will contain local - /// - `name_idx`: index into the string_ids list - /// - `type_idx`: index into the type_ids list - pub const DBG_START_LOCAL: UByte = 0x03; - - /// introduces a local with a type signature at the current address. Any of - /// name_idx, type_idx, or sig_idx may be NO_INDEX to indicate that that value - /// is unknown. (If sig_idx is -1, though, the same data could be represented - /// more efficiently using the opcode DBG_START_LOCAL.) - /// - /// @format: - /// - [ULeb128] register_num - /// - [ULeb128p1] name_idx - /// - [ULeb128p1] type_idx - /// - [ULeb128p1] sig_idx - /// - /// @args: - /// - `register_num`: register number that will contain local - /// - `name_idx`: index into the string_ids list - /// - `type_idx`: index into the type_ids list - /// - `sig_idx`: string index of the type signature - pub const DBG_START_LOCAL_EXTENDED: UByte = 0x04; - - /// marks a currently-live local variable as out of scope at the current address - /// - /// @format: - /// - [ULeb128] register_num - /// - /// @args: - /// - `register_num`: register that contained local - pub const DBG_END_LOCAL: UByte = 0x05; - - /// re-introduces a local variable at the current address. The name and type are - /// the same as the last local that was live in the specified register. - /// - /// @format: - /// - [ULeb128] register_num - /// - /// @args: - /// - `register_num`: register to restart - pub const DBG_RESTART_LOCAL: UByte = 0x06; - - /// sets the prologue_end state machine register, indicating that the next position - /// entry that is added should be considered the end of a method prologue (an - /// appropriate place for a method breakpoint). The prologue_end register is - /// cleared by any special (>= 0x0a) opcode. 
- pub const DBG_SET_PROLOGUE_END: UByte = 0x07; - - /// sets the epilogue_begin state machine register, indicating that the next position - /// entry that is added should be considered the beginning of a method epilogue (an - /// appropriate place to suspend execution before method exit). The epilogue_begin - /// register is cleared by any special (>= 0x0a) opcode. - pub const DBG_SET_EPILOGUE_BEGIN: UByte = 0x08; - - /// indicates that all subsequent line number entries make reference to this source - /// file name, instead of the default name specified in code_item - /// - /// @format: - /// - [ULeb128p1] file_idx - /// - /// @args: - /// - `file_idx`: string index of source file name; [NO_INDEX] if unknown - pub const DBG_SET_FILE: UByte = 0x09; - - /// Behaviour for special opcodes: advances the line and address registers, emits a - /// position entry, and clears prologue_end and epilogue_begin. See below for description. - /// - /// Opcodes with values between 0x0a and 0xff (inclusive) move both the line and address - /// registers by a small amount and then emit a new position table entry. - pub const DBG_FIRST_SPECIAL: UByte = 0x0a; - - /// the smallest line number increment - pub const DBG_LINE_BASE: i8 = -4; - - /// the number of line increments represented - pub const DBG_LINE_RANGE: UByte = 15; -} - - diff --git a/src/dalvik/dex/dtype.rs b/src/dalvik/dex/dtype.rs deleted file mode 100644 index 958667d..0000000 --- a/src/dalvik/dex/dtype.rs +++ /dev/null @@ -1,190 +0,0 @@ -use crate::dalvik::error::{Error, Result}; -use std::{ - fmt::{Debug, Display}, - rc::Rc, -}; - -/// A TypeDescriptor is the representation of any type, including -/// primitives, classes, arrays, and void. 
-/// -/// @see https://source.android.com/docs/core/runtime/dex-format#typedescriptor -/// -#[derive(PartialEq, Eq)] -pub struct DexType { - pub descriptor: String, - pub dim: usize, - pub primitive: bool, -} - -impl DexType { - /// Create a new `DexType` from a `String` removing any array - /// dimensions - pub fn from(descriptor: &Rc) -> Option { - let mut chars = descriptor.chars().peekable(); - let mut i: usize = 0; - while *chars.peek()? == '[' { - i += 1; - chars.next(); - } - match *chars.peek()? { - // primitive types - 'V' | 'Z' | 'C' | 'B' | 'S' | 'I' | 'F' | 'J' | 'D' => { - Some(DexType { - descriptor: descriptor[i..].to_string(), - dim: i, - primitive: true, - }) - } - // REVISIT: - // resolve the class type descriptor directly - 'L' => { - Some(DexType { - descriptor: descriptor[i..].to_string(), - dim: i, - primitive: false, - }) - } - _ => { - None - } - } - } - - pub fn read(descriptor: &Rc) -> Result { - let mut chars = descriptor.chars().peekable(); - let mut i: usize = 0; - loop { - if let Some(c) = chars.peek() { - if *c == '[' { - i += 1; - chars.next(); - continue; - } - } - break; - } - match *chars.peek().unwrap() { - // primitive types - 'V' | 'Z' | 'C' | 'B' | 'S' | 'I' | 'F' | 'J' | 'D' => { - Ok(DexType { - descriptor: descriptor[i..].to_string(), - dim: i, - primitive: true, - }) - } - // REVISIT: - // resolve the class type descriptor directly - 'L' => { - Ok(DexType { - descriptor: descriptor[i..].to_string(), - dim: i, - primitive: false, - }) - } - _ => { - Err(Error::MalformedDescriptor(format!( - "Invalid type descriptor: {}", - descriptor - ))) - } - } - } -} - -impl Display for DexType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.dim > 0 { - write!(f, "{}", "[".repeat(self.dim))?; - } - write!(f, "{}", self.descriptor) - } -} - -impl Debug for DexType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "DexType {{ descriptor: \"{}\", dim: {}, primitive: 
{} }}", - self.descriptor.escape_default(), - self.dim, - self.primitive - ) - } -} - -// pub struct Prototype { -// pub shorty: Rc, -// pub return_type: Rc, -// pub parameters: Vec>, -// } - -// impl Prototype { -// /// Parses the `proto_id_item` section of the dex file. -// /// -// /// First, the `proto_id_item` is read and the `shorty` and `return_type` descriptors -// /// are read from the `string_ids_item` section. The `parameters` are read from the -// /// data section only if `parameters_off` is not 0. -// /// -// /// @**NOTE**: this function assumes that `reader` is at the start of the next -// /// `proto_id_item`. -// pub fn from(mut reader: R, dex: &Dex) -> result::Result -// where -// R: Read + Seek, -// { -// // 1. read the proto_id_item -// let item = match ProtoIdItem::read(&mut reader) { -// Ok(x) => x, -// Err(e) => return Err(Error::from(e)), -// }; - -// let shorty = dex.string_at(item.shorty_idx as usize)?; -// let return_ty = dex.type_at(item.return_type_idx)?; -// let mut proto = Prototype { -// shorty: shorty.clone(), -// return_type: return_ty.clone(), -// // REVISIT: maybe find a way to hardcode the number of parameters -// parameters: Vec::new(), -// }; -// // As described in Android docs: 0 if this prototype has no parameters. 
-// if item.parameters_off != 0 { -// reader.seek(io::SeekFrom::Start(item.parameters_off as u64))?; -// let params = match TypeList::read(&mut reader) { -// Ok(x) => x, -// Err(e) => return Err(Error::from(e)), -// }; - -// for j in 0..params.size { -// // the parameter item stores the type index of the parameter -// let index = params.list[j as usize].type_idx; -// let ty = dex.type_at(index as u32)?; -// proto.parameters.push(ty.clone()); -// } -// } -// return Ok(proto); -// } -// } - -// impl Display for Prototype { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!( -// f, -// "({}){}", -// self.parameters -// .iter() -// .map(|x| x.to_string()) -// .collect::>() -// .join(","), -// self.return_type -// ) -// } -// } - -// impl Debug for Prototype { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!( -// f, -// "Prototype {{ shorty: {}, return_type: {}, parameters: {:?} }}", -// self.shorty, self.return_type, self.parameters -// ) -// } -// } diff --git a/src/dalvik/dex/encoded_value.rs b/src/dalvik/dex/encoded_value.rs deleted file mode 100644 index e4a4930..0000000 --- a/src/dalvik/dex/encoded_value.rs +++ /dev/null @@ -1,406 +0,0 @@ -use super::types::*; -use binrw::{binrw, BinRead, BinWrite, Endian}; -use byteorder::{LittleEndian, ReadBytesExt}; -use std::{ - ffi::{c_double, c_float}, - io, -}; - -/// ## EncodedValue -/// ### Structure -/// - `value_type`: byte indicating the type of the immediately subsequent value along with -/// an optional clarifying argument in the high-order three bits. -/// In most cases, `value_arg` encodes the length of the immediately-subsequent -/// value in bytes, as (`size - 1`), e.g., `0` means that the value requires -/// one byte, and `7` means it requires eight bytes; -/// - `value`: bytes representing the value, variable in length and interpreted differently -/// for different value_type bytes, though always little-endian. 
-#[derive(Debug)] -pub enum EncodedValue { - /// signed one-byte integer value - /// - /// @value_arg: none, must be 0 - /// @value_format: `UByte[1]` - Byte(i8), - - /// signed two-byte integer value, sign-extended - /// - /// @value_arg: `size - 1` (0..1) - /// @value_format: `UByte[size]` - Short(i16), - - /// unsigned two-byte integer value, zero-extended - /// - /// @value_arg: size - 1 (0..1) - /// @value_format: `UByte[size]` - Char(char), - - /// signed four-byte integer value, sign-extended - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Int(i32), - - /// signed eight-byte integer value, sign-extended - /// - /// @value_arg: `size - 1` (0..7) - /// @value_format: `UByte[size]` - Long(i64), - - /// four-byte bit pattern, zero-extended to the right, - /// and interpreted as an IEEE754 32-bit floating point value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Float(f32), - - /// eight-byte bit pattern, zero-extended to the right, - /// and interpreted as an IEEE754 64-bit floating point value. - /// - /// @value_arg: `size - 1` (0..7) - /// @value_format: `UByte[size]` - Double(f64), - - /// unsigned (zero-extended) four-byte integer value, interpreted - /// as an index into the `proto_ids` section and representing a - /// method type value - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - MethodType(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted - /// as an index into the method_handles section and representing a - /// method handle value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - MethodHandle(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted as - /// an index into the string_ids section and representing a string value. 
- /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - String(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted as - /// an index into the type_ids section and representing a type value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Type(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted as - /// an index into the field_ids section and representing a field value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Field(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted as - /// an index into the method_ids section and representing a method value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Method(u32), - - /// unsigned (zero-extended) four-byte integer value, interpreted as - /// an index into the field_ids section and representing an enum value. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: `UByte[size]` - Enum(u32), - - /// An array of values, in the format specified by "encoded_array format" - /// The size of the value is implicit in the encoding. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: [EncodedArray] - Array(EncodedArray), - - /// a sub-annotation, in the format specified by "encoded_annotation format". - /// The size of the value is implicit in the encoding. - /// - /// @value_arg: `size - 1` (0..3) - /// @value_format: [EncodedAnnotation] - Annotation(EncodedAnnotation), - - /// null reference value - /// - /// @value_arg: 0 - /// @value_format: none - Null, - - /// one-bit value; 0 for false and 1 for true. The bit is represented in - /// the `value_arg`. 
- /// - /// @value_arg: boolean (0..1) - /// @value_format: none - True, - False, -} - -impl EncodedValue { - pub const VALUE_BYTE: UByte = 0x00; - pub const VALUE_SHORT: UByte = 0x02; - pub const VALUE_CHAR: UByte = 0x03; - pub const VALUE_INT: UByte = 0x04; - pub const VALUE_LONG: UByte = 0x06; - pub const VALUE_FLOAT: UByte = 0x10; - pub const VALUE_DOUBLE: UByte = 0x11; - pub const VALUE_METHOD_TYPE: UByte = 0x15; - pub const VALUE_METHOD_HANDLE: UByte = 0x16; - pub const VALUE_STRING: UByte = 0x17; - pub const VALUE_TYPE: UByte = 0x18; - pub const VALUE_FIELD: UByte = 0x19; - pub const VALUE_METHOD: UByte = 0x1A; - pub const VALUE_ENUM: UByte = 0x1B; - pub const VALUE_ARRAY: UByte = 0x1C; - pub const VALUE_ANNOTATION: UByte = 0x1D; - pub const VALUE_NULL: UByte = 0x1E; - pub const VALUE_BOOLEAN: UByte = 0x1F; -} - -impl BinRead for EncodedValue { - type Args<'a> = (); - fn read_options( - reader: &mut R, - _: Endian, - _: Self::Args<'_>, - ) -> binrw::BinResult { - let byte = reader.read_u8()?; - - let value_type = byte & 0x1F_u8 as u8; - let value_size = ((byte & 0xE0) >> 5) as usize + 1; - - // return Ok(EncodedValue::Data(byte, data)); - // return Ok(EncodedValue::Int(byte[0] as i32)); - - let value = match value_type { - EncodedValue::VALUE_BYTE => { - EncodedValue::Byte(reader.read_int::(value_size).unwrap() as i8) - } - EncodedValue::VALUE_SHORT => { - EncodedValue::Short(reader.read_int::(value_size).unwrap() as i16) - } - EncodedValue::VALUE_CHAR => EncodedValue::Char( - char::from_u32(reader.read_uint::(value_size).unwrap() as u32) - .unwrap(), - ), - EncodedValue::VALUE_INT => { - EncodedValue::Int(reader.read_int::(value_size).unwrap() as i32) - } - EncodedValue::VALUE_LONG => { - EncodedValue::Long(reader.read_int::(value_size).unwrap()) - } - EncodedValue::VALUE_FLOAT => EncodedValue::Float(c_float::from_bits( - reader.read_uint::(value_size).unwrap() as u32, - )), - EncodedValue::VALUE_DOUBLE => EncodedValue::Double(c_double::from_bits( - 
reader.read_uint::(value_size).unwrap(), - )), - EncodedValue::VALUE_METHOD_TYPE => EncodedValue::MethodType( - reader.read_uint::(value_size).unwrap() as u32, - ), - EncodedValue::VALUE_METHOD_HANDLE => EncodedValue::MethodHandle( - reader.read_uint::(value_size).unwrap() as u32, - ), - EncodedValue::VALUE_STRING => { - EncodedValue::String(reader.read_uint::(value_size).unwrap() as u32) - } - EncodedValue::VALUE_TYPE => { - EncodedValue::Type(reader.read_uint::(value_size).unwrap() as u32) - } - EncodedValue::VALUE_FIELD => { - EncodedValue::Field(reader.read_uint::(value_size).unwrap() as u32) - } - EncodedValue::VALUE_METHOD => { - EncodedValue::Method(reader.read_uint::(value_size).unwrap() as u32) - } - EncodedValue::VALUE_ENUM => { - EncodedValue::Enum(reader.read_uint::(value_size).unwrap() as u32) - } - EncodedValue::VALUE_ARRAY => EncodedValue::Array(EncodedArray::read(reader).unwrap()), - EncodedValue::VALUE_ANNOTATION => { - EncodedValue::Annotation(EncodedAnnotation::read(reader).unwrap()) - } - EncodedValue::VALUE_NULL => EncodedValue::Null, - EncodedValue::VALUE_BOOLEAN => { - if (byte & 0xE0) == 0x00 { - EncodedValue::False - } else { - EncodedValue::True - } - } - _ => panic!( - "Unknown value type: {} with original byte {}", - value_type, byte - ), - }; - return Ok(value); - } -} - -// TODO -impl BinWrite for EncodedValue { - type Args<'a> = (); - fn write_options( - &self, - _: &mut W, - _: Endian, - _: Self::Args<'_>, - ) -> binrw::BinResult<()> { - todo!(); - } -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedArray { - /// the number of elements in this array - #[bw(calc = ULeb128(values.len() as u32))] - pub size: ULeb128, - - /// a series of size encoded_value byte sequences in the format specified by - /// this section, concatenated sequentially. 
- #[br(count = size.0)] - pub values: Vec, -} - -/// bytes representing the encoded array value -pub type EncodedArrayItem = EncodedArray; - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedTypeAddrPair { - /// index into the `type_ids` list for the type of the exception to catch - pub type_idx: ULeb128, - - /// bytecode address of the associated exception handler - pub addr: ULeb128, -} - -//TODO -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedCatchHandler { - /// number of catch types in this list. If non-positive, then this is the negative - /// of the number of catch types, and the catches are followed by a catch-all - /// handler. For example: A size of 0 means that there is a catch-all but no - /// explicitly typed catches. A size of 2 means that there are two explicitly typed - /// catches and no catch-all. And a size of -1 means that there is one typed catch - /// along with a catch-all. - pub size: SLeb128, - - /// stream of `abs(size)` encoded items, one for each caught type, in the order that - /// the types should be tested. - #[br(count = if size.0 != 0 { size.0.abs() } else { 0 })] - pub handlers: Vec, - - /// bytecode address of the catch-all handler. This element is only present if size - /// is non-positive. - #[br(if(size.0 <= 0))] - pub catch_all_addr: Option, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedCatchHandlerList { - /// the number of entries in this list - pub size: ULeb128, - - // elements of this list - // #[br(count = size.0 as usize)] - // pub list: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedField { - /// index into the `field_ids` list for the identity of this field (includes the - /// name and descriptor), represented as a difference from the index of - /// previous element in the list. The index of the first element in a list is - /// represented directly. 
- pub field_idx_diff: ULeb128, - - /// access flags for this field - pub access_flags: ULeb128, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedMethod { - /// index into the `method_ids` list for the identity of this method (includes the - /// name and descriptor), represented as a difference from the index of - /// previous element in the list. The index of the first element in a list is - /// represented directly. - pub method_idx_diff: ULeb128, - - /// access flags for this method - pub access_flags: ULeb128, - - /// offset from the start of the file to the code for this method - pub code_off: ULeb128, -} - -#[binrw] -#[brw(little, repr = u8)] -#[derive(Debug)] -pub enum AnnotationVisibility { - /// intended only to be visible at build time (e.g., during compilation of other code) - BUILD = 0x00, - - /// intended to visible at runtime - RUNTIME = 0x01, - - /// intended to visible at runtime, but only to the underlying system (and not to - /// regular user code) - SYSTEM = 0x02, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationItem { - /// visibility of the annotation - pub visibility: AnnotationVisibility, - - /// encoded annotation contents - pub annotation: EncodedAnnotation, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct EncodedAnnotation { - /// type of the annotation. This must be a class (not array or - /// primitive) type. - pub type_idx: ULeb128, - - /// number of name-value mappings in this annotation - #[bw(calc = ULeb128(elements.len() as u32))] - pub size: ULeb128, - - /// elements of the annotation, represented directly in-line (not as - /// offsets). - #[br(count = size.0)] - pub elements: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationElement { - /// element name, represented as an index into the `string_ids` section. 
- pub name_idx: ULeb128, - - /// element value - pub value: EncodedValue, -} diff --git a/src/dalvik/dex/header.rs b/src/dalvik/dex/header.rs deleted file mode 100644 index 5e10a65..0000000 --- a/src/dalvik/dex/header.rs +++ /dev/null @@ -1,279 +0,0 @@ -use binrw::binrw; -use openssl::sha; -use std::{ - io::{self}, - result, -}; - -use super::types::*; -use crate::dalvik::error::ConstraintError; - -/// The magic number for a DEX file represented as a byte array. It translates -/// to b'dex\n'. -pub const DEX_FILE_MAGIC: [UByte; 4] = [0x64, 0x65, 0x78, 0x0a]; - -/// Contains the structure of DEX_FILE_MAGIC. It must appear at the beginning -/// of a DEX file. Splitting the bytes in this way allows us to use the version -/// number as a u32. -#[binrw] -#[brw(little, magic = b"dex\n")] -#[derive(Debug)] -pub struct Magic { - /// The version of the DEX file. Use .version_num() to get the version - /// as a u32. - version: [UByte; 4], -} - -impl Magic { - /// Returns the version as a u32 - pub fn version_num(&self) -> result::Result { - // We assume the version is always 3 bytes and ends with a '\0' - let raw_version = &self.version[..3]; - return String::from_utf8_lossy(raw_version).parse(); - } -} - -/// Default endianness constant indicator -pub const ENDIAN_CONSTANT: UInt = 0x12345678; - -/// Reverse endianness constant indicator -/// -/// Files with this constant have performed byte-swapping. -pub const REVERSE_ENDIAN_CONSTANT: UInt = 0x78563421; - -/// The constant `NO_INDEX` is used to indicate that an index value is absent. -/// -/// Its value will be encoded as -1 using the ULeb128p1 encoding. 
-pub const NO_INDEX: UInt = 0xFFFFFFFF; - -/// SHA-1 signature size -pub const SIGNATURE_SIZE: usize = 20; - -/// Header item size -pub const HEADER_SIZE: usize = 0x70; - - - -/// Header item data structure -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct HeaderItem { - /// magic value - pub magic: Magic, - - /// Taken from Android docs: - /// - /// Adler32 checksum of the rest of the file (everything but `magic` and this - /// field); used to detect file corruption. - pub checksum: UInt, - - /// Android docs: - /// - /// SHA-1 signature (hash) of the rest of the file (everything but `magic`, - /// `checksum`, and this field); used to uniquely identify files. - pub signature: [UByte; 20], - - /// Size of the entire file including the header. - pub file_size: UInt, - - /// Size of the header (this struct), in bytes. It is always 0x70. - pub header_size: UInt, - - /// Endianness specification. - pub endian_tag: UInt, - - /// size of the link section, or 0 if this file isn't statically linked - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub link_size: UInt, - - /// offset from the start of the file to the link section, or `0` if - /// `link_size == 0`. The offset, if non-zero, should be to an offset - /// into the `link_data` section. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub link_off: UInt, - - /// offset from the start of the file to the map item. The offset, which - /// must be non-zero, should be to an offset into the `data` section. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub map_off: UInt, - - /// count of strings in the string identifiers list - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub string_ids_size: UInt, - - /// offset from the start of the file to the string identifiers list, or - /// `0` if `string_ids_size == 0`. 
- #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub string_ids_off: UInt, - - /// count of elements in the type identifiers list, at most `65535` - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub type_ids_size: UInt, - - /// offset from the start of the file to the type identifiers list, or - /// `0` if `type_ids_size == 0`. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub type_ids_off: UInt, - - /// count of elements in the proto identifiers list, at most `65535` - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub proto_ids_size: UInt, - - /// offset from the start of the file to the proto identifiers list, or - /// `0` if `proto_ids_size == 0`. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub proto_ids_off: UInt, - - /// count of elements in the field identifiers list - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub field_ids_size: UInt, - - /// offset from the start of the file to the field identifiers list, or - /// `0` if `field_ids_size == 0`. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub field_ids_off: UInt, - - /// count of elements in the method identifiers list - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub method_ids_size: UInt, - - /// offset from the start of the file to the method identifiers list, or - /// `0` if `method_ids_size == 0`. - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub method_ids_off: UInt, - - /// count of elements in the class definitions list - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub class_defs_size: UInt, - - /// offset from the start of the file to the class definitions list, or - /// `0` if `class_defs_size == 0`. 
- #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub class_defs_off: UInt, - - /// size of the data section (in bytes) - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub data_size: UInt, - - /// offset from the start of the file to the data section - #[br(is_big = endian_tag == REVERSE_ENDIAN_CONSTANT)] - pub data_off: UInt, -} - -impl HeaderItem { - pub fn verify(&self, mut reader: R, offset: UInt) -> result::Result<(), ConstraintError> - where - R: io::Read + io::Seek, - { - if let Err(e) = reader.seek(io::SeekFrom::Start((offset + 12) as u64)) { - return Err(ConstraintError { - identifier: "io", - description: e.to_string(), - }); - } - - // Verification of the first contraints from the Android docs: - // - // G2: The checksum must be an Adler-32 checksum of the whole file contents - // except magic and checksum field. - let checksum = match adler32::adler32(&mut reader) { - Ok(x) => x, - Err(e) => { - return Err(ConstraintError { - identifier: "io", - description: e.to_string(), - }) - } - }; - - if checksum != self.checksum { - return Err(ConstraintError { - identifier: "G2", - description: format!("expected {}, got {}", self.checksum, checksum), - }); - } - - // G3: The signature must be a SHA-1 hash of the whole file contents except - // magic, checksum, and signature. - if let Err(e) = reader.seek(io::SeekFrom::Start((offset + 32) as u64)) { - return Err(ConstraintError { - identifier: "io", - description: e.to_string(), - }); - } - - let digest = { - let mut hasher = sha::Sha1::new(); - let mut buffer = [0u8; 1024]; - - loop { - let count = reader.read(&mut buffer).unwrap(); - if count == 0 { - break; - } - hasher.update(&buffer[..count]); - } - hasher.finish() - }; - - if digest != self.signature { - return Err(ConstraintError { - identifier: "G3", - description: format!("expected {:?}, got {:?}", self.signature, digest), - }); - } - - // G5: The header_size must be 0x70. 
- if self.header_size != 0x70 { - return Err(ConstraintError { - identifier: "G5", - description: format!("expected 0x70, got {}", self.header_size), - }); - } - - // G6: The endian_tag must have either the value: ENDIAN_CONSTANT or - // REVERSE_ENDIAN_CONSTANT - if self.endian_tag != 0x12345678 && self.endian_tag != 0x78563412 { - return Err(ConstraintError { - identifier: "G6", - description: format!( - "expected 0x12345678 or 0x78563412, got {:#x}", - self.endian_tag - ), - }); - } - - let values = vec![ - (self.link_size, self.link_off, "link"), - (self.string_ids_size, self.string_ids_off, "string_ids"), - (self.type_ids_size, self.type_ids_off, "type_ids"), - (self.proto_ids_size, self.proto_ids_off, "proto_ids"), - (self.field_ids_size, self.field_ids_off, "field_ids"), - (self.method_ids_size, self.method_ids_off, "method_ids"), - (self.class_defs_size, self.class_defs_off, "class_defs"), - (self.data_size, self.data_off, "data"), - ]; - for (v1, v2, name) in values { - if v1 != 0 && v2 == 0 { - return Err(ConstraintError { - identifier: "G7", - description: format!( - "expected non-zero offset for size {}, got 0 (sec: {})", - v1, name - ), - }); - } else if v1 == 0 && v2 != 0 { - return Err(ConstraintError { - identifier: "G7", - description: format!( - "expected 0 for offset, got non-zero offset {} (sec: {})", - v2, name - ), - }); - } - } - - return Ok(()); - } -} diff --git a/src/dalvik/dex/items.rs b/src/dalvik/dex/items.rs deleted file mode 100644 index 99842e8..0000000 --- a/src/dalvik/dex/items.rs +++ /dev/null @@ -1,531 +0,0 @@ -use super::encoded_value::{EncodedField, EncodedMethod}; -use super::{types::*, EncodedCatchHandlerList}; -use binrw::meta::{EndianKind, ReadEndian}; -use binrw::{binrw, BinRead, Endian}; -use std::io; - -/// A string identifier item stores the offset from the start of the file -/// to the string data. 
-#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct StringIdItem { - /// offset from the start of the file to the string data of this - /// item. - pub offset: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct TypeIdItem { - /// index into the `string_ids` list for the descriptor string of this type. - pub descriptor_idx: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct ProtoIdItem { - /// index into the `string_ids` list for the shorty string of this prototype. - pub shorty_idx: UInt, - - /// index into the `type_ids` list for the return type of this prototype. - pub return_type_idx: UInt, - - /// offset from the start of the file to the parameters of this prototype. - pub parameters_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct FieldIdItem { - /// index into the `type_ids` list for the enclosing type of this field. - pub class_idx: UShort, - - /// index into the `type_ids` list for the type of this field. - pub type_idx: UShort, - - /// index into the `string_ids` list for the name of this field. - pub name_idx: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct MethodIdItem { - /// index into the `type_ids` list for the declaring class of this method. - pub class_idx: UShort, - - /// index into the `proto_ids` list for the prototype of this method. - pub proto_idx: UShort, - - /// index into the `string_ids` list for the name of this method. - pub name_idx: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct ClassDefItem { - /// index into the `type_ids` list for this class. - pub class_idx: UInt, - - /// access flags for this class. - pub access_flags: UInt, - - /// index into the `type_ids` list for the superclass of this class. The - /// value `NO_INDEX` may be used to indicate that this class has no - /// superclass. 
- pub superclass_idx: UInt, - - /// offset from the start of the file to the list of interfaces implemented - /// by this class or `0` if this class does not implement any interfaces. - pub interfaces_off: UInt, - - /// index to the `string_ids` list for the source file from which this - /// class was compiled. The value `NO_INDEX` may be used to indicate that - /// there is no source file information present. - pub source_file_idx: UInt, - - /// offset from the start of the file to the list of annotations for this - /// class or `0` if there are no annotations. - pub annotations_off: UInt, - - /// offset from the start of the file to the list of class data for this - /// class or `0` if there is no class data. - pub class_data_off: UInt, - - /// offset from the start of the file to the list of static values for this - /// class or `0` if there are no static values (initial values for static - /// fields). - pub static_values_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct CallSiteIdItem { - /// offset from the start of the file to the call site definition - pub call_side_off: UInt, -} - -/// An item storing string data. -#[derive(Debug)] -pub struct StringDataItem { - /// a series of MUTF-8 code units (a.k.a. octets, a.k.a. bytes) followed by a byte - /// of value 0. Use .mutf8 to decode the string data. 
- pub data: Option, -} - -impl ReadEndian for StringDataItem { - const ENDIAN: EndianKind = EndianKind::Runtime; -} - -impl BinRead for StringDataItem { - type Args<'a> = (); - fn read_options( - reader: &mut R, - _: Endian, - _: Self::Args<'_>, - ) -> binrw::BinResult { - Ok(Self { - data: Some(mutf8::read(reader)?), - }) - } -} - -#[binrw] -#[brw(repr(UShort), little)] -#[derive(Debug)] -pub enum MethodHandleType { - /// Method handle is a static field setter (accessor) - StaticPut = 0x00, - - /// Method handle is a static field getter (accessor) - StaticGet = 0x01, - - /// Method handle is a instance field setter (accessor) - InstancePut = 0x02, - - /// Method handle is a instance field getter (accessor) - InstanceGet = 0x03, - - /// Method handle is a static method invoke - StaticInvoke = 0x04, - - /// Method handle is an instance method invoke - InstanceInvoke = 0x05, - - /// Method handle is a constructor invoke - InvokeConstructor = 0x06, - - /// Method handle is an instance method invoke - InvokeDirect = 0x07, - - /// Method handle is an instance method invoke - InvokeStatic = 0x08, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct MethodHandleItem { - /// type of this method handle - #[brw(align_after = 4)] - pub method_handle_type: MethodHandleType, - - /// Field or method id depending on whether the method handle type is an - /// accessor or a method invoker - #[brw(align_after = 4)] - pub field_or_method_id: UShort, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct TypeItem { - /// index into the `type_ids` list for the type of this item - pub type_idx: UShort, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct TypeList { - /// the number of items in this list - pub size: UInt, - - /// elements of this list - #[br(count = size as usize)] - #[brw(align_after = 4)] - pub list: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct TryItem { - /// start address of the block of code covered by this entry. 
The - /// address is a count of 16-bit code units to the start of the - /// first covered instruction. - pub start_addr: UInt, - - /// number of 16-bit code units covered by this entry. The last - /// code unit covered (inclusive) is `start_addr + insn_count - 1`. - pub insn_count: UShort, - - /// offset in bytes from the start of the associated encoded_catch_hander_list - /// to the encoded_catch_handler for this entry. This must be an offset to the - /// start of an encoded_catch_handler. - pub handler_off: UShort, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationSetItem { - /// the number of elements in this list - #[bw(calc = list.len() as u32)] - pub size: UInt, - - /// elements of this list - #[br(count = size)] - pub list: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationOffItem { - /// offset from the start of the file to the annotation. - /// - /// @references: `annotation_item` - pub annotation_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationSetRefList { - /// the number of items in this list - #[bw(calc = list.len() as u32)] - pub size: UInt, - - /// elements of this list - #[br(count = size)] - pub list: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationSetRefItem { - /// offset from the start of the file to the annotations. - /// - /// @references: `annotation_set_item` - pub annotations_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct FieldAnnotation { - /// index into the `field_ids` list for the field - pub field_idx: UInt, - - /// offset from the start of the file to the annotations for the field. 
- /// - /// @references: `annotation_set_item` - pub annotations_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct MethodAnnotation { - /// index into the `method_ids` list for the method - pub method_idx: UInt, - - /// offset from the start of the file to the annotations for the method - /// - /// @references: `annotation_set_item` - pub annotations_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct ParameterAnnotation { - /// index into the `method_ids` list for the method - pub method_idx: UInt, - - /// offset from the start of the file to the annotations for the method - /// - /// @references: `annotation_set_ref_list` - pub annotations_off: UInt, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct AnnotationsDirectoryItem { - /// offset from the start of the file to the annotations made directly - /// on a class. - pub class_annotations_off: UInt, - - /// the number of fields annotated by this item - #[bw(calc = field_annotations.len() as u32)] - pub fields_size: UInt, - - /// the number of methods annotated by this item - #[bw(calc = method_annotations.len() as u32)] - pub annotated_methods_size: UInt, - - /// the number of parameters annotated by this item - #[bw(calc = parameter_annotations.len() as u32)] - pub annotated_parameters_size: UInt, - - /// list of associated field annotations. The elements of the list must - /// be sorted in increasing order, by `field_idx`. - #[br(count = fields_size)] - pub field_annotations: Vec, - - /// list of associated method annotations. The elements of the list must - /// be sorted in increasing order, by `method_idx`. - #[br(count = annotated_methods_size)] - pub method_annotations: Vec, - - /// list of associated parameter annotations. The elements of the list must - /// be sorted in increasing order, by `method_idx`. 
- #[br(count = annotated_parameters_size)] - pub parameter_annotations: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct CodeItem { - /// the number of registers pushed by this code - pub registers_size: UShort, - - /// the number of words of incoming arguments to the method that - /// this code is for - pub ins_size: UShort, - - /// the number of words of outgoing argument space - pub outs_size: UShort, - - /// the number of `TryItem` for this instance - pub tries_size: UShort, - - /// offset from the start of the file to the debug info (line - /// numbers + local variable info) sequence for this code, or `0` - /// if there simply is no information. - pub debug_info_off: UInt, - - /// size of the instructions list, in 16-bit code units - pub insns_size: UInt, - - /// actual array of bytecode. - #[br(count = insns_size * 2)] - pub insns: Vec, - - #[br(if(tries_size != 0))] - #[bw(if(*tries_size != 0))] - padding: Option, - - /// array indicating where in the code exceptions are caught and how - /// to handle them. - #[br(count = tries_size as usize)] - pub tries: Vec, - // bytes representing a list of lists of catch types and associated - // handler addresses. 
- #[br(if(tries_size != 0))] - // // #[bw(if(*tries_size != 0))] - pub handlers: Option, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct ClassDataItem { - /// the number of static fields in this item - #[bw(calc = ULeb128(static_fields.len() as u32))] - pub static_fields_size: ULeb128, - - /// the number of instance fields in this item - #[bw(calc = ULeb128(instance_fields.len() as u32))] - pub instance_fields_size: ULeb128, - - /// the number of direct methods in this item - #[bw(calc = ULeb128(direct_methods.len() as u32))] - pub direct_methods_size: ULeb128, - - /// the number of virtual methods in this item - #[bw(calc = ULeb128(virtual_methods.len() as u32))] - pub virtual_methods_size: ULeb128, - - /// the defined static fields, represented as a sequence of - /// encoded elements. The fields must be sorted by field_idx - /// in increasing order. - #[br(count = static_fields_size.0 as usize)] - pub static_fields: Vec, - - /// the defined instance fields, represented as a sequence of - /// encoded elements. The fields must be sorted by field_idx - /// in increasing order. - #[br(count = instance_fields_size.0 as usize)] - pub instance_fields: Vec, - - /// the defined direct methods, represented as a sequence of - /// encoded elements. The methods must be sorted by method_idx - /// in increasing order. - #[br(count = direct_methods_size.0 as usize)] - pub direct_methods: Vec, - - /// the defined virtual methods, represented as a sequence of - /// encoded elements. The methods must be sorted by method_idx - /// in increasing order. - #[br(count = virtual_methods_size.0 as usize)] - pub virtual_methods: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct HiddenAPIClassDataItem { - /// total size of the section - #[bw(calc = data.len() as u32)] - pub size: UInt, - - // array of offsets indexed by class_idx. 
A zero array entry at index class_idx - // means that either there is no data for this class_idx, or all hidden API flags - // are zero. Otherwise the array entry is non-zero and contains an offset from - // the beginning of the section to an array of hidden API flags for this class_idx. - // pub offsets: Vec, - - // concatenated arrays of hidden API flags for each class. Flags are encoded in - // the same order as fields and methods are encoded in class data. - // flags: Vec, - #[br(count = size as usize)] - pub data: Vec, -} - -impl HiddenAPIClassDataItem { - /// Interfaces that can be freely used and are supported as part of the officially - /// documented Android framework Package Index. - pub const FLAG_WHITELIST: UInt = 0x00; - - /// Non-SDK interfaces that can be used regardless of the application's target API - /// level. - pub const FLAG_GREYLIST: UInt = 0x01; - - /// Non-SDK interfaces that cannot be used regardless of the application's target API - /// level. Accessing one of these interfaces causes a runtime error. - pub const FLAG_BLACKLIST: UInt = 0x02; - - /// Non-SDK interfaces that can be used for Android 8.x and below unless they are - /// restricted. - pub const FLAG_GREYLIST_MAX_O: UInt = 0x03; - - /// Non-SDK interfaces that can be used for Android 9.x and below unless they are - /// restricted. - pub const FLAG_GREYLIST_MAX_P: UInt = 0x04; - - /// Non-SDK interfaces that can be used for Android 10.x and below unless they are - /// restricted. - pub const FLAG_GREYLIST_MAX_Q: UInt = 0x05; - - /// Non-SDK interfaces that can be used for Android 11.x and below unless they are - /// restricted. 
- pub const FLAG_GREYLIST_MAX_R: UInt = 0x06; -} - -// custom instruction payload data -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct FillArrayData { - /// number of bytes in each element - pub width: u16, - - /// number of elements in the table - pub size: u32, - - /// data values - #[br(count = (size * width as u32) as usize)] - pub data: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct PackedSwitch { - /// number of keys - pub size: u16, - - /// first key - pub first_key: i32, - - /// target offsets - #[br(count = size as usize)] - pub targets: Vec, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct SparseSwitch { - /// number of keys - pub size: u16, - - /// keys - #[br(count = size)] - pub keys: Vec, - - /// target offsets - #[br(count = size)] - pub targets: Vec, -} diff --git a/src/dalvik/dex/map_list.rs b/src/dalvik/dex/map_list.rs deleted file mode 100644 index 62ff664..0000000 --- a/src/dalvik/dex/map_list.rs +++ /dev/null @@ -1,181 +0,0 @@ -use binrw::binrw; - -use super::types::*; - -#[binrw] -#[brw(repr(UShort), little)] -#[derive(Debug, PartialEq, Eq)] -pub enum MapListItemType { - /// header item type - /// - /// @size: `0x70` - /// @type: [HeaderItem] - HeaderItem = 0x0000, - - /// string identifier item type - /// - /// @size: `0x04` - /// @type: [StringIdItem] - StringIdItem = 0x0001, - - /// type identifier item type - /// - /// @size: `0x04` - /// @type: [TypeIdItem] - TypeIdItem = 0x0002, - - /// prototype identifier item type - /// - /// @size: `0x0C` - /// @type: [ProtoIdItem] - ProtoIdItem = 0x0003, - - /// field identifier item type - /// - /// @size: `0x08` - /// @type: [FieldIdItem] - FieldIdItem = 0x0004, - - /// method identifier item type - /// - /// @size: `0x08` - /// @type: [MethodIdItem] - MethodIdItem = 0x0005, - - /// class definition item type - /// - /// @size: `0x0C` - /// @type: [ClassDefItem] - ClassDefItem = 0x0006, - - /// call site id item type - /// - /// @size: `0x08` - /// @type: 
[CallSiteIdItem] - CallSiteIdItem = 0x0007, - - /// method handle item type - /// - /// @size: `0x08` - /// @type: [MethodHandleItem] - MethodHandleItem = 0x0008, - - /// map list type - /// - /// @size: `4 + (item.size * 12)` - MapList = 0x1000, - - /// type list type - /// - /// @size: `4 + (item.size * 2)` - TypeList = 0x1001, - - /// annotation set ref list type - /// - /// @size: `4 + (item.size * 4)` - /// @type: [AnnotationSetRefList] - AnnotationSetRefList = 0x1002, - - /// annotation set item type - /// - /// @size: `4 + (item.size * 4)` - /// @type: [AnnotationSetItem] - AnnotationSetItem = 0x1003, - - /// class data item type - /// - /// @size: `0x08` - /// @type: [ClassDataItem] - ClassDataItem = 0x2000, - - /// code item type - /// - /// @size: _implicit_ - /// @type: [CodeItem](CodeItem) - CodeItem = 0x2001, - - /// string data item type - /// - /// @size: _implicit_ - /// @type: [StringDataItem](StringDataItem) - StringDataItem = 0x2002, - - /// debug info item type - /// - /// @size: _implicit_ - /// @type: [DebugInfoItem](DebugInfoItem) - DebugInfoItem = 0x2003, - - /// annotation item type - /// - /// @size: _implicit_ - /// @type: [AnnotationItem](AnnotationItem) - AnnotationItem = 0x2004, - - /// encoded array item type - /// - /// @size: _implicit_ - /// @type: [EncodedArrayItem](EncodedArrayItem) - EncodedArrayItem = 0x2005, - - /// annotations directory item type - /// - /// @size: _implicit_ - /// @type: [AnnotationsDirectoryItem](AnnotationsDirectoryItem) - AnnotationsDirectoryItem = 0x2006, - - /// hidden api list class data item type - /// - /// @size: _implicit_ - /// @type: [HiddenAPIClassDataItem](HiddenAPIClassDataItem) - HiddenApiListClassDataItem = 0xF000, -} - -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct MapListItem { - /// type of the item - #[br(align_after = 4)] - pub type_: MapListItemType, - - /// count of the number of items to be found at the indicated offset - pub size: UInt, - - /// offset from the start of the 
file to the item - pub offset: UInt, -} - -/// A map list is a list of the entire contents of a file, in order. -#[binrw] -#[brw(little)] -#[derive(Debug)] -pub struct MapList { - /// size of the list, in entries - // #[bw(calc = list.len() as u32)] - pub size: UInt, - - // elements of the list - #[br(count = size as usize)] - list: Vec, // MapListItem[this.size] -} - -impl MapList { - pub fn get(&self, type_: MapListItemType) -> Option<&MapListItem> { - self.list.iter().find(|&item| item.type_ == type_) - } - - pub fn item_size(&self, type_: MapListItemType) -> usize { - match self.get(type_) { - Some(item) => item.size as usize, - None => 0, - } - } - - pub fn item_offset(&self, type_: MapListItemType) -> usize { - match self.get(type_) { - Some(item) => item.offset as usize, - None => 0, - } - } -} diff --git a/src/dalvik/dex/mod.rs b/src/dalvik/dex/mod.rs deleted file mode 100644 index 44bafc7..0000000 --- a/src/dalvik/dex/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -pub mod types; -pub use types::*; - -pub mod header; -pub use header::*; - -pub mod map_list; -pub use map_list::*; - -pub mod encoded_value; -pub use encoded_value::*; - -pub mod items; -pub use items::*; - -pub mod debug_info; -pub use debug_info::*; - -pub mod dtype; -pub use dtype::*; diff --git a/src/dalvik/dex/types.rs b/src/dalvik/dex/types.rs deleted file mode 100644 index e0d72e3..0000000 --- a/src/dalvik/dex/types.rs +++ /dev/null @@ -1,366 +0,0 @@ -use binrw::meta::ReadEndian; -use binrw::{BinRead, BinWrite, Endian}; -use bitflags::bitflags; -use leb128; -use std::{io, result}; - -/// 8bit signed int -pub type Byte = i8; - -/// 8bit unsigned int -pub type UByte = u8; - -/// 16bit signed int -pub type Short = i16; - -/// 16bit unsigned int -pub type UShort = u16; - -/// 32bit signed int -pub type Int = i32; - -/// 32bit unsigned int -pub type UInt = u32; - -/// 64bit signed int -pub type Long = i64; - -/// 64bit unsigned int -pub type ULong = u64; - -/// SHA-1 signature type alias -pub type 
SHA1Signature = [UByte; 20]; - -bitflags! { - #[derive(Debug)] - pub struct AccessFlags: UInt { - const PUBLIC = 0x0001; - const PRIVATE = 0x0002; - const PROTECTED = 0x0004; - const STATIC = 0x0008; - const FINAL = 0x0010; - - /// associated lock automatically acquired around call - /// to this method. - /// - /// @note Only valid for methods. - const SYNCHRONIZED = 0x0020; - - /// special access rules to help with thread safety - /// - /// @note Only valid for fields. - const VOLATILE = 0x0040; - - /// bridge method, added automatically by compiler as a - /// type-safe bridge - /// - /// @note Only valid for methods. - const BRIDGE = 0x0040; - - /// not to be saved by default serialization - /// - /// @note Only valid for fields. - const TRANSIENT = 0x0080; - - /// last argument should be treated as a "rest" argument by compiler - /// - /// @note Only valid for methods. - const VARARGS = 0x0080; - - /// native method - /// - /// @note Only valid for methods. - const NATIVE = 0x0100; - - /// multiply-implementable abstract class - /// - /// @note Only valid for classes. - const INTERFACE = 0x0200; - - /// @note Only valid for classes and methods. - const ABSTRACT = 0x0400; - - /// @note Only valid for fields. - const STRICT = 0x0800; - const SYNTHETIC = 0x1000; - - /// @note Only valid for classes. - const ANNOTATION = 0x2000; - - /// @note Only valid for classes and fields. - const ENUM = 0x4000; - - /// mandated, the parameter is synthetic but also implied by the language specification - const MANDATED = 0x8000; // only fields - - /// @note Only valid for methods. - const CONSTRUCTOR = 0x10000; - - /// @note Only valid for methods. - const DECLARED_SYNCHRONIZED = 0x20000; - } -} - -/// signed LEB128, variable-length: -/// -/// Borrowed from the DWARF3 specification, Section 7.6, "Variable Length Data", -/// Android only uses it to encode 32bit entities. Therefore, the type here will -/// be i32. 
-#[derive(Debug)] -pub struct SLeb128(pub i32); - -impl BinRead for SLeb128 { - type Args<'a> = (); - - /// Read signed LEB128, variable-length into an i32 - fn read_options( - reader: &mut R, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result { - // simply delegate to leb128 - return match leb128::read::signed(reader) { - Ok(x) => Ok(Self(x as i32)), - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }; - } -} - -impl ReadEndian for SLeb128 { - const ENDIAN: binrw::meta::EndianKind = binrw::meta::EndianKind::None; -} - -impl BinWrite for SLeb128 { - type Args<'a> = (); - - /// Write signed LEB128, variable-length into an i32 - fn write_options( - &self, - writer: &mut W, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result<(), binrw::Error> { - // simply delegate to leb128 - return match leb128::write::signed(writer, self.0 as i64) { - Ok(_) => Ok(()), - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }; - } -} - -/// unsigned LEB128, variable-length -#[derive(Debug)] -pub struct ULeb128(pub u32); - -impl ReadEndian for ULeb128 { - const ENDIAN: binrw::meta::EndianKind = binrw::meta::EndianKind::None; -} - -impl BinRead for ULeb128 { - type Args<'a> = (); - - /// Read unsigned LEB128, variable-length into an u32 - fn read_options( - reader: &mut R, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result { - // simply delegate to leb128 - return match leb128::read::unsigned(reader) { - Ok(x) => Ok(Self(x as u32)), - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }; - } -} - -impl BinWrite for ULeb128 { - type Args<'a> = (); - - /// Write unsigned LEB128, variable-length into an u32 - fn write_options( - &self, - writer: &mut W, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result<(), binrw::Error> { - // simply delegate to leb128 - return match leb128::write::unsigned(writer, self.0 as u64) { - Ok(_) => Ok(()), - Err(e) => 
Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }; - } -} - -/// unsigned LEB128 plus 1, variable-length -/// -/// This is used for LEB128p1 in Android, which is used to encode a number -/// using uleb128 + 1. -#[derive(Debug)] -pub enum ULeb128p1 { - Pos(u32), - Neg, -} - -impl ReadEndian for ULeb128p1 { - const ENDIAN: binrw::meta::EndianKind = binrw::meta::EndianKind::None; -} - -impl BinRead for ULeb128p1 { - type Args<'a> = (); - - /// Read unsigned LEB128p1, variable-length into an i32 - fn read_options( - reader: &mut R, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result { - // simply delegate to leb128 - return match leb128::read::unsigned(reader) { - Ok(x) => match x { - 0 => Ok(Self::Neg), - _ => Ok(Self::Pos((x - 1) as u32)), - }, - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }; - } -} - - - -impl BinWrite for ULeb128p1 { - type Args<'a> = (); - - /// Write unsigned LEB128p1, variable-length into an i32 - fn write_options( - &self, - writer: &mut W, - _: Endian, - _: Self::Args<'_>, - ) -> result::Result<(), binrw::Error> { - match self { - ULeb128p1::Pos(x) => match leb128::write::unsigned(writer, *x as u64 + 1) { - Ok(_) => Ok(()), - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }, - ULeb128p1::Neg => match leb128::write::unsigned(writer, 0) { - Ok(_) => Ok(()), - Err(e) => Err(binrw::Error::Io(io::Error::new( - io::ErrorKind::InvalidData, - e, - ))), - }, - } - } -} - - - -pub mod mutf8 { - use std::io::{self, Read, Seek}; - - - /// # Modified UTF-8 encoding - /// - /// Taken from Android docs: As a concession to easier legacy support, the `.dex` - /// format encodes its string data in a de facto standard modified UTF-8 form, - /// hereafter referred to as MUTF-8. This form is identical to standard UTF-8 - /// with a few modifications: - /// - /// - Only the one-, two-, and three-byte encodings are used. 
- /// - Code points in the range `U+10000 ... U+10ffff` are encoded as a surrogate - /// pair, each of which is represented as a three-byte encoded value. - /// - The code point U+0000 is encoded in two-byte form. - /// - A plain null byte (value 0) indicates the end of a string, as is the standard - /// C language interpretation. - /// - /// The first two items above can be summarized as: MUTF-8 is an encoding format for - /// UTF-16, instead of being a more direct encoding format for Unicode characters. - pub fn read(reader: &mut R) -> io::Result - where - R: Read + Seek, - { - let len = match leb128::read::unsigned(reader) { - Ok(x) => x as usize, - Err(e) => return Err(io::Error::new(io::ErrorKind::InvalidData, e)), - }; - let mut buf = [0]; - let mut out: Vec = Vec::with_capacity(len); - let mut k: usize = len; - while k > 0 { - reader.read(&mut buf)?; - let byte = buf[0]; - // (4) A plain null byte (value 0) indicates the end of a string, as is the - // standard C language interpretation. 
- if byte == 0 { - break; - } - - let out_val: u16 = match byte >> 4 { - 0x00..=0x07 => { - // 0xxx xxxx - byte as u16 - } - 0x0C | 0x0D => { - // 110x xxxx - reader.read(&mut buf)?; - let next = buf[0]; - if (next & 0xC0) != 0x80 { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "Bad second character!", - )); - } - (((byte & 0x1F) << 6) | (next & 0x3F)) as u16 - } - 0x0E => { - // 1110 xxxx - reader.read(&mut buf)?; - let b = buf[0]; - if (b & 0xC0) != 0x80 { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "Bad second character!", - )); - } - reader.read(&mut buf)?; - let c = buf[0]; - if (c & 0xC0) != 0x80 { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "Bad third character!", - )); - } - // REVISIT: rust can't handle surrogates - (((byte as u16) & 0x0F) << 12) | (b as u16 & 0x3F) << 6 | (c as u16 & 0x3F) - } - _ => { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - format!("Bad character: {:#x}", byte), - )); - } - }; - out.push(out_val); - k -= 1; - } - return Ok(String::from_utf16_lossy(out.as_ref())); - } -} diff --git a/src/dalvik/error.rs b/src/dalvik/error.rs deleted file mode 100644 index 3b4265d..0000000 --- a/src/dalvik/error.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::{io, result}; - -#[derive(Debug)] -pub struct ConstraintError { - pub identifier: &'static str, - pub description: String, -} - -pub type Result = result::Result; - -#[derive(Debug)] -pub enum Error { - IO(io::Error), - Parse(binrw::Error), - Custom(&'static str), - Validation(ConstraintError), - InvalidData(String), - - // - InvalidOffset(isize), - InvalidIndex(usize), - MalformedDescriptor(String), - MethodNotFound(usize), - FieldNotFound(usize), - ParameterNotFound(usize), -} - -impl From for Error { - fn from(e: io::Error) -> Self { - Error::IO(e) - } -} - -impl From for Error { - fn from(e: binrw::Error) -> Self { - Error::Parse(e) - } -} - -impl From for Error { - fn from(e: ConstraintError) -> Self { - Error::Validation(e) - } -} 
- -impl From for Error { - fn from(e: std::fmt::Error) -> Self { - Error::InvalidData(e.to_string()) - } -} \ No newline at end of file diff --git a/src/dalvik/file/annotation.rs b/src/dalvik/file/annotation.rs deleted file mode 100644 index e368182..0000000 --- a/src/dalvik/file/annotation.rs +++ /dev/null @@ -1,98 +0,0 @@ -use crate::dalvik::dex::{ - AnnotationItem, AnnotationSetItem, AnnotationVisibility, DexType, EncodedAnnotation, -}; -use crate::dalvik::error::Result; - -use super::{Dex, DexValue, IDexRef}; -use binrw::{io, BinRead}; -use std::collections::HashMap; -use std::io::{Read, Seek}; -use std::rc::Rc; - -#[derive(Debug)] -pub struct DexAnnotation { - /// The referenced annotation type displayed as a shared reference - /// to the [DexType]. - /// - /// This value will never be null and an error will be reported if no - /// type is associated with the parsed annotation. - pub type_: Rc, - - /// The visibility of the annotation. - pub visibility: Option, - - /// A key-value mapping binding the values of an annotation to their - /// specified values. The key is a reference to the string pool of the - /// DEX file and the value must be an instance of [DexValue], which - /// may contain a reference to another object in the DEX file. - pub values: HashMap, DexValue>, -} - -impl DexAnnotation { - pub fn is_subannotation(&self) -> bool { - // Subannotations are parsed from encoded values that don't specify - // a visibility. - self.visibility.is_none() - } - - /// Reads an annotation from the given reader and returns it. - /// - /// @**Note**: This function assumes that the reader points to the - /// start of an [AnnotationItem] object in the DEX file. 
- pub fn read(dex: &mut Dex<'_, R>) -> Result - where - R: Read + Seek, - { - let annotation_item = AnnotationItem::read(dex.fd)?; - let mut annotation = DexAnnotation::from_encoded(&annotation_item.annotation, dex)?; - annotation.visibility = Some(annotation_item.visibility); - Ok(annotation) - } - - /// Decodes an encoded annotation object and returns a [DexAnnotation] object - /// storing all resolved string and type references. - pub fn from_encoded(encoded_annotation: &EncodedAnnotation, dex: IDexRef<'_>) -> Result { - let mut annotation = DexAnnotation { - type_: dex.get_type(encoded_annotation.type_idx.0)?.clone(), - values: HashMap::with_capacity(encoded_annotation.elements.len()), - visibility: None, - }; - - for element in &encoded_annotation.elements { - let value = DexValue::from(&element.value, dex)?; - annotation - .values - .insert(dex.get_string(element.name_idx.0)?, value); - } - Ok(annotation) - } - - /// expects an [AnnotationSetItem] to be at reader's current position - pub fn read_set(dex: &mut Dex<'_, R>) -> Result> - where - R: Read + Seek, - { - // REVISIT: - let mut annotations: Vec = Vec::new(); - DexAnnotation::read_set_into(dex, &mut annotations)?; - Ok(annotations) - } - - pub fn read_set_into(dex: &mut Dex<'_, R>, target: &mut Vec) -> Result<()> - where - R: Read + Seek, - { - AnnotationSetItem::read(dex.fd)? 
- .list - .iter() - .try_for_each(|x| { - dex.fd.seek(io::SeekFrom::Start(x.annotation_off as u64))?; - target.push(DexAnnotation::read(dex)?); - Ok(()) - }) - } - - pub fn get(&self, name: &String) -> Option<&DexValue> { - self.values.get(name) - } -} diff --git a/src/dalvik/file/class_def.rs b/src/dalvik/file/class_def.rs deleted file mode 100644 index 01d8b4b..0000000 --- a/src/dalvik/file/class_def.rs +++ /dev/null @@ -1,356 +0,0 @@ -use crate::dalvik::{ - dex::*, - error::{Error, Result}, -}; - -use binrw::BinRead; -use byteorder::{LittleEndian, ReadBytesExt}; -use std::{ - collections::{btree_map::Values, BTreeMap}, - fmt::Debug, - io::{Read, Seek}, - rc::Rc, -}; - -use super::{ - annotation::DexAnnotation, field::DexField, lazy_file::Dex, method::*, DexValue, IDex, -}; - -#[derive(Debug)] -pub struct DexClassDef { - pub identity: u32, - /// The type reference storing the package name and the simple - /// name of this class. - pub type_: Rc, - - /// Same as [DexMethod] and [DexField], access flags of this class are - /// stored as a single [AccessFlags] instance. Use [AccessFlags::iter] - /// to retrieve all matched access flags. - pub flags: Option, - - /// Optional reference to the superclass of this class. - pub super_class: Option>, - - /// List of interfaces implemented by this class. - pub interfaces: Vec>, - - /// Optional debug information which lists the source file name. - pub source_file: Option>, - - /// List of annotations associated with this class or empty if - /// none were specified. - pub annotations: Vec, - - /// List of static fields defined in this class. - static_fields: BTreeMap, - - /// List of instance fields defined in this class. - instance_fields: BTreeMap, - - /// List of direct methods defined in this class. - direct_methods: BTreeMap, - - /// List of virtual methods defined in this class. 
- virtual_methods: BTreeMap, -} - -impl DexClassDef { - pub fn new(dex: &mut Dex<'_, R>, index: u32) -> Result { - let class_def_item = ClassDefItem::read(dex.fd)?; - let mut class_def = DexClassDef { - identity: index, - type_: dex.get_type(class_def_item.class_idx)?, - flags: AccessFlags::from_bits(class_def_item.access_flags), - super_class: None, - // dynamic: will be added in #process_definition - interfaces: Vec::new(), - source_file: None, - // annotations: will be added in #process_annotations - annotations: Vec::new(), - // will be added after #process_definition - static_fields: BTreeMap::new(), - instance_fields: BTreeMap::new(), - direct_methods: BTreeMap::new(), - virtual_methods: BTreeMap::new(), - }; - - class_def.process_definition(&class_def_item, dex)?; - if class_def_item.class_data_off != 0 { - dex.seeks(class_def_item.class_data_off as u64)?; - let class_data = ClassDataItem::read(dex.fd)?; - - // process fields and methods - class_def.process_fields(&class_data, dex)?; - class_def.process_methods(&class_data, dex)?; - - // lastly, identify possible static values - class_def.process_init_values(&class_def_item, &class_data, dex)?; - } - - // annotations are parsed regardless of class_data_off - class_def.process_annotations(&class_def_item, dex)?; - Ok(class_def) - } - - /* private impl */ - - fn process_definition(&mut self, def: &ClassDefItem, dex: &mut Dex<'_, R>) -> Result<()> - where - R: Read + Seek, - { - if def.superclass_idx != NO_INDEX { - self.super_class = Some(dex.get_type(def.superclass_idx)?); - } - - if def.interfaces_off != 0 { - /*Format: - ┌─────────────────────┐ ┌────────────────────────┐ - │ TypeList │ ┌──►│ TypeIdItem │ - ├─────────────────────┤ │ ├────────────────────────┤ - │ items: TypeIdItem[] ├─────┘ │ type_idx: u16 ├──────┐ - └─────────────────────┘ └────────────────────────┘ │ - │ - ┌────────────────────────┐ │ - │ StringIdItem │◄─────┘ - ├────────────────────────┤ - │ descriptor_idx: u32 ├──────┐ - 
└────────────────────────┘ │ - │ - ┌────────────────────────┐ │ - │ StringDataItem │◄─────┘ - ├────────────────────────┤ - │ data: u8[] │ - └────────────────────────┘ - */ - dex.seeks(def.interfaces_off as u64)?; - let types = TypeList::read(dex.fd)?; - for type_item in &types.list { - self.interfaces - .push(dex.get_type(type_item.type_idx as u32)?); - } - } - - if def.source_file_idx != NO_INDEX { - self.source_file = Some(dex.get_string(def.source_file_idx)?); - } - Ok(()) - } - - fn process_fields(&mut self, data: &ClassDataItem, dex: &mut Dex<'_, R>) -> Result<()> - where - R: Read + Seek, - { - // TODO: explain - macro_rules! _process { - ($attr:ident) => { - let mut i = 0; - for encoded_field in &data.$attr { - let field = DexField::build(dex, encoded_field, i)?; - i += encoded_field.field_idx_diff.0; - self.$attr.insert(field.identity, field); - } - }; - } - _process!(static_fields); - _process!(instance_fields); - Ok(()) - } - - fn process_methods(&mut self, data: &ClassDataItem, dex: &mut Dex<'_, R>) -> Result<()> - where - R: Read + Seek, - { - // TODO: explain - macro_rules! 
_process { - ($attr:ident) => { - let mut i = 0; - for encoded_method in &data.$attr { - let method = DexMethod::build(dex, encoded_method, i)?; - i += encoded_method.method_idx_diff.0; - self.$attr.insert(method.identity, method); - } - }; - } - _process!(direct_methods); - _process!(virtual_methods); - Ok(()) - } - - fn process_annotations(&mut self, def: &ClassDefItem, dex: &mut Dex<'_, R>) -> Result<()> - where - R: Read + Seek, - { - if def.annotations_off == 0 { - return Ok(()); - } - - /*Format: - ┌──────────────────────────────────────────────┐ - │ AnnotationsDirectoryItem │ - ├──────────────────────────────────────────────┤ - │ cls_annotations_off: u32 (ref to set) │ - │ method_annotations: MethodAnnotation[] │ - │ field_annotations: FieldAnnotation[] │ - │ parameter_annotations: ParameterAnnotation[] │ - └──────────────────────────────────────────────┘ - */ - dex.seeks(def.annotations_off as u64)?; - let directory_item = AnnotationsDirectoryItem::read(dex.fd)?; - - if directory_item.class_annotations_off != 0 { - // parse class annotations (REVISIT: maybe inspect them directly?) - dex.seeks(directory_item.class_annotations_off as u64)?; - DexAnnotation::read_set_into(dex, &mut self.annotations)?; - } - - macro_rules! iter_annotations { - ($attr:ident, $sattr:ident, $mth:ident, $error:ident) => { - for _a in &directory_item.$attr { - if _a.annotations_off == 0 { - continue; // ignore irrelevant items - } - - let item = self.$mth(_a.$sattr as u32); - // REVISIT: what should we do in case of error? 
- if item.is_none() { - return Err(Error::$error(_a.$sattr as usize)); - } - - dex.seeks(_a.annotations_off as u64)?; - DexAnnotation::read_set_into(dex, &mut item.unwrap().annotations)?; - } - }; - } - - iter_annotations!( - method_annotations, - method_idx, - get_method_mut, - MethodNotFound - ); - iter_annotations!(field_annotations, field_idx, get_field_mut, FieldNotFound); - - // parameters are handled differently: - let mut param_idx = 0; - let mut method_idx = 0; - for param_annotation in &directory_item.parameter_annotations { - // if the method index differs from the previous one, the parameter index - // needs to be reset - if method_idx != param_annotation.method_idx { - method_idx = param_annotation.method_idx; - param_idx = 0; - } - - let method = self.get_method_mut(method_idx); - if method.is_none() { - return Err(Error::MethodNotFound(method_idx as usize)); - } - - let parameter = method.unwrap().parameters.get_mut(param_idx as usize); - if parameter.is_none() { - return Err(Error::ParameterNotFound(param_idx as usize)); - } - - dex.seeks(param_annotation.annotations_off as u64)?; - parameter.unwrap().read_annotations(dex)?; - } - - Ok(()) - } - - fn process_init_values( - &mut self, - def: &ClassDefItem, - class_data: &ClassDataItem, - dex: &mut Dex<'_, R>, - ) -> Result<()> - where - R: Read + Seek, - { - if def.static_values_off == 0 { - return Ok(()); - } - - dex.seeks(def.static_values_off as u64)?; - let data = EncodedArray::read(&mut dex.fd)?; - if data.values.len() > self.static_fields.len() { - return Err(Error::InvalidData("Too many static values".to_string())); - } - - let mut diff = 0; - for (i, value) in data.values.iter().enumerate() { - let field_item = &class_data.static_fields[i]; - let idx = field_item.field_idx_diff.0 + diff; - let field = self.static_fields.get_mut(&idx).unwrap(); - field.init_value = Some(DexValue::from(value, dex)?); - - diff += field_item.field_idx_diff.0; - } - - Ok(()) - } -} - -macro_rules! 
_at { - ($name:ident, $attr:ident, $type:ty) => { - pub fn $name(&self, index: u32) -> Option<&$type> { - if index >= self.$attr.len() as u32 { - return None; - } - - let pos = if index == 0 { 0 } else { index - 1 } as usize; - self.$attr.values().skip(pos).next() - } - }; - - ($name:ident, $attr:ident, $attr_fallback:ident, $($type:tt)*) => { - pub fn $name(&mut self, index: u32) -> Option<&$($type)*> { - self.$attr - .get_mut(&index) - .or_else(|| self.$attr_fallback.get_mut(&index)) - } - } -} - -/* public API */ -impl DexClassDef { - _at!(get_method_mut, direct_methods, virtual_methods, mut DexMethod); - _at!(get_field_mut, static_fields, instance_fields, mut DexField); - _at!(get_direct_method, direct_methods, DexMethod); - _at!(get_virtual_method, virtual_methods, DexMethod); - _at!(get_static_field, static_fields, DexField); - _at!(get_instance_field, instance_fields, DexField); - - pub fn get_direct_methods(&self) -> Values { - self.direct_methods.values() - } - - pub fn get_virtual_methods(&self) -> Values { - self.virtual_methods.values() - } - - pub fn get_methods(&self) -> impl Iterator { - self.direct_methods - .iter() - .chain(self.virtual_methods.iter()) - } - - pub fn get_static_fields(&self) -> Values { - self.static_fields.values() - } - - pub fn get_instance_fields(&self) -> Values { - self.instance_fields.values() - } - - pub fn get_fields(&self) -> impl Iterator { - self.static_fields - .iter() - .chain(self.instance_fields.iter()) - } -} - - - - - diff --git a/src/dalvik/file/debug.rs b/src/dalvik/file/debug.rs deleted file mode 100644 index 4e4bc83..0000000 --- a/src/dalvik/file/debug.rs +++ /dev/null @@ -1,243 +0,0 @@ -use std::{ - collections::HashMap, - io::{Read, Seek}, - rc::Rc, -}; - -use binrw::BinRead; - -use crate::dalvik::{ - dex::*, - error::{Error, Result}, - file::{method::DexPrototype, Dex, IDex}, -}; - -#[derive(Debug)] -pub struct LocalVariable { - pub register_num: UInt, - pub name: Option>, - pub type_: Option>, - pub 
signature: Option>, - pub start_pc: UInt, - pub end_pc: UInt, - pub parameter: bool, -} - -#[derive(Debug)] -pub struct DebugInfo { - // A table that maps each instruction offset to its line number. - pub lines: HashMap, - - // A list of all defined local variables mapped to their instruction offsets. - pub local_variables: HashMap, - - // The name of the source file containing the code. - pub source_file: Option>, -} - -impl DebugInfoItem { - pub fn parse_debug_info( - &self, - code: &CodeItem, - dex: &mut Dex<'_, R>, - prototype: &DexPrototype, - ) -> Result - where - R: Read + Seek, - { - let mut file: Option> = None; - let mut lines: HashMap = HashMap::new(); - let mut local_variables: HashMap = HashMap::new(); - let mut buf = [0u8; 1]; - - let mut pc = 0; - let mut line: i64 = self.line_start.0 as i64; - let mut regs: Vec> = Vec::with_capacity(code.registers_size as usize); - for _ in 0..code.registers_size { - regs.push(None); - } - - macro_rules! ulebp1_unwrap { - ($idx:ident, $func:ident, $ty:ident) => { - if let ULeb128p1::Pos(pos) = $idx { - Some(dex.$func(pos as $ty)?) - } else { - None - } - }; - } - - let mut i = code.registers_size as isize; - for (p_type, idx) in prototype - .parameters - .iter() - .zip(self.parameter_names.iter()) - .rev() - { - i -= match &(p_type.descriptor)[..] { - "D" | "J" => 2, - _ => 1, - }; - if i < 0 { - break; - } - regs[i as usize] = Some(LocalVariable { - register_num: i as u32, - name: if let ULeb128p1::Pos(pos) = idx { - Some(dex.get_string(*pos)?) - } else { - None - }, - type_: Some(p_type.clone()), - signature: None, - start_pc: 0, - end_pc: 0, - parameter: true, - }); - } - - macro_rules! start_var { - // starts a new local variable by removing any previously defined local variable - // from the target register. 
- ($reg:ident, $var:ident) => { - if let Some(mut prev_var) = regs[$reg.0 as usize].take() { - prev_var.end_pc = pc; - local_variables.insert(prev_var.start_pc, prev_var); - } - regs[$reg.0 as usize] = Some($var); - }; - } - - macro_rules! end_var { - ($reg:ident) => { - if let Some(mut var) = regs[$reg.0 as usize].take() { - var.end_pc = pc; - local_variables.insert(var.start_pc, var); - } - regs[$reg.0 as usize] = None; - }; - } - - loop { - if dex.fd.read(&mut buf)? != 1 { - return Err(Error::Custom("Unexpected EOF")); - }; - let c = buf[0]; - - match c { - // terminates a debug info sequence for a code_item - DebugInfoItem::DBG_END_SEQUENCE => break, - - // advances the address register without emitting a positions entry - DebugInfoItem::DBG_ADVANCE_PC => { - let addr_diff = ULeb128::read(dex.fd)?; - pc += addr_diff.0; - } - - // advances the line register without emitting a positions entry - DebugInfoItem::DBG_ADVANCE_LINE => { - let line_diff = SLeb128::read(dex.fd)?; - line += line_diff.0 as i64; - } - - // introduces a local variable at the current address. Either name_idx - // or type_idx may be NO_INDEX to indicate that that value is unknown. - DebugInfoItem::DBG_START_LOCAL => { - let register_num = ULeb128::read(dex.fd)?; - let name_idx = ULeb128p1::read(dex.fd)?; - let type_idx = ULeb128p1::read(dex.fd)?; - let var = LocalVariable { - register_num: register_num.0, - name: ulebp1_unwrap!(name_idx, get_string, u32), - type_: ulebp1_unwrap!(type_idx, get_type, u32), - signature: None, - start_pc: pc, - end_pc: 0, - parameter: false, - }; - start_var!(register_num, var); - } - - // introduces a local with a type signature at the current address. Any of - // name_idx, type_idx, or sig_idx may be NO_INDEX to indicate that that - // value is unknown. (If sig_idx is -1, though, the same data could be - // represented more efficiently using the opcode DBG_START_LOCAL.) 
- DebugInfoItem::DBG_START_LOCAL_EXTENDED => { - let register_num = ULeb128::read(dex.fd)?; - let name_idx = ULeb128p1::read(dex.fd)?; - let type_idx = ULeb128p1::read(dex.fd)?; - let sig_idx = ULeb128p1::read(dex.fd)?; - let var = LocalVariable { - register_num: register_num.0, - name: ulebp1_unwrap!(name_idx, get_string, u32), - type_: ulebp1_unwrap!(type_idx, get_type, u32), - signature: ulebp1_unwrap!(sig_idx, get_string, u32), - start_pc: pc, - end_pc: 0, - parameter: false, - }; - start_var!(register_num, var); - } - - // marks a currently-live local variable as out of scope at the current address - DebugInfoItem::DBG_END_LOCAL => { - let register_num = ULeb128::read(dex.fd)?; - end_var!(register_num); - } - - // re-introduces a local variable at the current address. The name and type are - // the same as the last local that was live in the specified register. - DebugInfoItem::DBG_RESTART_LOCAL => { - let register_num = ULeb128::read(dex.fd)?; - if let Some(var) = regs[register_num.0 as usize].take() { - let new_var = LocalVariable { - register_num: var.register_num, - name: var.name.clone(), - type_: var.type_, - signature: var.signature.clone(), - start_pc: pc, - end_pc: 0, - parameter: false, - }; - start_var!(register_num, new_var); - } - } - - // ignore those states as they don't contribute to the debug info - DebugInfoItem::DBG_SET_PROLOGUE_END | DebugInfoItem::DBG_SET_EPILOGUE_BEGIN => { - // ignore - } - - // indicates that all subsequent line number entries make reference to this source - // file name, instead of the default name specified in code_item - DebugInfoItem::DBG_SET_FILE => { - if let ULeb128p1::Pos(file_idx) = ULeb128p1::read(dex.fd)? 
{ - file = Some(dex.get_string(file_idx)?); - } - } - - 0x0A..=0xFF => { - // special opcodes - let adjusted_opcode = c - DebugInfoItem::DBG_FIRST_SPECIAL; - - line += DebugInfoItem::DBG_LINE_BASE as i64 - + ((adjusted_opcode % DebugInfoItem::DBG_LINE_RANGE) as i64); - - pc += (adjusted_opcode / DebugInfoItem::DBG_LINE_RANGE) as u32; - lines.insert(pc, line as ULong); - } - } - } - - for mut var in regs.into_iter().flatten() { - var.end_pc = pc; - local_variables.insert(var.start_pc, var); - } - - Ok(DebugInfo { - lines, - local_variables, - source_file: file, - }) - } -} diff --git a/src/dalvik/file/field.rs b/src/dalvik/file/field.rs deleted file mode 100644 index 278c261..0000000 --- a/src/dalvik/file/field.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::dalvik::dex::{AccessFlags, DexType, EncodedField}; -use crate::dalvik::error::Result; - -use super::annotation::DexAnnotation; -use super::{DexValue, IDexRef}; -use std::rc::Rc; - -#[derive(Debug)] -pub struct DexField { - pub identity: u32, - - /// The declaring class of this field in the DEX file stored as - /// a type reference. - pub class: Rc, - - /// The name of the field - pub name: Rc, - - /// The type of the field (may be primitive, class or array type) - pub type_: Rc, - - /// list of annotations associated with this field (optional) - pub annotations: Vec, - - /// The access flags for this field organized as a single [AccessFlags] - /// instance. - pub access_flags: Option, - - /// Stores the initial value for this field. This field is only - /// present if a static initializer has been declared for this - /// field. 
- pub init_value: Option, -} - -impl DexField { - pub fn build(dex: IDexRef<'_>, field: &EncodedField, prev_diff: u32) -> Result { - let index = field.field_idx_diff.0 + prev_diff; - let field_item = dex.get_field(index)?; - Ok(DexField { - type_: dex.get_type(field_item.type_idx as u32)?, - class: dex.get_type(field_item.class_idx as u32)?, - name: dex.get_string(field_item.name_idx)?, - access_flags: AccessFlags::from_bits(field.access_flags.0), - identity: index, - // Annotations and the initial value will be added later on - annotations: Vec::new(), - init_value: None, - }) - } -} diff --git a/src/dalvik/file/lazy_file.rs b/src/dalvik/file/lazy_file.rs deleted file mode 100644 index c60893f..0000000 --- a/src/dalvik/file/lazy_file.rs +++ /dev/null @@ -1,375 +0,0 @@ -use crate::dalvik::{ - dex::*, - error::{Error, Result}, -}; - -use binrw::BinRead; -use std::{ - collections::{btree_map::Entry::Vacant, BTreeMap}, - fmt::Debug, - io::{self, Read, Seek}, - rc::Rc, -}; - -use super::{method::DexPrototype, DexClassDef, IDex}; - -type Pool = BTreeMap>; - -#[derive(Debug)] -pub struct Dex<'a, R: Read + Seek> { - pub(super) fd: &'a mut R, - - /// ## Dex Header - /// All publicly available header information are stored in this field - /// and should not be modified. They can be used to parse the desired - /// section, although that is not recommended. - pub header: HeaderItem, - - // Internal fields to provide fast access to method handles and call sites - method_handles_size: u32, - method_handles_off: u32, - call_sites_size: u32, - call_sites_off: u32, - - /// All types defined by a DEX file parsed from the map list. Note that - /// types can be retrieved by providing the referenced index value using - /// `.type_at(index)`. - types: Pool, - - /// ## String Table - /// The map is allocated by the number of string id items defined in - /// the map list for [StringIdItem]. All strings used throughout the other - /// objects are referencing objects within this vector. 
- strings: Pool, - - /// ## Method Prototypes - /// Internal list of all prototypes defined within the DEX file. - protos: Pool, - - // ## Field items - fields: Pool, - - // ## Method items - methods: Pool, - - // Internal fields to provide fast access to method handles and call sites - methods_handles: Pool, - call_sites: Pool, - classes: Pool, -} - -macro_rules! check_index { - ($index: expr, item_size=$item_size: expr, $size: expr, $offset: expr) => {{ - let _offset = $offset + $index * $item_size; - if _offset >= ($size * $item_size) + $offset { - return Err(Error::InvalidIndex($index as usize)); - } - _offset - }}; -} - -impl<'b, R: Read + Seek> Dex<'b, R> { - // fundamental seek methods - pub(super) fn seeks(&mut self, offset: u64) -> Result<()> { - self.fd.seek(io::SeekFrom::Start(offset))?; - Ok(()) - } - - #[allow(dead_code)] - pub(super) fn seekc(&mut self, offset: i64) -> Result<()> { - self.fd.seek(io::SeekFrom::Current(offset))?; - Ok(()) - } - - #[allow(dead_code)] - pub(super) fn seeke(&mut self, offset: i64) -> Result<()> { - self.fd.seek(io::SeekFrom::End(offset))?; - Ok(()) - } - - pub fn read(mut reader: &mut R, verify: bool) -> Result> - where - R: Read + Seek, - { - let header = HeaderItem::read(&mut reader)?; - if verify { - // validate the header against Android's global constraints - header.verify(&mut reader, 0)?; - } - // In order to parse all other items, we need to create the map - // list first. 
- reader.seek(io::SeekFrom::Start(header.map_off as u64))?; - let map_list = MapList::read(&mut reader)?; - Ok(Dex { - fd: reader, - header, - method_handles_off: map_list.item_offset(MapListItemType::MethodHandleItem) as u32, - call_sites_off: map_list.item_offset(MapListItemType::CallSiteIdItem) as u32, - method_handles_size: map_list.item_size(MapListItemType::MethodHandleItem) as u32, - call_sites_size: map_list.item_size(MapListItemType::CallSiteIdItem) as u32, - // parsing is done lazily: types, strings, and protos will be - // populated on demand - types: BTreeMap::new(), - strings: BTreeMap::new(), - protos: BTreeMap::new(), - fields: BTreeMap::new(), - methods: BTreeMap::new(), - methods_handles: BTreeMap::new(), - call_sites: BTreeMap::new(), - classes: BTreeMap::new(), - }) - } - - // pub fn string_at<'a>(&'a self, index: u32) -> Result<&'a String> { - // // first tries to find the string in the string table - // match self.strings.get(&index) { - // Some(x) => Ok(x), - // None => Err(Error::InvalidIndex(index as usize)), - // } - // } - - /// ### Format: - /// ```text - /// ┌──────────────────────┐ ┌──────────────┐ ┌────────────────────┐ - /// │ ProtoIdItem │ │ StringIdItem │ │ StringDataItem │ - /// ├──────────────────────┤ ├──────────────┤ ├────────────────────┤ - /// │ shorty_idx: u32 ├─────────►│ offset: u32 ├───────────►│ data: mutf8_string │ - /// │ │ └─────────▲────┘ └────────────────────┘ - /// │ return_type_idx: u32 ├─────┐ │ - /// │ parameters_off: u32 │ │ └───────────────┐ - /// └─┬────────────────────┘ │ │ - /// │ │ ┌─────────────────────┐ │ - /// │ │ │ TypeIdItem │ │ - /// │ │ ├─────────────────────┤ │ - /// │ └───►│ descriptor_idx: u32 ├───┘ - /// │ └─────────▲───────────┘ - /// │ │ - /// │ └─────────────────┐ - /// │ │ - /// │ ┌──────────────────────┐ │ - /// │ │ TypeList │ │ - /// │ ├──────────────────────┤ │ - /// └──────────────────────────────►│ items: TypeIdItem[] ├────┘ - /// └──────────────────────┘ - ///``` - fn parse_proto(&mut 
self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 12, - self.header.proto_ids_size, - self.header.proto_ids_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - - let proto_item = ProtoIdItem::read(self.fd)?; - let shorty = self.get_string(proto_item.shorty_idx)?; - let return_type = self.get_type(proto_item.return_type_idx)?; - let mut proto = DexPrototype { - shorty, - return_type, - parameters: Vec::new(), - }; - - if proto_item.parameters_off != 0 { - // type list only present if offset is != 0 - self.fd - .seek(io::SeekFrom::Start(proto_item.parameters_off as u64))?; - let params = TypeList::read(self.fd)?; - for j in 0..params.size { - // the parameter item stores the type index of the parameter - let index = params.list[j as usize].type_idx; - let ty = self.get_type(index as u32)?; - proto.parameters.push(ty); - } - } - - self.protos.insert(index, Rc::new(proto)); - Ok(()) - } - - /* Format: - ┌─────────────────────┐ - │ TypeIdItem │ - ├─────────────────────┤ - │ descriptor_idx: u32 ├───┐ - └─────────────────────┘ │ - │ - ┌──────────────────┘ - │ - ┌──────▼───────┐ ┌────────────────────┐ - │ StringIdItem │ │ StringDataItem │ - ├──────────────┤ ├────────────────────┤ - │ offset: u32 ├───────────►│ data: mutf8_string │ - └──────────────┘ └────────────────────┘ - */ - fn parse_type(&mut self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 4, - self.header.type_ids_size, - self.header.type_ids_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - let type_item = TypeIdItem::read(self.fd)?; - - let string = self.get_string(type_item.descriptor_idx)?; - let dtype = DexType::read(&string)?; - - self.types.insert(index, Rc::new(dtype)); - Ok(()) - } - - fn parse_field(&mut self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 8, - self.header.field_ids_size, - self.header.field_ids_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - 
let field_item = FieldIdItem::read(self.fd)?; - - self.fields.insert(index, Rc::new(field_item)); - Ok(()) - } - - fn parse_method(&mut self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 8, - self.header.method_ids_size, - self.header.method_ids_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - let method_item = MethodIdItem::read(self.fd)?; - - self.methods.insert(index, Rc::new(method_item)); - Ok(()) - } - - fn parse_method_handle(&mut self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 4, - self.method_handles_size, - self.method_handles_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - let method_handle = MethodHandleItem::read(self.fd)?; - self.methods_handles.insert(index, Rc::new(method_handle)); - Ok(()) - } - - fn parse_call_site(&mut self, index: u32) -> Result<()> { - let offset = check_index!( - index, - item_size = 4, - self.call_sites_size, - self.call_sites_off - ); - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - let call_site = CallSiteIdItem::read(self.fd)?; - self.call_sites.insert(index, Rc::new(call_site)); - Ok(()) - } -} - -impl<'a, R: Read + Seek> IDex for Dex<'a, R> { - /* Format: - ┌──────────────┐ ┌────────────────────┐ - │ StringIdItem │ │ StringDataItem │ - ├──────────────┤ ├────────────────────┤ - │ offset: u32 ├───────────►│ data: mutf8_string │ - └──────────────┘ └────────────────────┘ - */ - fn get_string(&mut self, index: u32) -> Result> { - // first tries to find the string in the string table - // if not found, tries to read it from the file - if let Vacant(e) = self.strings.entry(index) { - let offset = check_index!( - index, - item_size = 4, - self.header.string_ids_size, - self.header.string_ids_off - ); - - self.fd.seek(io::SeekFrom::Start(offset as u64))?; - let string_item = StringIdItem::read(self.fd)?; - self.fd - .seek(io::SeekFrom::Start(string_item.offset as u64))?; - e.insert(Rc::new(mutf8::read(self.fd)?)); - } - 
Ok(self.strings[&index].clone()) - } - - /// Returns the prototype at the given index. - /// - /// If the index is out of bounds, an error is returned. Note that - /// this method will cache the prototype in the dex file. - fn get_proto(&mut self, index: u32) -> Result> { - // same as before: first tries to find the proto in the proto table - // if not found, tries to read it from the file - if !self.protos.contains_key(&index) { - self.parse_proto(index)?; - } - Ok(self.protos[&index].clone()) - } - - fn get_type(&mut self, index: u32) -> Result> { - // same as before: first tries to find the type in the type table - // if not found, tries to read it from the file - if !self.types.contains_key(&index) { - self.parse_type(index)?; - } - - Ok(self.types[&index].clone()) - } - - fn get_method_handle(&mut self, index: u32) -> Result> { - // same as before: first tries to find the proto in the proto table - // if not found, tries to read it from the file - if !self.methods_handles.contains_key(&index) { - self.parse_method_handle(index)?; - } - Ok(self.methods_handles[&index].clone()) - } - - fn get_field(&mut self, index: u32) -> Result> { - if !self.fields.contains_key(&index) { - self.parse_field(index)?; - } - Ok(self.fields[&index].clone()) - } - - fn get_method(&mut self, index: u32) -> Result> { - if !self.methods.contains_key(&index) { - self.parse_method(index)?; - } - Ok(self.methods[&index].clone()) - } - - fn get_call_site(&mut self, index: u32) -> Result> { - if !self.call_sites.contains_key(&index) { - self.parse_call_site(index)?; - } - Ok(self.call_sites[&index].clone()) - } - - fn get_class_def(&mut self, index: u32) -> Result> { - // Note: we can't use btree_map::Entry::Vacant here as it would - // introduce a second mutable borrow of 'self' - if !self.classes.contains_key(&index) { - let offset = check_index!( - index, - item_size = 32, - self.header.class_defs_size, - self.header.class_defs_off - ); - - self.fd.seek(io::SeekFrom::Start(offset as 
u64))?; - let class_def = DexClassDef::new(self, index)?; - self.classes.insert(index, Rc::new(class_def)); - } - Ok(self.classes[&index].clone()) - } -} diff --git a/src/dalvik/file/method.rs b/src/dalvik/file/method.rs deleted file mode 100644 index 368512c..0000000 --- a/src/dalvik/file/method.rs +++ /dev/null @@ -1,179 +0,0 @@ -use crate::dalvik::dex::{ - AccessFlags, AnnotationSetRefList, CodeItem, DebugInfoItem, DexType, EncodedMethod, SLeb128, ULeb128, ULeb128p1 -}; -use crate::dalvik::error::Result; -use crate::dalvik::insns::{self, Insn}; - -use super::annotation::DexAnnotation; -use super::{debug::DebugInfo, Dex, IDex, IDexRef}; -use binrw::BinRead; -use std::io::{Read, Seek}; -use std::rc::Rc; - -#[derive(Debug)] -pub struct DexPrototype { - /// The shorty of the prototype (short type descriptor) - pub shorty: Rc, - /// The return type of this prototype - pub return_type: Rc, - /// The parameters of this prototype (only types) - pub parameters: Vec>, -} - -#[derive(Debug)] -pub struct DexParameter { - /// The type of this parameter - pub type_: Rc, - - /// The name of this parameter (optional). - /// - /// *Note*: The actual name can be retriebed either through - /// parsing debug info items or through the `@MethodParameters` - /// annotation. - pub name: Option>, - - /// list of annotations associated with this parameter (optional) - pub annotations: Vec, - - /// The access flags for this parameter organized as a single [AccessFlags] - /// instance. 
- pub access_flags: Option, -} - -impl DexParameter { - pub fn read_annotations(&mut self, dex: &mut Dex<'_, R>) -> Result<()> - where - R: Read + Seek, - { - let set_ref_list = AnnotationSetRefList::read(dex.fd)?; - for set_ref in &set_ref_list.list { - if set_ref.annotations_off == 0 { - continue; - } - dex.seeks(set_ref.annotations_off as u64)?; - DexAnnotation::read_set_into(dex, &mut self.annotations)?; - } - Ok(()) - } -} - -#[derive(Debug)] -pub struct DexMethod { - pub identity: u32, - - /// The declaring class of this method in the DEX file stored as - /// a type reference. - pub class: Rc, - - /// The name of the method - pub name: Rc, - - /// The method signature as a prototype reference - pub proto: Rc, - - /// list of annotations associated with this method (optional) - pub annotations: Vec, - pub parameters: Vec, - - /// The access flags for this method organized as a single [AccessFlags] - /// instance. - pub access_flags: Option, - - /// Optional code associated with this method (abstract or native methods - /// won't store any code). - pub code: Option, - - /// Additional debug information for this method. - pub debug_info: Option, -} - -impl DexMethod { - pub fn build( - dex: &mut Dex<'_, R>, - encoded_method: &EncodedMethod, - prev_diff: u32, - ) -> Result - where - R: Read + Seek, - { - // The method_idx_diff value in the first encoded_method item in each - // of the method types holds the index of the matching item in the method_ids - // section. - // - // In subsequent items, however, this value is the difference from the index - // of the previous item, and to calculate the method_ids index the difference - // must be incremented to the previous method_idx_diff values. 
- let index = prev_diff + encoded_method.method_idx_diff.0; - let method_item = dex.get_method(index)?; - - let proto = dex.get_proto(method_item.proto_idx as u32)?; - let mut parameters: Vec = proto - .parameters - .iter() - // The parameters will be cloned here to ensure we can infer the right - // annotations or access flags once we've parsed additional debug information. - .map(|x| DexParameter { - type_: x.clone(), - name: None, - annotations: Vec::new(), - access_flags: None, - }) - .collect(); - - let mut code: Option = None; - let mut debug: Option = None; - if encoded_method.code_off.0 != 0 { - // parse code item but don't start parsing instructions just yet - dex.seeks(encoded_method.code_off.0 as u64)?; - let code_item = CodeItem::read(dex.fd)?; - - if code_item.debug_info_off != 0 { - // directly parse debug information - dex.seeks(code_item.debug_info_off as u64)?; - let debug_info = DebugInfoItem::read(dex.fd)?; - DexMethod::apply_debug_info(&mut parameters, &debug_info, dex)?; - - // parse additional information - debug = Some(debug_info.parse_debug_info(&code_item, dex, &proto)?); - } - code = Some(code_item); - } - - // put everything together - Ok(DexMethod { - identity: index, - class: dex.get_type(method_item.class_idx as u32)?, - name: dex.get_string(method_item.name_idx)?, - proto: proto.clone(), - annotations: Vec::new(), - parameters, - access_flags: AccessFlags::from_bits(encoded_method.access_flags.0), - code, - debug_info: debug, - }) - } - - fn apply_debug_info( - parameters: &mut [DexParameter], - debug_info: &DebugInfoItem, - dex: IDexRef<'_>, - ) -> Result<()> { - for (i, param_name_idx) in debug_info.parameter_names.iter().enumerate() { - if let ULeb128p1::Pos(index) = param_name_idx { - parameters[i].name = Some(dex.get_string(*index)?); - } - } - Ok(()) - } -} - -/* Pulic API */ -impl DexMethod { - pub fn disasm(&self, dex: IDexRef<'_>) -> Result> { - if let Some(code) = &self.code { - Ok(insns::disasm(code, dex)?) 
- } else { - Ok(Vec::new()) - } - } -} \ No newline at end of file diff --git a/src/dalvik/file/mod.rs b/src/dalvik/file/mod.rs deleted file mode 100644 index 8f97fca..0000000 --- a/src/dalvik/file/mod.rs +++ /dev/null @@ -1,35 +0,0 @@ -use super::{ - dex::{CallSiteIdItem, DexType, FieldIdItem, MethodHandleItem, MethodIdItem}, - error::Result, -}; -use std::rc::Rc; - -pub mod value; -pub use value::*; - -pub mod class_def; -pub use class_def::*; - -pub mod lazy_file; -pub use lazy_file::*; - -pub mod annotation; -pub mod debug; -pub mod field; -pub mod method; - -// public interfaces that define behaviour of all classes - -pub trait IDex { - fn get_string(&mut self, index: u32) -> Result>; - fn get_proto(&mut self, index: u32) -> Result>; - fn get_type(&mut self, index: u32) -> Result>; - fn get_method_handle(&mut self, index: u32) -> Result>; - fn get_field(&mut self, index: u32) -> Result>; - fn get_method(&mut self, index: u32) -> Result>; - fn get_call_site(&mut self, index: u32) -> Result>; - fn get_class_def(&mut self, index: u32) -> Result>; -} - -pub type IDexRef<'a> = &'a mut dyn IDex; -pub type IDexRc = Box; diff --git a/src/dalvik/file/value.rs b/src/dalvik/file/value.rs deleted file mode 100644 index 04c75e8..0000000 --- a/src/dalvik/file/value.rs +++ /dev/null @@ -1,65 +0,0 @@ -use std::rc::Rc; - -use crate::dalvik::{dex::*, error::Result}; - -use super::{annotation::DexAnnotation, method::DexPrototype, IDexRef}; - -#[derive(Debug)] -pub enum DexValue { - Byte(i8), - Short(i16), - Char(char), - Int(i32), - Long(i64), - Float(f32), - Double(f64), - String(Rc), - Type(Rc), - Annotation(DexAnnotation), - MethodType(Rc), - MethodRef(u32, Rc), - FieldRef(Rc), - MethodHandle(Rc), - Array(Vec), - True, - False, - Null, - Enum(Rc), - Data(u8, Vec), -} - -impl DexValue { - pub fn from_array(array: &EncodedArray, dex: IDexRef<'_>) -> Result { - let mut values = Vec::with_capacity(array.values.len()); - for value in &array.values { - 
values.push(DexValue::from(value, dex)?); - } - Ok(DexValue::Array(values)) - } - - pub fn from(value: &EncodedValue, dex: IDexRef<'_>) -> Result { - match value { - EncodedValue::Byte(v) => Ok(DexValue::Byte(*v)), - EncodedValue::Short(v) => Ok(DexValue::Short(*v)), - EncodedValue::Char(v) => Ok(DexValue::Char(*v)), - EncodedValue::Int(v) => Ok(DexValue::Int(*v)), - EncodedValue::Long(v) => Ok(DexValue::Long(*v)), - EncodedValue::Float(v) => Ok(DexValue::Float(*v)), - EncodedValue::Double(v) => Ok(DexValue::Double(*v)), - EncodedValue::String(v) => Ok(DexValue::String(dex.get_string(*v)?)), - EncodedValue::Type(v) => Ok(DexValue::Type(dex.get_type(*v)?)), - EncodedValue::MethodType(v) => Ok(DexValue::MethodType(dex.get_proto(*v)?)), - EncodedValue::Annotation(v) => { - Ok(DexValue::Annotation(DexAnnotation::from_encoded(v, dex)?)) - } - EncodedValue::Field(v) => Ok(DexValue::FieldRef(dex.get_field(*v)?)), - EncodedValue::Method(v) => Ok(DexValue::MethodRef(*v, dex.get_method(*v)?)), - EncodedValue::MethodHandle(v) => Ok(DexValue::MethodHandle(dex.get_method_handle(*v)?)), - EncodedValue::Array(v) => DexValue::from_array(v, dex), - EncodedValue::True => Ok(DexValue::True), - EncodedValue::False => Ok(DexValue::False), - EncodedValue::Null => Ok(DexValue::Null), - EncodedValue::Enum(v) => Ok(DexValue::Enum(dex.get_field(*v)?)), // _ => unreachable!("unhandled value type"), - } - } -} diff --git a/src/dalvik/insns.rs b/src/dalvik/insns.rs deleted file mode 100644 index 823da23..0000000 --- a/src/dalvik/insns.rs +++ /dev/null @@ -1,1128 +0,0 @@ -//! Dalvik executable instruction set -//! -//! More details can be taken from [Android Docs: Dalvik executable -//! instruction formats](https://source.android.com/docs/core/runtime/instruction-formats) -//! -//! Instruction formats are implemented as functions and will be used to -//! parse a single instruction. As multiple opcodes store the same instruction -//! 
format, they simply reference their corresponding function to parse the -//! contents. - -use binrw::{ - BinRead, // trait for reading -}; -use byteorder::{LittleEndian, ReadBytesExt}; - -use crate::dalvik::error::Result; - -use std::fmt::Debug; -use std::io::{Cursor, Seek}; -use std::ops::Range; -use std::rc::Rc; - -use super::dex::{ - CallSiteIdItem, CodeItem, DexType, FieldIdItem, FillArrayData, MethodHandleItem, MethodIdItem, - PackedSwitch, SparseSwitch, -}; -use crate::dalvik::file::{method::DexPrototype, IDexRef}; - -// The function below is important: -pub fn disasm(item: &CodeItem, dex: IDexRef<'_>) -> Result> { - let mut insns = Vec::new(); - let mut cursor = Cursor::new(item.insns.as_ref()); - // 1. Fetch information for the next opcode - while let Some(raw_opcode) = match cursor.read_u16::() { - Ok(raw_opcode) => Some(raw_opcode), - Err(_) => None, - } { - // 2. Decode the opcode and its representation - let opcode = &OPCODES[(raw_opcode & 0xFF) as usize]; - let start = (cursor.position() - 2) as usize; - - let mut insn = Insn { - opcode, - range: start..(start + opcode.length as usize), - format: InsnFormat::Format00x, - payload: None, - }; - // 3. 
Execute the instruction format and insert the instruction's - // information into the instruction list - cursor.set_position(start as u64); - let format = match (opcode.format_factory)(&mut cursor, &mut insn, dex) { - Ok(format) => format, - Err(e) => { - return Err(super::error::Error::InvalidData(format!( - "failed to parse instruction: {:?} at {:?}", - e, opcode - ))); - } - }; - - insn.format = format; - // update range if necessary - if cursor.position() > insn.range.end as u64 { - insn.range.end = cursor.position() as usize; - } - insns.push(insn); - } - Ok(insns) -} - -// just the implementation for above -pub enum Index { - Type(Rc), - Field(Rc), - MethodHandle(Rc), - Proto(Rc), - String(Rc), - CallSite(Rc), - Method(Rc), - Unknown(u32), - Literal(i64), -} - -#[derive(Debug)] -pub enum InsnFormat { - Format00x, - Format10x, - Format12x { - a: u8, - b: u8, - }, - // REVISIT: change to string reference - Format11n { - a: u8, - b: Index, - }, - Format11x { - a: u8, - }, - Format10t { - a: i8, - }, - /// ### Format: `AA|op` (`20t`) - /// - /// This format specifically describes the `goto/16`` opcode. - Format20t { - a: i16, - }, - /// suggested format for statically determined verification errors; A is the type - /// of error and B is an index into a type-appropriate table (e.g. method references - /// for a no-such-method error). 
- Format20bc { - a: u8, - b: Index, - }, - - Format22x { - a: u8, - b: u16, - }, - - Format21t { - a: u8, - b: i16, - }, - - Format21s { - a: u8, - b: Index, - }, - /// B register's format is special - Format21h { - a: u8, - b: Index, - }, - Format21c { - a: u8, - b: Index, - }, - - Format23x { - a: u8, - b: u8, - c: u8, - }, - Format22b { - a: u8, - b: u8, - c: Index, - }, - - Format22t { - a: u8, - b: u8, - c: i16, - }, - - Format22s { - a: u8, - b: u8, - c: Index, - }, - Format22c { - a: u8, - b: u8, - c: Index, - }, - - // Format22cs: suggested format for statically linked field - // access instructions of format 22c - Format30t { - a: i32, - }, - - Format32x { - a: u16, - b: u16, - }, - - Format31i { - a: u8, - b: Index, - }, - - Format31t { - a: u8, - b: i32, - }, - - Format31c { - a: u8, - b: Index, - }, - - Format35c { - a: u32, - b: Index, // reference - c: u32, - d: u32, - e: u32, - f: u32, - g: u32, - }, - - Format3rc { - a: u8, - b: Index, - c: u16, - regs: Range, - }, - - Format45cc { - a: u8, - b: Index, - c: u8, - d: u8, - e: u8, - f: u8, - g: u8, - h: Index, - }, - - Format4rcc { - a: u8, - b: Index, - c: u16, - h: Index, - regs: Range, - }, - Format51l { - a: u8, - b: Index, - }, -} - -#[derive(Debug)] -pub enum Payload { - PackedSwitch(PackedSwitch), - SparseSwitch(SparseSwitch), - FillArrayData(FillArrayData), -} - -#[derive(Debug)] -pub struct Insn { - pub opcode: &'static Opcode, - pub range: Range, - pub format: InsnFormat, - pub payload: Option, -} - -type IFormatFactory = dyn Fn(&mut Cursor<&[u8]>, &mut Insn, IDexRef<'_>) -> Result; -// \____/ \________________/ \_________/ \________________/ - The function returns an instance of -// | | | InsnFormat type with all parsed data -// | | | -// | | +------------------------------ Mutable reference to the current DEX -// | | file to resolve possible index refs -// | | -// | +---------------------------------------------------------- A mutable reference to the code pointer -// | (as u8 values to 
apply ByteorderReadExt) -// | -// +----------------------------------------------------------------------- A type that can be represented by the -// Fn-trait - -pub struct Opcode { - pub opcode: u8, - pub name: &'static str, - pub registers: u8, - pub length: u8, - pub format_factory: &'static IFormatFactory, -} - -impl Debug for Opcode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "Opcode{{val={:02x}, name={}, registers={}, length={}}}", - self.opcode, self.name, self.registers, self.length - ) - } -} - -// REVISIT: is it possible to make this Sync? -unsafe impl Sync for Opcode {} - -macro_rules! opcode { - ($name:literal:= $_opcode_:literal impl $func:ident[len=$length:literal, reg=$registers:literal]) => { - Opcode { - opcode: $_opcode_, - name: $name, - registers: $registers, - length: $length, - format_factory: &$func, - } - }; - ($name:literal:= $_opcode_:literal impl $func:ident []) => { - Opcode { - opcode: $_opcode_, - name: $name, - registers: 0, - length: 0, - format_factory: &$func, - } - }; - - ($_opcode_:literal) => { - Opcode { - opcode: $_opcode_, - name: stringify!($_opcode_), - registers: 0, - length: 1, - format_factory: &format_10x, - } - }; -} - -/* Notes on opcodes definitions: - -The first identifier specifies the name of the opcode (it will be -stringified internally). Next, the opcode number is specified as -a [uByte], followed by a reference to the function implementation -of the opcode format. The last two numbers are essentially the -number of code units this opcode uses and the amount of registers -to allocate. - -format := ':=' 'impl' '[' 'use' len=, 'reg'= ']' - -The format is designed to operate on an iterator of [USHort] values, -and will be used to parse the opcode's contents. For example, - -pub fn format_00x(cursor: &mut Cursor<&'a [u8]>, insn: &mut Insn) -> Result { - // parsing is done here -} - -will parse opcodes using the format "00x" and update the instruction's -contents. 
The specified length can be used while parsing to validate -whether the format function has been used correctly. Additionally, the -number of registers used by the opcode must be specified to remove -the amount of unnecessary register allocations. -*/ -pub const OPCODES: &[Opcode] = &[ - opcode! { "nop" := 0x00 impl format_10x [len=1, reg=0] }, - // move ops - opcode! { "move" := 0x01 impl format_12x[len=1, reg=2] }, - opcode! { "move/from16" := 0x02 impl format_22x[len=2, reg=2] }, - opcode! { "move/16" := 0x03 impl format_32x[len=3, reg=2] }, - opcode! { "move-wide" := 0x04 impl format_12x[len=1, reg=2] }, - opcode! { "move-wide/from16" := 0x05 impl format_22x[len=2, reg=2] }, - opcode! { "move-wide/16" := 0x06 impl format_32x[len=3, reg=2] }, - opcode! { "move-object" := 0x07 impl format_12x[len=1, reg=2] }, - opcode! { "move-object/from16" := 0x08 impl format_22x[len=2, reg=2] }, - opcode! { "move-object/16" := 0x09 impl format_32x[len=3, reg=2] }, - opcode! { "move-result" := 0x0A impl format_11x[len=1, reg=1] }, - opcode! { "move-result-wide" := 0x0B impl format_11x[len=1, reg=1] }, - opcode! { "move-result-object" := 0x0C impl format_11x[len=1, reg=1] }, - opcode! { "move-exception" := 0x0D impl format_11x[len=1, reg=1] }, - // returnops - opcode! { "return-void" := 0x0E impl format_10x[len=1, reg=0] }, - opcode! { "return" := 0x0F impl format_11x[len=1, reg=1] }, - opcode! { "return-wide" := 0x10 impl format_11x[len=1, reg=1] }, - opcode! { "return-object" := 0x11 impl format_11x[len=1, reg=1] }, - // constops - opcode! { "const/4" := 0x12 impl format_11n[len=1, reg=2] }, - opcode! { "const/16" := 0x13 impl format_21s[len=2, reg=2] }, - opcode! { "const" := 0x14 impl format_31i[len=3, reg=2] }, - opcode! { "const/high16" := 0x15 impl format_21h[len=2, reg=2] }, - opcode! { "const-wide/16" := 0x16 impl format_21s[len=2, reg=2] }, - opcode! { "const-wide/32" := 0x17 impl format_31i[len=3, reg=2] }, - opcode! 
{ "const-wide" := 0x18 impl format_51l[len=5, reg=2] }, - opcode! { "const-wide/high16" := 0x19 impl format_21h[len=2, reg=2] }, - opcode! { "const-string" := 0x1A impl format_21c[len=2, reg=2] }, - opcode! { "const-string/jumbo" := 0x1B impl format_31c[len=3, reg=2] }, - opcode! { "const-class" := 0x1C impl format_21c[len=2, reg=2] }, - // monitorops - opcode! { "monitor-enter" := 0x1D impl format_11x[len=1, reg=1] }, - opcode! { "monitor-exit" := 0x1E impl format_11x[len=1, reg=1] }, - // (other ops) - opcode! { "check-cast" := 0x1F impl format_21c[len=2, reg=2] }, - opcode! { "instance-of" := 0x20 impl format_22c[len=2, reg=2] }, - opcode! { "array-length" := 0x21 impl format_12x[len=1, reg=1] }, - // new-* ops - opcode! { "new-instance" := 0x22 impl format_21c[len=2, reg=2] }, - opcode! { "new-array" := 0x23 impl format_22c[len=2, reg=2] }, - // filled-* ops - opcode! { "filled-new-array" := 0x24 impl format_35c[len=3, reg=7] }, - opcode! { "filled-new-array/range" := 0x25 impl format_3rc[len=3, reg=2] }, - opcode! { "fill-array-data" := 0x26 impl format_31t[len=3, reg=2] }, - // throw - opcode! { "throw" := 0x27 impl format_11x[len=1, reg=1] }, - // goto - opcode! { "goto" := 0x28 impl format_10t[len=1, reg=1] }, - opcode! { "goto/16" := 0x29 impl format_20t[len=2, reg=1] }, - opcode! { "goto/32" := 0x2A impl format_30t[len=3, reg=1] }, - // branches - opcode! { "packed-branch" := 0x2B impl format_31t[len=1, reg=1] }, - opcode! { "sparse-branch" := 0x2C impl format_31t[len=2, reg=1] }, - // comparisons - opcode! { "cmpl-float" := 0x2D impl format_23x[len=2, reg=3] }, - opcode! { "cmpg-float" := 0x2E impl format_23x[len=2, reg=3] }, - opcode! { "cmpl-double" := 0x2F impl format_23x[len=2, reg=3] }, - opcode! { "cmpg-double" := 0x30 impl format_23x[len=2, reg=3] }, - opcode! { "cmp-long" := 0x31 impl format_23x[len=2, reg=3] }, - // ifops - opcode! { "if-eq" := 0x32 impl format_22t[len=2, reg=3] }, - opcode! 
{ "if-ne" := 0x33 impl format_22t[len=2, reg=3] }, - opcode! { "if-lt" := 0x34 impl format_22t[len=2, reg=3] }, - opcode! { "if-ge" := 0x35 impl format_22t[len=2, reg=3] }, - opcode! { "if-gt" := 0x36 impl format_22t[len=2, reg=3] }, - opcode! { "if-le" := 0x37 impl format_22t[len=2, reg=3] }, - opcode! { "if-eqz" := 0x38 impl format_21t[len=2, reg=2] }, - opcode! { "if-nez" := 0x39 impl format_21t[len=2, reg=2] }, - opcode! { "if-ltz" := 0x3A impl format_21t[len=2, reg=2] }, - opcode! { "if-gez" := 0x3B impl format_21t[len=2, reg=2] }, - opcode! { "if-gtz" := 0x3C impl format_21t[len=2, reg=2] }, - opcode! { "if-lez" := 0x3D impl format_21t[len=2, reg=2] }, - // unused - opcode!(0x3E), - opcode!(0x3F), - opcode!(0x40), - opcode!(0x41), - opcode!(0x42), - opcode!(0x43), - // arrayops - opcode! { "aget" := 0x44 impl format_23x[len=2, reg=3] }, - opcode! { "aget-wide" := 0x45 impl format_23x[len=2, reg=3] }, - opcode! { "aget-object" := 0x46 impl format_23x[len=2, reg=3] }, - opcode! { "aget-boolean" := 0x47 impl format_23x[len=2, reg=3] }, - opcode! { "aget-byte" := 0x48 impl format_23x[len=2, reg=3] }, - opcode! { "aget-char" := 0x49 impl format_23x[len=2, reg=3] }, - opcode! { "aget-short" := 0x4A impl format_23x[len=2, reg=3] }, - opcode! { "aput" := 0x4B impl format_23x[len=2, reg=3] }, - opcode! { "aput-wide" := 0x4C impl format_23x[len=2, reg=3] }, - opcode! { "aput-object" := 0x4D impl format_23x[len=2, reg=3] }, - opcode! { "aput-boolean" := 0x4E impl format_23x[len=2, reg=3] }, - opcode! { "aput-byte" := 0x4F impl format_23x[len=2, reg=3] }, - opcode! { "aput-char" := 0x50 impl format_23x[len=2, reg=3] }, - opcode! { "aput-short" := 0x51 impl format_23x[len=2, reg=3] }, - // instanceops - opcode! { "iget" := 0x52 impl format_22c[len=2, reg=2] }, - opcode! { "iget-wide" := 0x53 impl format_22c[len=2, reg=2] }, - opcode! { "iget-object" := 0x54 impl format_22c[len=2, reg=2] }, - opcode! { "iget-boolean" := 0x55 impl format_22c[len=2, reg=2] }, - opcode! 
{ "iget-byte" := 0x56 impl format_22c[len=2, reg=2] }, - opcode! { "iget-char" := 0x57 impl format_22c[len=2, reg=2] }, - opcode! { "iget-short" := 0x58 impl format_22c[len=2, reg=2] }, - opcode! { "iput" := 0x59 impl format_22c[len=2, reg=2] }, - opcode! { "iput-wide" := 0x5A impl format_22c[len=2, reg=2] }, - opcode! { "iput-object" := 0x5B impl format_22c[len=2, reg=2] }, - opcode! { "iput-boolean" := 0x5C impl format_22c[len=2, reg=2] }, - opcode! { "iput-byte" := 0x5D impl format_22c[len=2, reg=2] }, - opcode! { "iput-char" := 0x5E impl format_22c[len=2, reg=2] }, - opcode! { "iput-short" := 0x5F impl format_22c[len=2, reg=2] }, - // staticops - opcode! { "sget" := 0x60 impl format_21c[len=2, reg=2] }, - opcode! { "sget-wide" := 0x61 impl format_21c[len=2, reg=2] }, - opcode! { "sget-object" := 0x62 impl format_21c[len=2, reg=2] }, - opcode! { "sget-boolean" := 0x63 impl format_21c[len=2, reg=2] }, - opcode! { "sget-byte" := 0x64 impl format_21c[len=2, reg=2] }, - opcode! { "sget-char" := 0x65 impl format_21c[len=2, reg=2] }, - opcode! { "sget-short" := 0x66 impl format_21c[len=2, reg=2] }, - opcode! { "sput" := 0x67 impl format_21c[len=2, reg=2] }, - opcode! { "sput-wide" := 0x68 impl format_21c[len=2, reg=2] }, - opcode! { "sput-object" := 0x69 impl format_21c[len=2, reg=2] }, - opcode! { "sput-boolean" := 0x6A impl format_21c[len=2, reg=2] }, - opcode! { "sput-byte" := 0x6B impl format_21c[len=2, reg=2] }, - opcode! { "sput-char" := 0x6C impl format_21c[len=2, reg=2] }, - opcode! { "sput-short" := 0x6D impl format_21c[len=2, reg=2] }, - // invokeops - opcode! { "invoke-virtual" := 0x6E impl format_35c[len=3, reg=7] }, - opcode! { "invoke-super" := 0x6F impl format_35c[len=3, reg=7] }, - opcode! { "invoke-direct" := 0x70 impl format_35c[len=3, reg=7] }, - opcode! { "invoke-static" := 0x71 impl format_35c[len=3, reg=7] }, - opcode! { "invoke-interface" := 0x72 impl format_35c[len=3, reg=7] }, - // unused - opcode!(0x73), - // invoke/range - opcode! 
{ "invoke-virtual/range" := 0x74 impl format_3rc[len=3, reg=7] }, - opcode! { "invoke-super/range" := 0x75 impl format_3rc[len=3, reg=7] }, - opcode! { "invoke-direct/range" := 0x76 impl format_3rc[len=3, reg=7] }, - opcode! { "invoke-static/range" := 0x77 impl format_3rc[len=3, reg=7] }, - opcode! { "invoke-interface/range" := 0x78 impl format_3rc[len=3, reg=7] }, - // unused - opcode!(0x79), - opcode!(0x7A), - // unops - opcode! { "neg-int" := 0x7B impl format_12x[len=1, reg=2] }, - opcode! { "not-int" := 0x7C impl format_12x[len=1, reg=2] }, - opcode! { "neg-long" := 0x7D impl format_12x[len=1, reg=2] }, - opcode! { "not-long" := 0x7E impl format_12x[len=1, reg=2] }, - opcode! { "neg-float" := 0x7F impl format_12x[len=1, reg=2] }, - opcode! { "neg-double" := 0x80 impl format_12x[len=1, reg=2] }, - opcode! { "int-to-long" := 0x81 impl format_12x[len=1, reg=2] }, - opcode! { "int-to-float" := 0x82 impl format_12x[len=1, reg=2] }, - opcode! { "int-to-double" := 0x83 impl format_12x[len=1, reg=2] }, - opcode! { "long-to-int" := 0x84 impl format_12x[len=1, reg=2] }, - opcode! { "long-to-float" := 0x85 impl format_12x[len=1, reg=2] }, - opcode! { "long-to-double" := 0x86 impl format_12x[len=1, reg=2] }, - opcode! { "float-to-int" := 0x87 impl format_12x[len=1, reg=2] }, - opcode! { "float-to-long" := 0x88 impl format_12x[len=1, reg=2] }, - opcode! { "float-to-double" := 0x89 impl format_12x[len=1, reg=2] }, - opcode! { "double-to-int" := 0x8A impl format_12x[len=1, reg=2] }, - opcode! { "double-to-long" := 0x8B impl format_12x[len=1, reg=2] }, - opcode! { "double-to-float" := 0x8C impl format_12x[len=1, reg=2] }, - opcode! { "int-to-byte" := 0x8D impl format_12x[len=1, reg=2] }, - opcode! { "int-to-char" := 0x8E impl format_12x[len=1, reg=2] }, - opcode! { "int-to-short" := 0x8F impl format_12x[len=1, reg=2] }, - // binops - opcode! { "add-int" := 0x90 impl format_23x[len=2, reg=3] }, - opcode! { "sub-int" := 0x91 impl format_23x[len=2, reg=3] }, - opcode! 
{ "mul-int" := 0x92 impl format_23x[len=2, reg=3] }, - opcode! { "div-int" := 0x93 impl format_23x[len=2, reg=3] }, - opcode! { "rem-int" := 0x94 impl format_23x[len=2, reg=3] }, - opcode! { "and-int" := 0x95 impl format_23x[len=2, reg=3] }, - opcode! { "or-int" := 0x96 impl format_23x[len=2, reg=3] }, - opcode! { "xor-int" := 0x97 impl format_23x[len=2, reg=3] }, - opcode! { "shl-int" := 0x98 impl format_23x[len=2, reg=3] }, - opcode! { "shr-int" := 0x99 impl format_23x[len=2, reg=3] }, - opcode! { "ushr-int" := 0x9A impl format_23x[len=2, reg=3] }, - opcode! { "add-long" := 0x9B impl format_23x[len=2, reg=3] }, - opcode! { "sub-long" := 0x9C impl format_23x[len=2, reg=3] }, - opcode! { "mul-long" := 0x9D impl format_23x[len=2, reg=3] }, - opcode! { "div-long" := 0x9E impl format_23x[len=2, reg=3] }, - opcode! { "rem-long" := 0x9F impl format_23x[len=2, reg=3] }, - opcode! { "and-long" := 0xA0 impl format_23x[len=2, reg=3] }, - opcode! { "or-long" := 0xA1 impl format_23x[len=2, reg=3] }, - opcode! { "xor-long" := 0xA2 impl format_23x[len=2, reg=3] }, - opcode! { "shl-long" := 0xA3 impl format_23x[len=2, reg=3] }, - opcode! { "shr-long" := 0xA4 impl format_23x[len=2, reg=3] }, - opcode! { "ushr-long" := 0xA5 impl format_23x[len=2, reg=3] }, - opcode! { "add-float" := 0xA6 impl format_23x[len=2, reg=3] }, - opcode! { "sub-float" := 0xA7 impl format_23x[len=2, reg=3] }, - opcode! { "mul-float" := 0xA8 impl format_23x[len=2, reg=3] }, - opcode! { "div-float" := 0xA9 impl format_23x[len=2, reg=3] }, - opcode! { "rem-float" := 0xAA impl format_23x[len=2, reg=3] }, - opcode! { "add-double" := 0xAB impl format_23x[len=2, reg=3] }, - opcode! { "sub-double" := 0xAC impl format_23x[len=2, reg=3] }, - opcode! { "mul-double" := 0xAD impl format_23x[len=2, reg=3] }, - opcode! { "div-double" := 0xAE impl format_23x[len=2, reg=3] }, - opcode! { "rem-double" := 0xAF impl format_23x[len=2, reg=3] }, - // binops/2addr - opcode! 
{ "add-int/2addr" := 0xB0 impl format_12x[len=1, reg=2] }, - opcode! { "sub-int/2addr" := 0xB1 impl format_12x[len=1, reg=2] }, - opcode! { "mul-int/2addr" := 0xB2 impl format_12x[len=1, reg=2] }, - opcode! { "div-int/2addr" := 0xB3 impl format_12x[len=1, reg=2] }, - opcode! { "rem-int/2addr" := 0xB4 impl format_12x[len=1, reg=2] }, - opcode! { "and-int/2addr" := 0xB5 impl format_12x[len=1, reg=2] }, - opcode! { "or-int/2addr" := 0xB6 impl format_12x[len=1, reg=2] }, - opcode! { "xor-int/2addr" := 0xB7 impl format_12x[len=1, reg=2] }, - opcode! { "shl-int/2addr" := 0xB8 impl format_12x[len=1, reg=2] }, - opcode! { "shr-int/2addr" := 0xB9 impl format_12x[len=1, reg=2] }, - opcode! { "ushr-int/2addr" := 0xBA impl format_12x[len=1, reg=2] }, - opcode! { "add-long/2addr" := 0xBB impl format_12x[len=1, reg=2] }, - opcode! { "sub-long/2addr" := 0xBC impl format_12x[len=1, reg=2] }, - opcode! { "mul-long/2addr" := 0xBD impl format_12x[len=1, reg=2] }, - opcode! { "div-long/2addr" := 0xBE impl format_12x[len=1, reg=2] }, - opcode! { "rem-long/2addr" := 0xBF impl format_12x[len=1, reg=2] }, - opcode! { "and-long/2addr" := 0xC0 impl format_12x[len=1, reg=2] }, - opcode! { "or-long/2addr" := 0xC1 impl format_12x[len=1, reg=2] }, - opcode! { "xor-long/2addr" := 0xC2 impl format_12x[len=1, reg=2] }, - opcode! { "shl-long/2addr" := 0xC3 impl format_12x[len=1, reg=2] }, - opcode! { "shr-long/2addr" := 0xC4 impl format_12x[len=1, reg=2] }, - opcode! { "ushr-long/2addr" := 0xC5 impl format_12x[len=1, reg=2] }, - opcode! { "add-float/2addr" := 0xC6 impl format_12x[len=1, reg=2] }, - opcode! { "sub-float/2addr" := 0xC7 impl format_12x[len=1, reg=2] }, - opcode! { "mul-float/2addr" := 0xC8 impl format_12x[len=1, reg=2] }, - opcode! { "div-float/2addr" := 0xC9 impl format_12x[len=1, reg=2] }, - opcode! { "rem-float/2addr" := 0xCA impl format_12x[len=1, reg=2] }, - opcode! { "add-double/2addr" := 0xCB impl format_12x[len=1, reg=2] }, - opcode! 
{ "sub-double/2addr" := 0xCC impl format_12x[len=1, reg=2] }, - opcode! { "mul-double/2addr" := 0xCD impl format_12x[len=1, reg=2] }, - opcode! { "div-double/2addr" := 0xCE impl format_12x[len=1, reg=2] }, - opcode! { "rem-double/2addr" := 0xCF impl format_12x[len=1, reg=2] }, - // binops/lit16 - opcode! { "add-int/lit16" := 0xD0 impl format_22s[len=2, reg=3] }, - opcode! { "rsub-int/lit16" := 0xD1 impl format_22s[len=2, reg=3] }, - opcode! { "mul-int/lit16" := 0xD2 impl format_22s[len=2, reg=3] }, - opcode! { "div-int/lit16" := 0xD3 impl format_22s[len=2, reg=3] }, - opcode! { "rem-int/lit16" := 0xD4 impl format_22s[len=2, reg=3] }, - opcode! { "and-int/lit16" := 0xD5 impl format_22s[len=2, reg=3] }, - opcode! { "or-int/lit16" := 0xD6 impl format_22s[len=2, reg=3] }, - opcode! { "xor-int/lit16" := 0xD7 impl format_22s[len=2, reg=3] }, - // binops/lit8 - opcode! { "add-int/lit8" := 0xD8 impl format_22b[len=2, reg=3] }, - opcode! { "rsub-int/lit8" := 0xD9 impl format_22b[len=2, reg=3] }, - opcode! { "mul-int/lit8" := 0xDA impl format_22b[len=2, reg=3] }, - opcode! { "div-int/lit8" := 0xDB impl format_22b[len=2, reg=3] }, - opcode! { "rem-int/lit8" := 0xDC impl format_22b[len=2, reg=3] }, - opcode! { "and-int/lit8" := 0xDD impl format_22b[len=2, reg=3] }, - opcode! { "or-int/lit8" := 0xDE impl format_22b[len=2, reg=3] }, - opcode! { "xor-int/lit8" := 0xDF impl format_22b[len=2, reg=3] }, - opcode! { "shl-int/lit8" := 0xE0 impl format_22b[len=2, reg=3] }, - opcode! { "shr-int/lit8" := 0xE1 impl format_22b[len=2, reg=3] }, - opcode! 
{ "ushr-int/lit8" := 0xE2 impl format_22b[len=2, reg=3] }, - // (unused) - opcode!(0xE3), - opcode!(0xE4), - opcode!(0xE5), - opcode!(0xE6), - opcode!(0xE7), - opcode!(0xE8), - opcode!(0xE9), - opcode!(0xEA), - opcode!(0xEB), - opcode!(0xEC), - opcode!(0xED), - opcode!(0xEE), - opcode!(0xEF), - opcode!(0xF0), - opcode!(0xF1), - opcode!(0xF2), - opcode!(0xF3), - opcode!(0xF4), - opcode!(0xF5), - opcode!(0xF6), - opcode!(0xF7), - opcode!(0xF8), - opcode!(0xF9), - opcode! { "invoke-polymorphic" := 0xFA impl format_45cc[len=4, reg=7] }, - opcode! { "invoke-polymorphic/range" := 0xFB impl format_4rcc[len=4, reg=7] }, - opcode! { "invoke-custom" := 0xFC impl format_35c[len=4, reg=7] }, - opcode! { "invoke-custom/range" := 0xFD impl format_3rc[len=4, reg=7] }, - opcode! { "const-method-handle" := 0xFE impl format_21c[len=2, reg=2] }, - opcode! { "const-method-type" := 0xFF impl format_21c[len=2, reg=2] }, -]; - -/// pseudo-format used for unused opcodes; suggested for use as the nominal -/// format for a breakpoint opcode -pub fn format_00x(_: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - Ok(InsnFormat::Format00x) -} - -pub fn format_10x( - code: &mut Cursor<&'_ [u8]>, - insn: &mut Insn, - _: IDexRef<'_>, -) -> Result { - let val = code.read_u16::()?; - if val & 0xFF == 0 { - match val { - 0x0100 => { - packed_switch(code, insn)?; - } - 0x0200 => { - sparse_switch(code, insn)?; - } - 0x0300 => { - fill_array_data(code, insn)?; - } - _ => {} - } - } - Ok(InsnFormat::Format10x) -} - -/// ID: 12x -/// Syntax: `op vA, vB` -/// Format: `B|A|op` -pub fn format_12x(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format12x { - a: ((value & 0x0F00) >> 8) as u8, - b: ((value & 0xF000) >> 12) as u8, - }) -} - -/// ID: 11n -/// Syntax: `op vA, #+B` -/// Format: `B|A|op` -pub fn format_11n(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = 
code.read_u16::()?; - Ok(InsnFormat::Format11n { - a: ((value & 0x0F00) >> 8) as u8, - b: Index::Literal(((value & 0xF000) >> 12) as i64), - }) -} - -/// ID: 11x -/// Syntax: `op vAA` -/// Format: `AA|op` -pub fn format_11x(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format11x { - a: ((value & 0xFF00) >> 8) as u8, - }) -} - -/// ID: 10t -/// Syntax: `op +AA` -/// Format: `AA|op` -pub fn format_10t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format10t { - a: ((value & 0xFF00) >> 8) as i8, - }) -} - -/// ID: 20t -/// Syntax: `op +AAAA` -/// Format: `||op AAAA` -pub fn format_20t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - code.seek(std::io::SeekFrom::Current(2))?; - Ok(InsnFormat::Format20t { - a: code.read_i16::()?, - }) -} - -/// ID: 20bc (unused) -/// Syntax: `op AA, kind@BBBB` -/// Format: `AA|op BBBB` -pub fn format_20bc( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - _: IDexRef<'_>, -) -> Result { - let value = code.read_u16::()?; - let index_value = code.read_u16::()?; - Ok(InsnFormat::Format20bc { - a: ((value & 0xFF00) >> 8) as u8, - b: Index::Unknown(index_value as u32), - }) -} - -/// ID: 22x -/// Syntax: `op vAA, vBBBB` -/// Format: `AA|op BBBB` -pub fn format_22x(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format22x { - a: ((value & 0xFF00) >> 8) as u8, - b: code.read_u16::()?, - }) -} - -/// ID: 21t -/// Syntax: `op vAA, +BBBB` -/// Format: `AA|op BBBB` -pub fn format_21t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format21t { - a: ((value & 0xFF00) >> 8) as u8, - b: code.read_u16::()? 
as i16, - }) -} - -/// ID: 21s -/// Syntax: `op vAA, #+BBBB` -/// Format: `AA|op BBBB` -pub fn format_21s(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - Ok(InsnFormat::Format21s { - a: ((value & 0xFF00) >> 8) as u8, - b: Index::Literal(code.read_u16::()? as i64), - }) -} - -/// ID: 21h -/// Syntax: `op vAA, #+BBBB0000` -/// Format: `AA|op BBBB` -pub fn format_21h(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let index_value = code.read_u16::()?; - Ok(InsnFormat::Format21h { - a: ((value & 0xFF00) >> 8) as u8, - b: match value & 0xFF { - 0x15 => - /* const/high16 */ - { - Index::Literal((index_value as i64) << 16) - } - 0x19 => - /* const-wide/high16 */ - { - Index::Literal((index_value as i64) << 48) - } - _ => Index::Unknown(index_value as u32), - }, - }) -} - -/// ID: 21c -/// Syntax: `op vAA, thing@BBBB` -/// Format: `AA|op BBBB` -pub fn format_21c( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let value = code.read_u16::()?; - let index_value = code.read_u16::()? as u32; - Ok(InsnFormat::Format21c { - a: ((value & 0xFF00) >> 8) as u8, - b: match value & 0xFF { - 0x1A => - /* const-string */ - { - Index::String(dex.get_string(index_value)?) - } - 0x1C | 0x60..=0x6d => - /* const-class */ - { - Index::Field(dex.get_field(index_value)?) - } - 0x1F | 0x22 => - /* check-cast | new-instance */ - { - Index::Type(dex.get_type(index_value)?) - } - 0xFE => - /* const-method-handle */ - { - Index::MethodHandle(dex.get_method_handle(index_value)?) - } - 0xFF => - /* const-method-type */ - { - Index::Proto(dex.get_proto(index_value)?) 
- } - _ => Index::Unknown(index_value), - }, - }) -} - -/// ID: 23x -/// Syntax: `op vAA, vBB, vCC` -/// Format: `AA|op CC|BB` -pub fn format_23x(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let next = code.read_u16::()?; - Ok(InsnFormat::Format23x { - a: ((value & 0xFF00) >> 8) as u8, - b: (next & 0x00FF) as u8, - c: ((next & 0xFF00) >> 8) as u8, - }) -} - -/// ID: 22b -/// Syntax: `op vAA, vBB, +#CC` -/// Format: `AA|op CC|BB` -pub fn format_22b(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let next = code.read_u16::()?; - Ok(InsnFormat::Format22b { - a: ((value & 0xFF00) >> 8) as u8, - b: (next & 0x00FF) as u8, - c: Index::Literal(((next & 0xFF00) >> 8) as i64), - }) -} - -/// ID: 22t -/// Syntax: `op vA, vB, +CCCC` -/// Format: `B|A|op CCCCC` -pub fn format_22t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let next = code.read_u16::()?; - Ok(InsnFormat::Format22t { - a: ((value & 0x0F00) >> 8) as u8, - b: ((value & 0xF000) >> 12) as u8, - c: next as i16, - }) -} - -/// ID: 22s -/// Syntax: `op vA, vB, #+CCCC` -/// Format: `B|A|op CCCCC` -pub fn format_22s(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let next = code.read_u16::()?; - Ok(InsnFormat::Format22s { - a: ((value & 0x0F00) >> 8) as u8, - b: ((value & 0xF000) >> 12) as u8, - c: Index::Literal(next as i64), - }) -} - -/// ID: 22c -/// Syntax: `op vA, vB, thing@CCCC` -/// Format: `B|A|op CCCCC` -pub fn format_22c( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let value = code.read_u16::()?; - let next = code.read_u16::()?; - Ok(InsnFormat::Format22c { - a: ((value & 0x0F00) >> 8) as u8, - b: ((value & 0xF000) >> 12) as u8, - c: match value & 0xFF { - 0x20 /* instance-of */ => { - Index::Type(dex.get_type(next as u32)?) 
- } - _=> { - Index::Field(dex.get_field(next as u32)?) - } - }, - }) -} - -/// ID: 30t -/// Syntax: `op +AAAAAAAA` -/// Format: `||op AAAA_lo AAAA_hi` -pub fn format_30t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - code.seek(std::io::SeekFrom::Current(2))?; - Ok(InsnFormat::Format30t { - // index 0 is where the opcode is stored - a: code.read_i32::()?, - }) -} - -/// ID: 32x -/// Syntax: `op vAAAA, vBBBB` -/// Format: `||op AAAA BBBB` -pub fn format_32x(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - code.seek(std::io::SeekFrom::Current(2))?; - Ok(InsnFormat::Format32x { - a: code.read_u16::()?, - b: code.read_u16::()?, - }) -} - -/// ID: 31i -/// Syntax: `op vAA, #+BBBBBBBB` -/// Format: `AA|op BBBB_lo BBBB_hi` -pub fn format_31i(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let index = code.read_i32::()?; - Ok(InsnFormat::Format31i { - a: ((value & 0xFF) >> 8) as u8, - b: Index::Literal(index as i64), - }) -} - -/// ID: 31t -/// Syntax: `op: vAA, +BBBBBBBB` -/// Format: `AA|op BBBB_lo BBBB_hi` -pub fn format_31t(code: &mut Cursor<&'_ [u8]>, _: &mut Insn, _: IDexRef<'_>) -> Result { - let value = code.read_u16::()?; - let b = code.read_i32::()?; - Ok(InsnFormat::Format31t { - a: ((value & 0xFF) >> 8) as u8, - b, - }) -} - -/// ID: 31c -/// Syntax: `op vAA, string@BBBBBBBB` -/// Format: `AA|op BBBB_lo BBBB_hi` -pub fn format_31c( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let a = code.read_u16::()?; - let index = code.read_u32::()?; - Ok(InsnFormat::Format31c { - a: ((a & 0xFF) >> 8) as u8, - b: Index::String(dex.get_string(index)?), - }) -} - -/// ID: 35c -/// Syntax: `op {vC, vD, vE, vF, vG}, ref@BBBB` (based on A) -/// Format: `A|G|op BBBB F|E|D|C` -pub fn format_35c( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let first = code.read_u16::()?; - let second = 
code.read_u16::()?; - let third = code.read_u16::()?; - Ok(InsnFormat::Format35c { - a: ((first & 0xF000) >> 12) as u32, - g: ((first & 0x0F00) >> 8) as u32, - b: match first & 0x00FF { - 0x24 /* filled-new-array */ => { - Index::Type(dex.get_type(second as u32)?) - }, - 0x6E..=0x72 /* invoke-kind */ => { - Index::Method(dex.get_method(second as u32)?) - }, - 0xFC /* invoke-custom */ => { - Index::CallSite(dex.get_call_site(second as u32)?) - }, - _ => { - Index::Unknown(second as u32) - } - }, - f: ((third & 0xF000) >> 12) as u32, - e: ((third & 0x0F00) >> 8) as u32, - d: ((third & 0x00F0) >> 4) as u32, - c: (third & 0x000F) as u32, - }) -} - -/// ID: 3rc -/// Syntax: `op {vCCCC .. vNNNN}, {vCCCC .. vNNNN}` -/// Format: `AA|op BBBB CCCC .. NNNN` -pub fn format_3rc( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let value: u16 = code.read_u16::()?; - let count = (value & 0xFF00) >> 8; - let b: u16 = code.read_u16::()?; - let c = code.read_u16::()?; - - let n = (c + count) - 1; - Ok(InsnFormat::Format3rc { - a: count as u8, - b: match value & 0xFF { - 0x25 /* filled-new-array/range */ => { - Index::Type(dex.get_type(b as u32)?) - }, - 0x74..=0x78 /* invoke-kind/range */=> { - Index::Method(dex.get_method(b as u32)?) - }, - 0xFD /* invoke-custom/range */ => { - Index::CallSite(dex.get_call_site(b as u32)?) - } - _ => Index::Unknown(b as u32), - }, - c, - /* from AOSP: - where NNNN = CCCC+AA-1, that is A determines the count 0..255, and C determines - the first register. 
- */ - regs: c..n, - }) -} - -/// ID: 45cc -/// Syntax: `op {vC, vD, vE, vF, vG}, method@BBBB, prototype@HHHH` -/// Format: `A|G|op BBBB F|E|D|C HHHH` -pub fn format_45cc( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let value = code.read_u16::()?; - let b: u16 = code.read_u16::()?; - let v2 = code.read_u16::()?; - let h: u16 = code.read_u16::()?; - Ok(InsnFormat::Format45cc { - a: ((value & 0xF000) >> 12) as u8, - g: ((value & 0x0F00) >> 8) as u8, - b: Index::Method(dex.get_method(b as u32)?), - f: ((v2 & 0xF000) >> 8) as u8, - e: ((v2 & 0x0F00) >> 8) as u8, - d: ((v2 & 0x00F0) >> 4) as u8, - c: (v2 & 0x000F) as u8, - h: Index::Proto(dex.get_proto(h as u32)?), - }) -} - -/// ID: 4rcc -/// Syntax: `op {vCCCC .. vNNNN}, method@BBBB, prototype@HHHH` -/// AA|op BBBB CCCC HHHH -pub fn format_4rcc( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - dex: IDexRef<'_>, -) -> Result { - let count = (code.read_u16::()? & 0xFF00) >> 8; - let b = code.read_u16::()?; - let c = code.read_u16::()?; - let h = code.read_u16::()?; - let n = (c + count) - 1; - Ok(InsnFormat::Format4rcc { - a: count as u8, - b: Index::Method(dex.get_method(b as u32)?), - c, - regs: c..n, - h: Index::Proto(dex.get_proto(h as u32)?), - }) -} - -/// ID: 51l -/// Syntax: `op vAA, #+BBBBBBBBBBBBBBBB` -/// Format: `AA|op AAAA BBBB_lo BBBB BBBB BBBB_hi` -pub fn format_51l( - code: &mut Cursor<&'_ [u8]>, - _: &mut Insn, - _dex: IDexRef<'_>, -) -> Result { - let a = ((code.read_u16::()? 
& 0xF000) >> 12) as u8; - let b = code.read_i64::()?; - Ok(InsnFormat::Format51l { - a, - b: Index::Literal(b), - }) -} - -// payload implementation -pub fn packed_switch(code: &mut Cursor<&'_ [u8]>, insn: &mut Insn) -> Result<()> { - let data = PackedSwitch::read(code)?; - insn.payload = Some(Payload::PackedSwitch(data)); - Ok(()) -} - -pub fn sparse_switch(code: &mut Cursor<&'_ [u8]>, insn: &mut Insn) -> Result<()> { - let data = SparseSwitch::read(code)?; - insn.payload = Some(Payload::SparseSwitch(data)); - Ok(()) -} - -pub fn fill_array_data(code: &mut Cursor<&'_ [u8]>, insn: &mut Insn) -> Result<()> { - // ident is already processed - let data = FillArrayData::read(code)?; - insn.payload = Some(Payload::FillArrayData(data)); - Ok(()) -} - -impl Debug for Index { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Index::Unknown(x) => write!(f, "{:#x}", x), - Index::String(x) => write!(f, "{}", x), - Index::Type(x) => write!(f, "{:?}", x), - Index::Field(x) => write!(f, "{:?}", x), - Index::Method(x) => write!(f, "{:?}", x), - Index::MethodHandle(x) => write!(f, "{:?}", x), - Index::Proto(x) => write!(f, "{:?}", x), - Index::CallSite(x) => write!(f, "{:?}", x), - Index::Literal(x) => write!(f, "{:#x}", x), - } - } -} diff --git a/src/dalvik/mod.rs b/src/dalvik/mod.rs deleted file mode 100644 index ef3502b..0000000 --- a/src/dalvik/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod dex; -pub mod error; -pub mod insns; -pub mod file; \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 4c626b3..8b13789 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1 @@ - -pub mod dalvik; -pub mod smali; \ No newline at end of file diff --git a/src/smali/io.rs b/src/smali/io.rs deleted file mode 100644 index e3307fc..0000000 --- a/src/smali/io.rs +++ /dev/null @@ -1,506 +0,0 @@ -use std::io::Write; -use std::rc::Rc; - -use crate::dalvik::dex::{AccessFlags, DexType, FieldIdItem, MethodIdItem}; -use crate::dalvik::error::Result; 
-use crate::dalvik::file::annotation::DexAnnotation; -use crate::dalvik::file::field::DexField; -use crate::dalvik::file::method::DexMethod; -use crate::dalvik::file::DexClassDef; -use crate::dalvik::file::{method::DexPrototype, DexValue, IDexRef}; -use crate::dalvik::insns::{self, Index, Insn, InsnFormat, Payload}; - -// A small hack to implement write_* operations for all -// `Write` types. -impl SmaliWrite for W {} - -pub trait SmaliWrite: Write { - //TODO: docs - - fn write_access_flags(&mut self, access_flags: &AccessFlags) -> Result<()> { - // Access flags are written using their lowercase names - access_flags - .iter_names() - .map(|(x, _)| x.to_lowercase()) - .try_for_each(|f| write!(self, "{} ", f))?; - Ok(()) - } - - fn write_type(&mut self, type_: &DexType) -> Result<()> { - if type_.dim > 0 { - write!(self, "{}", "[".repeat(type_.dim))?; - } - write!(self, "{}", type_.descriptor)?; - Ok(()) - } - - fn write_field_ref(&mut self, ref_: &Rc, dex: IDexRef<'_>) -> Result<()> { - // class->field_name:field_type - let class = dex.get_type(ref_.class_idx as u32)?; - let name = dex.get_string(ref_.name_idx)?; - let type_ = dex.get_type(ref_.type_idx as u32)?; - self.write_type(&class)?; - write!(self, "->{}:", name)?; - self.write_type(&type_)?; - Ok(()) - } - - fn write_method_ref(&mut self, ref_: &Rc, dex: IDexRef<'_>) -> Result<()> { - // class->method_name|method_descriptor - let class = dex.get_type(ref_.class_idx as u32)?; - let name = dex.get_string(ref_.name_idx)?; - let type_ = dex.get_proto(ref_.proto_idx as u32)?; - self.write_type(&class)?; - write!(self, "->{}", name)?; - self.write_proto(&type_)?; - Ok(()) - } - - fn write_proto(&mut self, proto: &DexPrototype) -> Result<()> { - // (param_types) return_type - write!(self, "(")?; - for param in proto.parameters.iter() { - self.write_type(param)?; - } - write!(self, ")")?; - self.write_type(&proto.return_type)?; - Ok(()) - } - - fn write_value(&mut self, value: &DexValue, dex: IDexRef<'_>) -> 
Result<()> { - match value { - DexValue::String(v) => write!(self, "\"{}\"", v.escape_default())?, - DexValue::Type(v) => self.write_type(v)?, - DexValue::FieldRef(v) => self.write_field_ref(v, dex)?, - DexValue::MethodRef(.., v) => self.write_method_ref(v, dex)?, - DexValue::MethodType(v) => self.write_proto(v)?, - DexValue::Int(v) => write!(self, "{:#x}", v)?, - DexValue::Float(v) => write!(self, "{}", v)?, - DexValue::Long(v) => write!(self, "{:#x}", v)?, - DexValue::Double(v) => write!(self, "{}", v)?, - DexValue::True => write!(self, "true")?, - DexValue::False => write!(self, "false")?, - DexValue::Null => write!(self, "null")?, - DexValue::Array(v) => { - write!(self, "[")?; - for (i, value) in v.iter().enumerate() { - self.write_value(value, dex)?; - if i != v.len() - 1 { - write!(self, ", ")?; - } - } - write!(self, "]")?; - } - DexValue::Data(v, _) => write!(self, "", v)?, - DexValue::Char(v) => write!(self, "'{}'", v.escape_default())?, - DexValue::Short(v) => write!(self, "{:#x}", v)?, - DexValue::Byte(v) => write!(self, "{:#x}", v)?, - // DexValue::Annotation(v) => self.w, - DexValue::Enum(v) => { - self.write_field_ref(v, dex)?; - } - _ => write!(self, "{:?}", value)?, - } - Ok(()) - } - - fn write_index(&mut self, index: &Index, dex: IDexRef<'_>) -> Result<()> { - match index { - Index::Literal(a) => write!(self, "{:#x}", a)?, - Index::Field(a) => { - self.write_field_ref(a, dex)?; - } - Index::Method(a) => { - self.write_method_ref(a, dex)?; - } - Index::Proto(a) => { - // (arg_type)return_type - self.write_proto(a)?; - } - Index::Type(a) => { - // type_name:field_type - write!(self, "{}", a)?; - } - Index::String(a) => { - write!(self, "\"{}\"", a.escape_default())?; - } - _ => { - // TODO - write!(self, "{:?}", index)?; - } - } - Ok(()) - } - - fn write_insn(&mut self, insn: &Insn, dex: IDexRef<'_>, indent: usize) -> Result<()> { - let indent_val = " ".repeat(indent); - write!(self, "{}", indent_val)?; - if let Some(payload) = &insn.payload { - 
let indent2 = " ".repeat(indent + 1); - match payload { - Payload::FillArrayData(data) => { - write!(self, ".array-data {:#x} {:#x}", data.width, data.size)?; - for v in data.data.iter() { - writeln!(self, "{}{:#x}", indent2, v)?; - } - write!(self, ".end array-data")?; - } - Payload::PackedSwitch(pswitch) => { - writeln!(self, ".packed-switch {:#x}", pswitch.first_key)?; - for v in pswitch.targets.iter() { - writeln!(self, "{}{:#x}", indent2, v)?; - } - writeln!(self, "{}.end packed-switch", indent_val)?; - } - Payload::SparseSwitch(switch) => { - writeln!(self, ".sparse-switch")?; - for (key, target) in switch.keys.iter().zip(switch.targets.iter()) { - write!(self, "{}{:#x} -> {:#x}", indent2, key, target)?; - } - writeln!(self, "{}.end sparse-switch", indent_val)?; - } - } - Ok(()) - } else { - write!(self, "{}", insn.opcode.name)?; - if insn.opcode.length > 0 { - write!(self, " ")?; - } - match &insn.format { - // N/A - InsnFormat::Format00x => { - write!(self, "")?; - } - InsnFormat::Format10x => { /* op */ } - - InsnFormat::Format12x { a, b } => { - write!(self, "v{}, v{}", a, b)?; // op vA, vB - } - InsnFormat::Format11n { a, b } => { - write!(self, "v{}, {:?}", a, b)?; // op vA, #+B - } - InsnFormat::Format11x { a } => { - write!(self, "v{}", a)?; // op vAA - } - InsnFormat::Format10t { a } => { - write!(self, "{}", a)?; // op +AA - } - InsnFormat::Format20t { a } => { - write!(self, "{}", a)?; // op +AAAA - } - InsnFormat::Format22x { a, b } => { - write!(self, "v{}, v{}", a, b)?; // op vAA, vBBBB - } - InsnFormat::Format21t { a, b } => { - write!(self, "v{}, {}", a, b)?; // op vAA, +BBBB - } - InsnFormat::Format21s { a, b } => { - write!(self, "v{}, ", a)?; // op vAA, +BBBB - self.write_index(b, dex)?; - } - InsnFormat::Format21h { a, b } => { - write!(self, "v{}, ", a)?; // op vAA, +BBBB0000 - self.write_index(b, dex)?; - } - InsnFormat::Format21c { a, b } => { - write!(self, "v{}, ", a)?; // op vAA, kind@BBBB - self.write_index(b, dex)?; - } - 
InsnFormat::Format23x { a, b, c } => { - write!(self, "v{}, v{}, v{}", a, b, c)?; // op vAA, vBB, vCC - } - InsnFormat::Format22b { a, b, c } => { - write!(self, "v{}, v{}, ", a, b)?; // op vAA, vBB, #+CC - self.write_index(c, dex)?; - } - InsnFormat::Format22t { a, b, c } => { - write!(self, "v{}, v{}, {}", a, b, c)?; // op vAA, vBB, +CCCC - } - InsnFormat::Format22s { a, b, c } => { - write!(self, "v{}, v{}, ", a, b)?; // op vAA, vBB, +CCCC - self.write_index(c, dex)?; - } - InsnFormat::Format22c { a, b, c } => { - write!(self, "v{}, v{}, ", a, b)?; // op vAA, vBB, kind@CCCC - self.write_index(c, dex)?; - } - InsnFormat::Format30t { a } => { - write!(self, "{}", a)?; // op +AAAAAAAA - } - InsnFormat::Format32x { a, b } => { - write!(self, "v{}, v{}", a, b)?; // op vAAAA, vBBBB - } - InsnFormat::Format31i { a, b } => { - write!(self, "v{}, ", a)?; // op vAA, #+BBBBBBBB - self.write_index(b, dex)?; - } - InsnFormat::Format31t { a, b } => { - write!(self, "v{}, {}", a, b)?; // op vAAAA, +BBBB - } - InsnFormat::Format31c { a, b } => { - write!(self, "v{}, ", a)?; // op vAAAA, kind@BBBB - self.write_index(b, dex)?; - } - - InsnFormat::Format35c { - a, - b, - c, - d, - e, - f, - g, - } => { - // [A=n] op {vX...vN}, kind@BBBB - write!(self, "{{")?; - match a { - 1 => write!(self, "v{}", c)?, - 2 => write!(self, "v{}, v{}", c, d)?, - 3 => write!(self, "v{}, v{}, v{}", c, d, e)?, - 4 => write!(self, "v{}, v{}, v{}, v{}", c, d, e, f)?, - 5 => write!(self, "v{}, v{}, v{}, v{}, v{}", c, d, e, f, g)?, - _ => {} - } - write!(self, "}}, ")?; - self.write_index(b, dex)?; - } - - InsnFormat::Format3rc { - a: _, - b, - c: _, - regs, - } => { - // [A=n] op {vX...vN}, kind@BBBB - write!(self, "{{")?; - for i in regs.start..regs.end { - write!(self, "v{}", i)?; - if i != regs.end { - write!(self, ", ")?; - } - } - write!(self, "}}, ")?; - self.write_index(b, dex)?; - } - - InsnFormat::Format45cc { - a, - b, - c, - d, - e, - f, - g, - h, - } => { - // [A=n] op {vX...vN}, kind@BBBB, 
proto@HHHH - write!(self, "{{")?; - match a { - 1 => write!(self, "v{}", c)?, - 2 => write!(self, "v{}, v{}", c, d)?, - 3 => write!(self, "v{}, v{}, v{}", c, d, e)?, - 4 => write!(self, "v{}, v{}, v{}, v{}", c, d, e, f)?, - 5 => write!(self, "v{}, v{}, v{}, v{}, v{}", c, d, e, f, g)?, - _ => {} - } - write!(self, "}}, ")?; - self.write_index(b, dex)?; - write!(self, ", ")?; - self.write_index(h, dex)?; - } - - InsnFormat::Format4rcc { - a: _, - b, - c: _, - h, - regs, - } => { - // [A=n] op {vX...vN}, kind@BBBB, proto@HHHH - write!(self, "{{")?; - for i in regs.start..regs.end { - write!(self, "v{}", i)?; - if i != regs.end { - write!(self, ", ")?; - } - } - write!(self, "}}, ")?; - self.write_index(b, dex)?; - write!(self, ", ")?; - self.write_index(h, dex)?; - } - - InsnFormat::Format51l { a, b } => { - write!(self, "v{}, ", a)?; // op vAA, +BBBBBBBB - self.write_index(b, dex)?; - } - - _ => { - write!(self, "")?; - } - } - Ok(()) - } - } - - // --- implementation for multiline content --- - - /// Writes an annotation to the underlying stream using the given - /// indent. This method uses '.subannotation' if is_sub is true. 
- fn write_annotation( - &mut self, - annotation: &DexAnnotation, - dex: IDexRef<'_>, - indent: usize, - is_sub: bool, - ) -> Result<()> { - let indent_val = " ".repeat(indent); - if is_sub { - write!(self, "{}.subannotation ", indent_val)?; - } else { - write!(self, "{}.annotation ", indent_val)?; - } - - // - if let Some(visibility) = &annotation.visibility { - write!(self, "{} ", format!("{:?}", visibility).to_lowercase())?; - } - self.write_type(&annotation.type_)?; - - // possible values - if !annotation.values.is_empty() { - let indent2 = " ".repeat(indent + 1); - for (key, value) in annotation.values.iter() { - write!(self, "\n{}{} = ", indent2, key)?; - match value { - // let us format annotations with sub-annotations - DexValue::Annotation(a) => self.write_annotation(a, dex, indent + 2, true)?, - DexValue::Array(a) => { - writeln!(self, "[")?; - let indent3 = " ".repeat(indent + 2); - for (i, v) in a.iter().enumerate() { - write!(self, "{}", indent3)?; - self.write_value(v, dex)?; - if i != a.len() - 1 { - writeln!(self, ",")?; - } - } - write!(self, "\n{}]", indent2)?; - } - _ => self.write_value(value, dex)?, - } - } - } - - if is_sub { - write!(self, "\n{}.end subannotation", indent_val)?; - } else { - write!(self, "\n{}.end annotation", indent_val)?; - } - - Ok(()) - } - /// Writes a field to the underlying stream. 
- fn write_field(&mut self, field: &DexField, dex: IDexRef<'_>) -> Result<()> { - write!(self, ".field ")?; - if let Some(flags) = &field.access_flags { - self.write_access_flags(flags)?; - } - write!(self, "{}:", field.name)?; - self.write_type(&field.type_)?; - - if let Some(init_val) = &field.init_value { - write!(self, " = ")?; - self.write_value(init_val, dex)?; - } - - if !field.annotations.is_empty() { - writeln!(self)?; - for annotation in &field.annotations { - self.write_annotation(annotation, dex, 1, false)?; - } - writeln!(self, "\n.end field")?; - } - Ok(()) - } - - /// Dex method representation for smali - fn write_method(&mut self, method: &DexMethod, dex: IDexRef<'_>) -> Result<()> { - write!(self, ".method ")?; - if let Some(flags) = &method.access_flags { - self.write_access_flags(flags)?; - } - write!(self, "{}", method.name)?; - self.write_proto(&method.proto)?; - - if let Some(code) = &method.code { - let indent = " "; - writeln!(self, "\n{}.registers {}", indent, code.registers_size)?; - - if !method.annotations.is_empty() { - writeln!(self)?; - for annotation in &method.annotations { - self.write_annotation(annotation, dex, 1, false)?; - } - writeln!(self)?; - } - - for instruction in insns::disasm(code, dex)? { - write!(self, "\n{:#06x}:\n", instruction.range.start)?; - if let Some(debug) = &method.debug_info { - if let Some(line) = debug.lines.get(&(instruction.range.start as u32)) { - writeln!(self, "{}.line {}", indent, line)?; - } - } - self.write_insn(&instruction, dex, 1)?; - } - } - writeln!(self, "\n.end method")?; - Ok(()) - } - - /// Writes a class to the underlying stream. - fn write_class(&mut self, class: &DexClassDef, dex: IDexRef<'_>) -> Result<()> { - // class header includes name, potential superclass, source file - // name and interfaces. 
- write!(self, ".class ")?; - if let Some(flags) = &class.flags { - self.write_access_flags(flags)?; - } - writeln!(self, "{}", class.type_.descriptor)?; - if let Some(superclass) = &class.super_class { - writeln!(self, ".extends {}", superclass.descriptor)?; - } - if !class.interfaces.is_empty() { - for interface in class.interfaces.iter() { - // We use the descriptor here to avoid having to call - // self.write_type() multiple times. - writeln!(self, ".implements {}", interface.descriptor)?; - } - } - if let Some(source) = &class.source_file { - write!(self, ".source \"{}\"", source.escape_default())?; - } - - if !class.annotations.is_empty() { - for annotation in &class.annotations { - writeln!(self, "\n")?; - self.write_annotation(annotation, dex, 0, false)?; - } - } - - // iterate over all fields and write them - for (_, field) in class.get_fields() { - writeln!(self, "\n")?; - self.write_field(field, dex)?; - } - - // iterate over all methods and write them - for (_, method) in class.get_methods() { - writeln!(self, "\n")?; - self.write_method(method, dex)?; - } - - Ok(()) - } -} diff --git a/src/smali/mod.rs b/src/smali/mod.rs deleted file mode 100644 index 99dd6fa..0000000 --- a/src/smali/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod io; -pub use io::*; From 2fc9ad65bf59cb690dd8c152d8f056a1966d92bc Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:38:17 +0100 Subject: [PATCH 02/46] Change dependencies and edition to 2021 --- .gitignore | 4 +++- Cargo.toml | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index b089d6e..167e36d 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,6 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ -_build/ \ No newline at end of file +_build/ + +*.class \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index b559791..6f0ec3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,12 @@ [package] name = "dexrs" version = "0.1.0" -edition = "2024" +edition = "2021" [dependencies] adler32 = "1.2.0" -binrw = "0.13.3" -bitflags = "2.5.0" -byteorder = "1.5.0" -lazy_static = "1.4.0" -leb128 = "0.2.5" +memmap2 = "0.9.5" openssl = "0.10.64" +plain = "0.2.3" +thiserror = "2.0.11" +varint-simd = "0.4.1" From eae70386176b91416020cd06f72ab4a1d867c02e Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:39:19 +0100 Subject: [PATCH 03/46] Generic errors for validating DEX files --- src/error.rs | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/error.rs diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..6ca6f00 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,111 @@ +use std::fmt::Debug; + +use thiserror::Error; + +#[derive(Error)] +pub enum DexError { + #[error("Empty or truncated file")] + TruncatedFile, + + #[error("Bad file magic")] + BadFileMagic, + + #[error("Unknown dex version: {version}")] + UnknownDexVersion { version: u32 }, + + #[error("Bad file size ({actual}, expected at least {expected})")] + FileSizeAtLeast { actual: usize, expected: usize }, + + #[error("Bad file size ({actual}, expected at most {expected})")] + FileSizeAtMost { actual: usize, expected: usize }, + + #[error("Bad header size: {size}, expected {expected}")] + BadHeaderSize { size: u32, expected: u32 }, + + #[error("Unexpected endian tag: {0:x}")] + UnexpectedEndianess(u32), + + #[error("Bad checksum: {actual:#08x}, expected {expected:#08x}")] + BadChecksum { actual: u32, expected: u32 }, + + #[error("Offset({offset}) should be within file size {size} for {section}")] + BadOffsetTooLarge { + offset: u32, + size: usize, + 
section: &'static str, + }, + + #[error("Offset({offset}) should be after header({header_size}) for {section}")] + BadOffsetInHeader { + offset: u32, + header_size: usize, + section: &'static str, + }, + + #[error("Offset({offset}) should be zero when size is zero for {section}")] + BadOffsetNoSize { offset: u32, section: &'static str }, + + #[error("Section end({offset}) should be within file size {size} for {section}")] + BadSection { + offset: u32, + size: usize, + section: &'static str, + }, + + #[error("{0}")] + DexFileError(String), + + #[error("Index({index}) to {item_ty} should be less than {max}")] + DexIndexError { + index: u32, + max: usize, + item_ty: &'static str, + }, + + #[error("Bad string data({0}) does not end with a null byte!")] + BadStringData(usize), + + #[error("{0}")] + Mutf8DecodeError(#[from] std::string::FromUtf16Error), + + #[error("Failed to read {location}: {item_ty} at offset {offset} (array_len={array_len}) overflows with file size({file_size})")] + DexLayoutError { + location: String, + offset: u32, + item_ty: &'static str, + array_len: usize, + file_size: usize, + }, + +} + +#[macro_export] +macro_rules! 
dex_err { + ($name:ident) => { + Err(DexError::$name) + }; + ($name:ident, $arg1:literal, $($arg:tt)*) => { + Err(DexError::$name(format!($arg1, $($arg)*))) + }; + (DexLayoutError, $dex:ident, $off:ident, $item_ty:expr, $array_len:expr) => { + Err(DexError::DexLayoutError { + location: $dex.get_location().to_string(), + offset: $off, + item_ty: $item_ty, + array_len: $array_len, + file_size: $dex.file_size(), + }) + }; + ($name:ident { $($arg:tt)* }) => { + Err(DexError::$name { $($arg)* }) + }; + ($name:ident, $($arg:tt)*) => { + Err(DexError::$name($($arg)*)) + }; +} + +impl Debug for DexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} From 627dd75362c468f04b9049a3b9240d14e4135f74 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:39:40 +0100 Subject: [PATCH 04/46] Wrapper functions to parse leb128 integers --- src/leb128.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/leb128.rs diff --git a/src/leb128.rs b/src/leb128.rs new file mode 100644 index 0000000..64984a6 --- /dev/null +++ b/src/leb128.rs @@ -0,0 +1,27 @@ +use varint_simd; + +#[inline(always)] +pub fn decode_leb128(data_in: &[u8]) -> (T, usize) { + // TODO: convert to result + match varint_simd::decode::(data_in) { + Ok((value, size)) => (value, size), + Err(err) => panic!( + "Error decoding LEB128: {:?}. 
Data: {:?}", + err, + data_in.as_ptr() + ), + } +} + +#[inline(always)] +pub fn decode_leb128p1(data_in: &[u8]) -> (i32, usize) { + let (result, size) = decode_leb128::(data_in); + ((result - 1) as i32, size) +} + +#[inline(always)] +pub fn decode_leb128_off(data_in: &[u8], ptr_pos: &mut usize) -> T { + let (value, size) = decode_leb128(data_in); + *ptr_pos += size; + value +} From a4d0635febd8d0eb4e9ce2dd27ba9e2282f3b2f0 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:40:47 +0100 Subject: [PATCH 05/46] UTF module to parse mutf8 --- + THIS CODE IS NOT STABLE AND WILL CHANGE! + Next todo will be to adjust the parsing and add fuzzer targets --- src/utf.rs | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 src/utf.rs diff --git a/src/utf.rs b/src/utf.rs new file mode 100644 index 0000000..3688aae --- /dev/null +++ b/src/utf.rs @@ -0,0 +1,250 @@ + +pub fn mutf8_to_str(utf8_data_in: &[u8]) -> crate::Result { + let utf16_data = mutf8_to_utf16(utf8_data_in); + Ok(String::from_utf16(&utf16_data)?) 
+} + +pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> String { + let utf16_data = mutf8_to_utf16(utf8_data_in); + String::from_utf16_lossy(&utf16_data) +} + +pub fn str_to_mutf8(str_data_in: &str) -> Vec { + let utf16_data_in: Vec = str_data_in.encode_utf16().collect(); + utf16_to_mutf8(&utf16_data_in, &Options::new()) +} + +pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { + let utf16_data_in: Vec = str_data_in.encode_utf16().collect(); + let options = Options::new().replace_bad_surrogates(true); + utf16_to_mutf8(&utf16_data_in, &options) +} + +#[inline] +pub fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { + let one = utf8_data_in[*offset]; + *offset += 1; + if one & 0x80 == 0 { + return one as u32; + } + + let two = utf8_data_in[*offset]; + *offset += 1; + if one & 0x20 == 0 { + return ((one & 0x1f) as u32) << 6 | (two & 0x3F) as u32; + } + + let three = utf8_data_in[*offset]; + *offset += 1; + if one & 0x10 == 0 { + return ((one & 0x0f) as u32) << 12 | ((two & 0x3F) as u32) << 6 | (three & 0x3F) as u32; + } + + let four = utf8_data_in[*offset]; + *offset += 1; + let code_point = ((one & 0x0F) as u32) << 18 + | ((two & 0x3F) as u32) << 12 + | ((three & 0x3F) as u32) << 6 + | (four & 0x3F) as u32; + + let mut surrogate_pair: u32 = 0x00; + surrogate_pair |= ((code_point >> 10) + 0xd7c0) & 0xFFFF; + surrogate_pair |= ((code_point & 0x03FF) + 0xdc80) << 16; + return surrogate_pair; +} + +#[inline(always)] +pub fn trailing_utf16_char(maybe_pair: u32) -> u16 { + (maybe_pair >> 16) as u16 +} + +#[inline(always)] +pub fn leading_utf16_char(maybe_pair: u32) -> u16 { + (maybe_pair & 0x0000FFFFF) as u16 +} + +#[inline(always)] +pub fn is_lead(ch: u16) -> bool { + ch & 0xFC00 == 0xd800 +} + +#[inline(always)] +pub fn is_trail(ch: u16) -> bool { + ch & 0xFC00 == 0xDC00 +} + +#[inline(always)] +pub fn is_surrogate(ch: u16) -> bool { + ch & 0xF800 == 0xD800 +} + +#[inline(always)] +pub fn is_surrogate_lead(ch: u16) -> bool { + ch & 0x0400 == 0x00 +} + 
+#[inline(always)] +pub fn get_supplementary(lead: u16, trail: u16) -> u32 { + const OFFSET: u32 = (0xd800 << 10) + 0xdc00 - 0x10000; + ((lead as u32) << 10) + (trail as u32) - OFFSET +} + +pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> usize { + let mut len = 0; + let mut in_idx = 0; + while in_idx < utf8_in_len { + let ic = utf8_data_in[in_idx]; + in_idx += 1; + len += 1; + if ic & 0x80 == 0 { + continue; // one byze encoding + } + + in_idx += 1; + if ic & 0x20 == 0 { + // two byze encoding + continue; + } + + in_idx += 1; + if ic & 0x10 == 0 { + continue; + } + + // Four-byte encoding: needs to be converted into a surrogate + // pair. + in_idx += 1; + len += 1; + } + len +} + +pub fn mutf8_to_utf16(utf8_data_in: &[u8]) -> Vec { + let utf8_in_len = utf8_data_in.len() - 1; + let out_chars = mutf8_len(utf8_data_in, utf8_in_len); + convert_mutf8_to_utf16(utf8_data_in, utf8_in_len, out_chars) +} + +pub fn convert_mutf8_to_utf16( + utf8_data_in: &[u8], + utf8_in_len: usize, + out_chars: usize, +) -> Vec { + if utf8_data_in.len() == out_chars { + // common case where all chars are ASCII + return utf8_data_in.iter().map(|i| *i as u16).collect(); + } + + let mut utf16_data_out: Vec = Vec::with_capacity(out_chars); + let mut in_idx = 0x00; + while in_idx < utf8_in_len { + let ch = utf16_from_utf8(utf8_data_in, &mut in_idx); + let leading = leading_utf16_char(ch); + let trailing = trailing_utf16_char(ch); + + utf16_data_out.push(leading); + if trailing != 0 { + utf16_data_out.push(trailing); + } + } + utf16_data_out +} + +pub fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { + let mut mutf8_len = 0; + convert_utf16_to_mutf8(utf16_in, options, |_| mutf8_len += 1); + + let mut mutf8_out; + if mutf8_len == utf16_in.len() { + // only ascii chars + mutf8_out = utf16_in.iter().map(|ch| *ch as u8).collect(); + } else { + mutf8_out = vec![0x00; mutf8_len + 1]; + convert_utf16_to_mutf8(utf16_in, options, |ch| mutf8_out.push(ch)); + } + + // append 
trailing null + mutf8_out.push(0x00); + mutf8_out + +} + +pub struct Options { + pub short_zero: bool, + pub replace_bad_surrogates: bool, +} + +impl Options { + pub fn new() -> Options { + Options { + short_zero: false, + replace_bad_surrogates: false, + } + } + + pub fn use_short_zero(mut self, enable: bool) -> Self { + self.short_zero = enable; + self + } + + pub fn replace_bad_surrogates(mut self, enable: bool) -> Self { + self.replace_bad_surrogates = enable; + self + } +} + +fn convert_utf16_to_mutf8(utf16_in: &[u16], options: &Options, mut append: Append) +where + Append: FnMut(u8) -> (), +{ + let mut in_idx = 0; + while in_idx < utf16_in.len() { + let ch = utf16_in[in_idx]; + if ch < 0x80 && (options.short_zero || ch != 0) { + append(ch as u8); + } else if ch < 0x800 { + append(((ch >> 6) | 0xC0) as u8); + append(((ch & 0x3F) | 0x80) as u8); + } else if is_surrogate(ch) + || (is_lead(ch) && in_idx + 1 != utf16_in.len() && is_trail(utf16_in[in_idx + 1])) + { + if options.replace_bad_surrogates + && (!is_surrogate_lead(ch) + && in_idx + 1 != utf16_in.len() + && !is_trail(utf16_in[in_idx + 1])) + { + append('?' 
as u8); + } else { + let code_point = get_supplementary(ch, utf16_in[in_idx + 1]); + in_idx += 1; + append(((code_point >> 18) | 0xf0) as u8); + append((((code_point >> 12) & 0x3f) | 0x80) as u8); + append((((code_point >> 6) & 0x3f) | 0x80) as u8); + append(((code_point & 0x3f) | 0x80) as u8); + } + } else { + append(((ch >> 12) | 0xE0) as u8); + append((((ch >> 6) & 0x3F) | 0x80) as u8); + append(((ch & 0x3F) | 0x80) as u8); + } + + in_idx += 1; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_str_to_mutf8() { + let data = "foobar"; + assert_eq!(str_to_mutf8(data), b"foobar\0"); + } + + #[test] + fn test_mutf8_to_str() { + let data = &[102, 111, 111, 98, 97, 114, 0]; + assert_eq!(mutf8_to_str_lossy(data), "foobar".to_string()); + } +} From 6dfc2242e4f8ccf9c55142731ecf6e8acdbaddb7 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:41:43 +0100 Subject: [PATCH 06/46] Main library entry --- + Result type should be used accross the library --- src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 8b13789..23aa1b7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,8 @@ +use std::result; +pub mod error; +pub mod file; +pub mod leb128; +pub mod utf; + +pub type Result = result::Result; From 81eaa27e01e1d5dfdc5ab54341aa0d90a59fd606 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:42:51 +0100 Subject: [PATCH 07/46] Dex file structs (incomplete) --- + Header + various items --- src/file/header.rs | 115 ++++++++++++++++++++++++++++ src/file/structs.rs | 181 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 src/file/header.rs create mode 100644 src/file/structs.rs diff --git a/src/file/header.rs b/src/file/header.rs new file mode 100644 index 0000000..eeb95a0 --- /dev/null +++ b/src/file/header.rs @@ -0,0 +1,115 @@ +#[repr(C)] 
+#[derive(Debug)] +pub struct Header { + /// magic value + magic: [u8; 8], + + /// Taken from Android docs: + /// + /// Adler32 checksum of the rest of the file (everything but `magic` and this + /// field); used to detect file corruption. + pub checksum: u32, + + /// Android docs: + /// + /// SHA-1 signature (hash) of the rest of the file (everything but `magic`, + /// `checksum`, and this field); used to uniquely identify files. + signature: [u8; 20], + + /// Size of the entire file including the header. + pub file_size: u32, + + /// Size of the header (this struct), in bytes. It is always 0x70. + pub header_size: u32, + + /// Endian contant - ART source code only supports one byte order + pub endian_tag: u32, + + // unused { + /// size of the link section, or 0 if this file isn't statically linked + pub link_size: u32, + + /// offset from the start of the file to the link section, or `0` if + /// `link_size == 0`. The offset, if non-zero, should be to an offset + /// into the `link_data` section. + pub link_off: u32, + // } unused + /// offset from the start of the file to the map item. The offset, which + /// must be non-zero, should be to an offset into the `data` section. + pub map_off: u32, + + /// count of strings in the string identifiers list + pub string_ids_size: u32, + + /// offset from the start of the file to the string identifiers list, or + /// `0` if `string_ids_size == 0`. + pub string_ids_off: u32, + + /// count of elements in the type identifiers list, at most `65535` + pub type_ids_size: u32, + + /// offset from the start of the file to the type identifiers list, or + /// `0` if `type_ids_size == 0`. + pub type_ids_off: u32, + + /// count of elements in the proto identifiers list, at most `65535` + pub proto_ids_size: u32, + + /// offset from the start of the file to the proto identifiers list, or + /// `0` if `proto_ids_size == 0`. 
+ pub proto_ids_off: u32, + + /// count of elements in the field identifiers list + pub field_ids_size: u32, + + /// offset from the start of the file to the field identifiers list, or + /// `0` if `field_ids_size == 0`. + pub field_ids_off: u32, + + /// count of elements in the method identifiers list + pub method_ids_size: u32, + + /// offset from the start of the file to the method identifiers list, or + /// `0` if `method_ids_size == 0`. + pub method_ids_off: u32, + + /// count of elements in the class definitions list + pub class_defs_size: u32, + + /// offset from the start of the file to the class definitions list, or + /// `0` if `class_defs_size == 0`. + pub class_defs_off: u32, + + /// size of the data section (in bytes) + pub data_size: u32, + + /// offset from the start of the file to the data section + pub data_off: u32, +} + +unsafe impl plain::Plain for Header {} + +impl Header { + pub fn get_magic(&self) -> &[u8; 8] { + &self.magic + } + + pub fn get_signature(&self) -> &[u8; 20] { + &self.signature + } + + pub fn get_version(&self) -> u32 { + let version_raw = &self.magic[4..7]; + String::from_utf8_lossy(version_raw) + .parse() + .unwrap_or_default() // will lead to invalid dex file + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct HeaderV41 { + pub inner: Header, + pub container_size: u32, // total size of all dex files in the container. + pub header_off: u32, // offset of this dex's header in the container. 
+} diff --git a/src/file/structs.rs b/src/file/structs.rs new file mode 100644 index 0000000..28f9377 --- /dev/null +++ b/src/file/structs.rs @@ -0,0 +1,181 @@ +use plain::Plain; + +pub type StringIndex = u32; + +#[repr(C)] +#[derive(Debug)] +pub struct StringId { + pub string_data_off: u32, +} + +unsafe impl plain::Plain for StringId {} + +impl StringId { + #[inline] + pub const fn offset(&self) -> usize { + self.string_data_off as usize + } +} + +pub type TypeIndex = u16; + +#[repr(C)] +#[derive(Debug)] +pub struct TypeId { + pub descriptor_idx: StringIndex, +} + +unsafe impl plain::Plain for TypeId {} + +#[repr(C)] +#[derive(Debug)] +pub struct FieldId { + pub class_idx: TypeIndex, // index into type_ids_ array for defining class + pub type_idx: TypeIndex, // index into type_ids_ array for field type + pub name_idx: StringIndex, // index into string_ids_ array for field name +} + +unsafe impl plain::Plain for FieldId {} + +pub type ProtoIndex = u16; + +#[repr(C)] +#[derive(Debug)] +pub struct ProtoId { + pub shorty_idx: StringIndex, // index into string_ids array for shorty descriptor + pub return_type_idx: TypeIndex, // index into type_ids array for return type + pad_: u16, // padding = 0 + pub parameters_off: u32, // file offset to type_list for parameter types +} + +unsafe impl plain::Plain for ProtoId {} + +#[repr(C)] +#[derive(Debug)] +pub struct MethodId { + pub class_idx: TypeIndex, // index into type_ids_ array for defining class + pub proto_idx: ProtoIndex, // index into proto_ids_ array for method signature + pub name_idx: StringIndex, // index into string_ids_ array for method name +} + +unsafe impl plain::Plain for MethodId {} + +#[repr(C)] +#[derive(Debug)] +pub struct ClassDef { + pub class_idx: TypeIndex, // index into type_ids_ array for this class + pad1_: u16, // padding = 0 + pub access_flags: u32, + pub superclass_idx: TypeIndex, // index into type_ids_ array for superclass + pad2_: u16, // padding = 0 + pub interfaces_off: u32, // file 
offset to TypeList + pub source_file_idx: StringIndex, // index into string_ids_ for source file name + pub annotations_off: u32, // file offset to annotations_directory_item + pub class_data_off: u32, // file offset to class_data_item + pub static_values_off: u32, // file offset to EncodedArray +} + +unsafe impl plain::Plain for ClassDef {} + +#[repr(C)] +#[derive(Debug)] +pub struct TypeItem { + pub type_idx: TypeIndex, // index into type_ids section +} + +unsafe impl plain::Plain for TypeItem {} + +pub type TypeList<'a> = &'a [TypeItem]; + +#[repr(C)] +#[derive(Debug)] +pub struct MapItem { + pub type_: MapItemType, + unused_: u16, + pub size: u32, + pub off: u32, +} + +unsafe impl plain::Plain for MapItem {} + +pub type MapList<'a> = &'a [MapItem]; + +#[repr(u16)] +#[derive(Debug)] +pub enum MapItemType { + HeaderItem = 0x0000, + StringIdItem = 0x0001, + TypeIdItem = 0x0002, + ProtoIdItem = 0x0003, + FieldIdItem = 0x0004, + MethodIdItem = 0x0005, + ClassDefItem = 0x0006, + CallSiteIdItem = 0x0007, + MethodHandleItem = 0x0008, + MapList = 0x1000, + TypeList = 0x1001, + AnnotationSetRefList = 0x1002, + AnnotationSetItem = 0x1003, + ClassDataItem = 0x2000, + CodeItem = 0x2001, + StringDataItem = 0x2002, + DebugInfoItem = 0x2003, + AnnotationItem = 0x2004, + EncodedArrayItem = 0x2005, + AnnotationsDirectoryItem = 0x2006, + HiddenapiClassData = 0xF000, +} + +#[repr(C)] +#[derive(Debug)] +pub struct MethodHandleItem { + pub method_handle_type: TypeIndex, + reserved1_: u16, + pub field_or_method_idx: u16, // Field index for accessors, method index otherwise. + reserved2_: u16, +} + +unsafe impl plain::Plain for MethodHandleItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct CallSiteIdItem { + pub data_off: u32, // Offset into data section pointing to encoded array items. 
+} + +unsafe impl plain::Plain for CallSiteIdItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct HiddenapiClassData<'a> { + pub size: u32, + flags_offset: &'a [u8], +} + +impl<'a> HiddenapiClassData<'a> { + pub fn get_flags_slice(&self, class_def_idx: u32) -> Option<&'a [u8]> { + let offset = (class_def_idx * 4) as usize; + match u32::from_bytes(&self.flags_offset[offset..]) { + Ok(0) => None, + // offset starts from beginning of this object + Ok(start) => Some(&self.flags_offset[(*start - 4) as usize..]), + _ => None, + } + } +} + +unsafe impl<'a> plain::Plain for HiddenapiClassData<'a> {} + +#[repr(C)] +#[derive(Debug)] +pub struct CodeItem { + pub registers_size: u16, + pub ins_size: u16, + pub outs_size: u16, + pub tries_size: u16, + pub debug_info_off: u32, + pub insns_size: u32, +} + + +unsafe impl plain::Plain for CodeItem {} \ No newline at end of file From 3f65228b8d6e23a24ffedabca5bab9c7a6a978ef Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:43:22 +0100 Subject: [PATCH 08/46] main DexFile struct, class accessor and dex verifier (incomplete) --- src/file/class_accessor.rs | 381 ++++++++++++++++++++++++ src/file/mod.rs | 573 +++++++++++++++++++++++++++++++++++++ src/file/modifiers.rs | 35 +++ src/file/verifier.rs | 178 ++++++++++++ 4 files changed, 1167 insertions(+) create mode 100644 src/file/class_accessor.rs create mode 100644 src/file/mod.rs create mode 100644 src/file/modifiers.rs create mode 100644 src/file/verifier.rs diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs new file mode 100644 index 0000000..322e07d --- /dev/null +++ b/src/file/class_accessor.rs @@ -0,0 +1,381 @@ +use super::{modifiers, ClassDef, DexFile, FieldId, InvokeType, MethodId, ACC_STATIC}; +use crate::{ + file::{ACC_CONSTRUCTOR, ACC_INTERFACE}, + leb128::decode_leb128_off, + Result, +}; + +pub trait ClassItemBase<'a>: Copy + Clone { + fn read(&mut self, data: &'a [u8], pos: &mut usize); + + 
fn init(dex: &'a DexFile<'a>) -> Self; + + fn next_section(&mut self); +} + +#[derive(Copy, Clone)] +pub struct Method<'a> { + dex: &'a DexFile<'a>, + + pub index: u32, + pub access_flags: u32, + pub code_offset: u32, + pub is_static_or_direct: bool, +} + +impl<'a> Method<'a> { + #[inline] + pub fn get_direct_invoke_type(&self) -> InvokeType { + if self.access_flags & ACC_STATIC != 0 { + InvokeType::Static + } else { + InvokeType::Direct + } + } + + #[inline(always)] + pub fn get_method_id(&self) -> Result<&'a MethodId> { + self.dex.get_method_id(self.index) + } + + #[inline(always)] + pub fn get_virtual_invoke_type(&self, class_access_flags: u32) -> InvokeType { + debug_assert!(self.access_flags & ACC_STATIC == 0x00); + if class_access_flags & ACC_INTERFACE != 0 { + InvokeType::Interface + } else if (self.access_flags & ACC_CONSTRUCTOR != 0) { + InvokeType::Super + } else { + InvokeType::Virtual + } + } +} + +impl<'a> ClassItemBase<'a> for Method<'a> { + fn read(&mut self, data: &'a [u8], pos: &mut usize) { + self.index += decode_leb128_off::(&data[*pos..], pos); + self.access_flags = decode_leb128_off::(&data[*pos..], pos); + self.code_offset = decode_leb128_off::(&data[*pos..], pos); + } + + fn init(dex: &'a DexFile<'a>) -> Self { + Self { + dex, + index: 0, + access_flags: 0, + code_offset: 0, + is_static_or_direct: false, + } + } + + fn next_section(&mut self) { + self.is_static_or_direct = true; + } +} + +#[derive(Copy, Clone)] +pub struct Field<'a> { + dex: &'a DexFile<'a>, + + pub index: u32, + pub access_flags: u32, + pub is_static: bool, +} + +impl<'a> Field<'a> { + #[inline(always)] + pub fn is_static(&self) -> bool { + self.is_static + } + + pub fn get_field_id(&self) -> Result<&'a FieldId> { + self.dex.get_field_id(self.index) + } +} + +impl<'a> ClassItemBase<'a> for Field<'a> { + fn read(&mut self, data: &'a [u8], pos: &mut usize) { + self.index += decode_leb128_off::(&data[*pos..], pos); + self.access_flags = decode_leb128_off::(&data[*pos..], pos); 
+ } + + fn init(dex: &'a DexFile<'a>) -> Self { + Self { + dex, + index: 0, + access_flags: 0, + is_static: true, + } + } + + fn next_section(&mut self) { + self.is_static = false; + } +} + +pub struct ClassAccessor<'a> { + dex: &'a DexFile<'a>, + ptr_pos: usize, + class_data: &'a [u8], + + pub num_static_fields: u32, + pub num_instance_fields: u32, + pub num_direct_methods: u32, + pub num_virtual_methods: u32, + + // will be set after first time parsing the data + static_fields_off: u32, +} + +impl<'a> DexFile<'a> { + pub fn get_class_accessor(&self, class_def: &ClassDef) -> Option> { + match class_def.class_data_off { + 0 => None, + off => Some(ClassAccessor::from_raw(self, &self.mmap[off as usize..])), + } + } +} + +type FieldVisitor = fn(&Field<'_>) -> Result<()>; +type MethodVisitor = fn(&Method<'_>) -> Result<()>; + +fn null_method_visitor(_method: &Method<'_>) -> Result<()> { + Ok(()) +} + +fn null_field_visitor(_field: &Field<'_>) -> Result<()> { + Ok(()) +} + +impl<'a> ClassAccessor<'a> { + pub fn from_raw(dex: &'a DexFile<'a>, class_data: &'a [u8]) -> Self { + let mut accessor = Self { + dex, + ptr_pos: 0, + class_data, + num_direct_methods: 0, + num_virtual_methods: 0, + num_static_fields: 0, + num_instance_fields: 0, + static_fields_off: 0, + }; + accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos); + accessor.num_instance_fields = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.num_direct_methods = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.num_virtual_methods = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.static_fields_off = accessor.ptr_pos as u32; + accessor + } + + #[inline(always)] + pub fn num_fields(&self) -> usize { + self.num_instance_fields as usize + self.num_static_fields as usize + } + + #[inline(always)] + pub fn num_methods(&self) -> usize { + self.num_direct_methods as usize + 
self.num_virtual_methods as usize + } + + #[inline(always)] + pub fn visit_fields( + &self, + static_field_visitor: FieldVisitor, + instance_field_visitor: FieldVisitor, + ) -> Result<()> { + self.visit_fields_and_methods( + static_field_visitor, + instance_field_visitor, + null_method_visitor, + null_method_visitor, + ) + } + + #[inline(always)] + pub fn visit_methods( + &self, + direct_method_visitor: MethodVisitor, + virtual_method_visitor: MethodVisitor, + ) -> Result<()> { + self.visit_fields_and_methods( + null_field_visitor, + null_field_visitor, + direct_method_visitor, + virtual_method_visitor, + ) + } + + #[inline] + pub fn visit_fields_and_methods( + &self, + static_field_visitor: FieldVisitor, + instance_field_visitor: FieldVisitor, + direct_method_visitor: MethodVisitor, + virtual_method_visitor: MethodVisitor, + ) -> Result<()> { + let mut field = Field::init(self.dex); + let mut offset = self.static_fields_off as usize; + if offset == 0 { + panic!("Static fields offset is zero which means there is no class data associated with this class"); + } + + self.visit_members( + self.num_static_fields, + &mut offset, + static_field_visitor, + &mut field, + )?; + // switch to instance fields + field.next_section(); + self.visit_members( + self.num_instance_fields, + &mut offset, + instance_field_visitor, + &mut field, + )?; + + let mut method = Method::init(self.dex); + self.visit_members( + self.num_direct_methods, + &mut offset, + direct_method_visitor, + &mut method, + )?; + method.next_section(); + self.visit_members( + self.num_virtual_methods, + &mut offset, + virtual_method_visitor, + &mut method, + ) + } + + #[inline(always)] + pub fn get_fields(&self) -> DataIterator<'a, Field<'a>> { + DataIterator::new( + self.dex, + self.class_data, + self.static_fields_off as usize, + self.num_static_fields as usize, + self.num_fields(), + ) + } + + #[inline(always)] + pub fn get_static_fieds(&self) -> impl Iterator> { + DataIterator::new( + self.dex, + 
self.class_data, + self.static_fields_off as usize, + self.num_static_fields as usize, + self.num_static_fields as usize, + ) + } + + #[inline(always)] + pub fn get_instance_fields(&self) -> impl Iterator> { + self.get_fields().skip(self.num_static_fields as usize) + } + + #[inline(always)] + pub fn get_methods(&self) -> Result>> { + let mut field = Field::init(self.dex); + let mut offset = self.static_fields_off as usize; + self.visit_members( + self.num_fields() as u32, + &mut offset, + null_field_visitor, + &mut field, + )?; + // switch to instance fields + Ok(DataIterator::new( + self.dex, + self.class_data, + offset as usize, + self.num_direct_methods as usize, + self.num_methods(), + )) + } + + #[inline(always)] + pub fn get_direct_methods(&self) -> Result>> { + Ok(self.get_methods()?.take(self.num_direct_methods as usize)) + } + + #[inline(always)] + pub fn get_virtual_methods(&self) -> Result>> { + Ok(self.get_methods()?.skip(self.num_direct_methods as usize)) + } + + #[inline(always)] + fn visit_members( + &self, + count: u32, + offset: &mut usize, + visitor: F, + iter: &mut T, + ) -> Result<()> + where + T: ClassItemBase<'a>, + F: Fn(&T) -> Result<()>, + { + for _ in 0..count { + iter.read(&self.class_data, offset); + visitor(&iter)?; + } + Ok(()) + } +} + +pub struct DataIterator<'a, T: ClassItemBase<'a>> { + class_data: &'a [u8], + value: T, + + pos: usize, // mutable + off: usize, // mutable + partition_pos: usize, // const + end_pos: usize, // const +} + +impl<'a, T: ClassItemBase<'a>> DataIterator<'a, T> { + pub fn new( + dex: &'a DexFile<'a>, + class_data: &'a [u8], + start_pos: usize, + partition_pos: usize, + end_pos: usize, + ) -> Self { + Self { + class_data, + value: T::init(dex), + pos: 0, + partition_pos, + off: start_pos, + end_pos, + } + } + + pub fn is_valid(&self) -> bool { + self.pos < self.end_pos + } + + pub fn offset(&self) -> usize { + self.off + } +} + +impl<'a, T: ClassItemBase<'a>> Iterator for DataIterator<'a, T> { + type Item = 
T; + + fn next(&mut self) -> Option { + if self.is_valid() { + if self.pos == self.partition_pos { + self.value.next_section(); + } + self.value.read(&self.class_data, &mut self.off); + self.pos += 1; + return Some(self.value); + } + return None; + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs new file mode 100644 index 0000000..3dbebde --- /dev/null +++ b/src/file/mod.rs @@ -0,0 +1,573 @@ +use memmap2::{Mmap, MmapAsRawDesc}; +use plain::Plain; + +pub mod structs; +pub use structs::*; +pub mod header; +pub use header::*; +pub mod class_accessor; +pub mod verifier; +pub use class_accessor::*; +pub mod modifiers; +pub use modifiers::*; +pub mod instruction; +pub use instruction::*; + +use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; + +pub const DEX_MAGIC: &[u8] = b"dex\n"; +pub const DEX_MAGIC_VERSIONS: &[&[u8]] = &[ + b"035\0", b"037\0", // Dex version 038: Android "O" and beyond. + b"038\0", // Dex version 039: Android "P" and beyond. + b"039\0", // Dex version 040: Android "Q" and beyond (aka Android 10). + b"040\0", // Dex version 041: Android "V" and beyond (aka Android 15). 
+ b"041\0", +]; + +pub const DEX_ENDIAN_CONSTANT: u32 = 0x12345678; + +pub struct DexFileContainer { + mmap: memmap2::Mmap, + location: String, + pub verify: bool, + pub verify_checksum: bool, +} + +impl DexFileContainer { + pub fn new(file: T) -> Self + where + T: MmapAsRawDesc, + { + Self { + mmap: unsafe { memmap2::Mmap::map(file).unwrap() }, + verify: false, + verify_checksum: false, + location: "[anonymous]".to_string(), + } + } + + pub fn location(&mut self, location: String) -> &mut Self { + self.location = location; + self + } + + pub fn verify(mut self, verify: bool) -> Self { + self.verify = verify; + self + } + + pub fn verify_checksum(mut self, verify_checksum: bool) -> Self { + self.verify_checksum = verify_checksum; + self + } + + pub fn open(&self) -> Result> { + DexFile::open(self) + } + + pub fn get_location(&self) -> &str { + &self.location + } + + pub fn data(&self) -> &memmap2::Mmap { + &self.mmap + } +} + +pub struct DexFile<'a> { + mmap: &'a memmap2::Mmap, + header: &'a Header, + + string_ids: &'a [StringId], + type_ids: &'a [TypeId], + field_ids: &'a [FieldId], + proto_ids: &'a [ProtoId], + method_ids: &'a [MethodId], + class_defs: &'a [ClassDef], + method_handles: &'a [MethodHandleItem], + call_site_ids: &'a [CallSiteIdItem], + + hiddenapi_data: Option<&'a HiddenapiClassData<'a>>, + + location: String, +} + +macro_rules! check_lt { + ($idx:expr, $count:expr, $item_ty:tt) => { + if $idx >= $count { + panic!( + "Index({}) of {} is bigger than maximum({})", + $idx, + stringify!($item_ty), + $count + ); + } + }; +} + +macro_rules! check_lt_result { + ($idx:expr, $count:expr, $item_ty:tt) => { + if ($idx as usize) >= ($count as usize) { + return dex_err!(DexIndexError { + index: $idx as u32, + item_ty: stringify!($item_ty), + max: $count as usize, + }); + } + }; +} + +impl<'a> DexFile<'a> { + pub fn get_section(base: &'a Mmap, offset: u32, len: u32) -> &'a [T] { + let size = base.len(); + if size < std::mem::size_of::
() || len == 0 { + return &[]; + } + + let data = &base[offset as usize..]; + match T::slice_from_bytes_len(data, len as usize) { + Ok(slice) => slice, + Err(_) => &[], + } + } + + pub fn from_raw_parts(base: &'a Mmap, location: &str) -> Self { + let header = Header::from_bytes(&base).unwrap(); + let mut dex = Self { + mmap: base, + header, + string_ids: DexFile::get_section(base, header.string_ids_off, header.string_ids_size), + type_ids: DexFile::get_section(base, header.type_ids_off, header.type_ids_size), + field_ids: DexFile::get_section(base, header.field_ids_off, header.field_ids_size), + proto_ids: DexFile::get_section(base, header.proto_ids_off, header.proto_ids_size), + method_ids: DexFile::get_section(base, header.method_ids_off, header.method_ids_size), + class_defs: DexFile::get_section(base, header.class_defs_off, header.class_defs_size), + method_handles: &[], + call_site_ids: &[], + hiddenapi_data: None, + location: location.to_string(), + }; + + if dex.file_size() < std::mem::size_of::
() { + return dex; // don't parse data + } + + dex.init_sections_from_maplist(); + dex + } + + pub fn open(container: &DexFileContainer) -> Result> { + let loc = container.get_location(); + let size = container.data().len(); + if size < std::mem::size_of::
() { + return dex_err!(DexFileError, "Invalid or truncated file {:?}", loc); + } + + let dex = DexFile::from_raw_parts(container.data(), &loc); + dex.init()?; + if container.verify { + DexFile::verify(&dex, container.verify_checksum)?; + } + Ok(dex) + } + + pub fn expected_header_size(&self) -> u32 { + let version = self.header.get_version(); + if version != 0 { + if version < 41 { + std::mem::size_of::
() as u32 + } else { + std::mem::size_of::() as u32 + } + } else { + 0 + } + } + + pub fn get_location(&self) -> &str { + &self.location + } + + #[inline(always)] + pub fn file_size(&self) -> usize { + self.mmap.len() + } + + // -- strings + #[inline(always)] + pub fn get_string_id(&self, idx: u32) -> Result<&'a StringId> { + check_lt_result!(idx, self.num_string_ids(), StringId); + Ok(&self.string_ids[idx as usize]) + } + + #[inline(always)] + pub fn string_ids(&self) -> &'a [StringId] { + self.string_ids + } + + #[inline(always)] + pub fn num_string_ids(&self) -> u32 { + self.header.string_ids_size + } + + #[inline] + pub fn get_string_data(&self, string_id: &StringId) -> Result<(u32, &'a [u8])> { + check_lt_result!(string_id.offset(), self.file_size(), "string-id"); + let (utf16_len, size) = decode_leb128(&self.mmap[string_id.offset()..]); + + let start = string_id.offset() + size; + check_lt_result!(start, self.file_size(), "string-data"); + match &self.mmap[start..].iter().position(|x| *x == 0) { + Some(pos) => Ok((utf16_len, &self.mmap[start..start + pos + 1])), + None => dex_err!(BadStringData, start), + } + } + + #[inline(always)] + pub fn get_utf16_str_lossy(&self, string_id: &StringId) -> Result { + let (_, data) = self.get_string_data(string_id)?; + Ok(utf::mutf8_to_str_lossy(data)) + } + + #[inline(always)] + pub fn get_utf16_str_lossy_at(&self, idx: u32) -> Result { + let string_id = self.get_string_id(idx)?; + self.get_utf16_str_lossy(string_id) + } + + #[inline(always)] + pub fn get_utf16_str(&self, string_id: &StringId) -> Result { + let (_, data) = self.get_string_data(string_id)?; + crate::utf::mutf8_to_str(data) + } + + #[inline(always)] + pub fn get_utf16_str_at(&self, idx: u32) -> Result { + let string_id = self.get_string_id(idx)?; + self.get_utf16_str(string_id) + } + + // -- types + #[inline(always)] + pub fn get_type_id(&self, idx: TypeIndex) -> Result<&'a TypeId> { + check_lt_result!(idx as u32, self.num_type_ids(), TypeId); + 
Ok(&self.type_ids[idx as usize]) + } + + #[inline(always)] + pub fn num_type_ids(&self) -> u32 { + self.header.type_ids_size + } + + #[inline(always)] + pub fn get_type_ids(&self) -> &'a [TypeId] { + self.type_ids + } + + pub fn get_type_desc(&self, type_id: &TypeId) -> Result<(u32, &'a [u8])> { + self.get_string_data(self.get_string_id(type_id.descriptor_idx)?) + } + + pub fn get_type_desc_at(&self, idx: TypeIndex) -> Result<(u32, &'a [u8])> { + let type_id = self.get_type_id(idx)?; + self.get_string_data(self.get_string_id(type_id.descriptor_idx)?) + } + + pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { + let type_id = self.get_type_id(idx)?; + self.get_utf16_str_lossy_at(type_id.descriptor_idx) + } + + pub fn get_type_desc_utf16_lossy(&self, type_id: &TypeId) -> Result { + self.get_utf16_str_lossy_at(type_id.descriptor_idx) + } + + pub fn get_type_desc_utf16(&self, type_id: &TypeId) -> Result { + self.get_utf16_str_at(type_id.descriptor_idx) + } + + pub fn get_type_desc_utf16_at(&self, idx: TypeIndex) -> Result { + let type_id = self.get_type_id(idx)?; + self.get_utf16_str_at(type_id.descriptor_idx) + } + + // -- code item + #[inline(always)] + pub fn get_code_item(&self, offset: u32) -> Result> { + check_lt_result!(offset, self.file_size(), "code item offset"); + self.data_ptr(offset) + } + + #[inline(always)] + pub fn get_insns_raw(&self, code_off: u32, size_in_code_units: u32) -> Result<&'a [u16]> { + check_lt_result!(code_off, self.file_size(), "code stream offset"); + self.non_null_array_data_ptr(code_off, size_in_code_units as usize) + } + + // -- fields + #[inline] + pub fn get_field_id(&self, idx: u32) -> Result<&'a FieldId> { + check_lt_result!(idx, self.header.field_ids_size, FieldId); + Ok(&self.field_ids[idx as usize]) + } + + #[inline(always)] + pub fn get_field_ids(&self) -> &'a [FieldId] { + self.field_ids + } + + // Proto related methods + pub fn get_proto_id(&self, idx: u32) -> &'a ProtoId { + check_lt!(idx, 
self.header.proto_ids_size, ProtoId); + &self.proto_ids[idx as usize] + } + + pub fn num_proto_ids(&self) -> u32 { + self.header.proto_ids_size + } + + pub fn get_proto_ids(&self) -> &'a [ProtoId] { + self.proto_ids + } + + // method ids related methods + #[inline(always)] + pub fn get_method_id(&self, idx: u32) -> Result<&'a MethodId> { + check_lt_result!(idx, self.header.method_ids_size, MethodId); + Ok(&self.method_ids[idx as usize]) + } + + #[inline(always)] + pub fn num_method_ids(&self) -> u32 { + self.header.method_ids_size + } + + #[inline(always)] + pub fn get_method_ids(&self) -> &'a [MethodId] { + self.method_ids + } + + // classdef related methods + #[inline(always)] + pub fn get_class_def(&self, idx: u32) -> &'a ClassDef { + check_lt!(idx, self.header.class_defs_size, ClassDef); + &self.class_defs[idx as usize] + } + + #[inline(always)] + pub fn num_class_defs(&self) -> u32 { + self.header.class_defs_size + } + + #[inline(always)] + pub fn get_class_defs(&self) -> &'a [ClassDef] { + self.class_defs + } + + #[inline] + pub fn get_class_desc(&self, class_def: &ClassDef) -> Result<(u32, &'a [u8])> { + self.get_type_desc_at(class_def.class_idx) + } + + #[inline] + pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_lossy_at(class_def.class_idx) + } + + #[inline] + pub fn get_class_desc_utf16(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_at(class_def.class_idx) + } + + #[inline] + pub fn get_interfaces_list(&self, class_def: &ClassDef) -> Result>> { + self.get_type_list(class_def.interfaces_off) + } + + // type list related methods + #[inline(always)] + pub fn get_type_list(&self, offset: u32) -> Result>> { + if offset == 0 { + return Ok(None); + } + + check_lt_result!(offset, self.file_size(), TypeList); + let length = u32::from_bytes(&self.mmap[offset as usize..]).unwrap(); + let data_off = offset + std::mem::size_of::() as u32; + + self.array_data_ptr(data_off, *length as usize) + } 
+ + // private methods + #[inline] + fn data_ptr(&self, offset: u32) -> Result> { + match offset { + 0 => Ok(None), + _ => Ok(Some(self.non_null_data_ptr(offset)?)), + } + } + + #[inline] + fn non_null_data_ptr(&self, offset: u32) -> Result<&'a T> { + if offset == 0 { + panic!( + "Attempted to read a null pointer for data type {:?}.", + std::any::type_name::() + ); + } + match T::from_bytes(&self.mmap[offset as usize..]) { + Ok(v) => Ok(&v), + Err(plain::Error::TooShort) => { + dex_err!(DexLayoutError, self, offset, std::any::type_name::(), 0) + } + Err(err) => panic!( + "Error decoding data type {:?}: {:?}", + std::any::type_name::(), + err + ), + } + } + + #[inline] + fn array_data_ptr(&self, offset: u32, len: usize) -> Result> { + match offset { + 0 => Ok(None), + _ => Ok(Some(self.non_null_array_data_ptr(offset, len)?)), + } + } + + #[inline] + fn non_null_array_data_ptr(&self, offset: u32, len: usize) -> Result<&'a [T]> { + if offset == 0 { + panic!( + "Attempted to read a null pointer for data type {:?}.", + std::any::type_name::() + ); + } + match T::slice_from_bytes_len(&self.mmap[offset as usize..], len) { + Ok(v) => Ok(&v), + Err(plain::Error::TooShort) => dex_err!( + DexLayoutError, + self, + offset, + std::any::type_name::(), + len + ), + Err(plain::Error::BadAlignment) => todo!(), + } + } + + fn init(&self) -> Result<()> { + let container_size = self.file_size(); + if container_size < std::mem::size_of::
() { + return dex_err!( + DexFileError, + "Unable to open {:?}: File size is too small to fit dex header", + self.location + ); + } + + self.check_magic_and_version()?; + + let expected_header_size = self.expected_header_size(); + if expected_header_size < self.header.header_size { + return dex_err!( + DexFileError, + "Unable to open {:?}: Header size is {} but {} was expected", + self.location, + expected_header_size, + self.header.header_size + ); + } + + if container_size < self.header.file_size as usize { + return dex_err!( + DexFileError, + "Unable to open {:?}: File size is {} but the header expects {}", + self.location, + container_size, + self.header.file_size + ); + } + Ok(()) + } + + fn check_magic_and_version(&self) -> Result<()> { + if !self.is_magic_valid() { + return dex_err!( + DexFileError, + "Unrecognized magic number in {:?}: {:?}", + self.location, + &self.header.get_magic()[..4] + ); + } + + if !self.is_version_valid() { + return dex_err!( + DexFileError, + "Unrecognized dex version in {:?}: {:?}", + self.location, + &self.header.get_magic()[4..] 
+ ); + } + Ok(()) + } + + fn init_sections_from_maplist(&mut self) { + if self.header.map_off == 0x00 + || self.header.map_off as usize > self.file_size() - std::mem::size_of::() + { + // bad offset + return; + } + + let map_list_size_off = self.header.map_off; + let map_list_off = self.header.map_off + std::mem::size_of::() as u32; + let count: &u32 = match self.non_null_data_ptr(map_list_size_off) { + Ok(v) => v, + Err(_) => { + // bad file will be reported through verifier + return; + } + }; + let map_limit = + (self.file_size() - std::mem::size_of::() - map_list_size_off as usize) + / std::mem::size_of::(); + + if *count as usize > map_limit { + // bad file + return; + } + + // we should unwrap this here + let items = match self.non_null_array_data_ptr::(map_list_off, *count as usize) { + Ok(v) => v, + Err(_) => { + // bad file will be reported through verifier + return; + } + }; + for map_item in items { + match map_item.type_ { + MapItemType::MethodHandleItem => { + self.method_handles = + DexFile::get_section(&self.mmap, map_item.off, map_item.size) + } + MapItemType::CallSiteIdItem => { + self.call_site_ids = + DexFile::get_section(&self.mmap, map_item.off, map_item.size) + } + MapItemType::HiddenapiClassData => { + let item_off = map_item.off as usize; + self.hiddenapi_data = Some( + HiddenapiClassData::from_bytes( + &self.mmap[item_off..item_off + map_item.size as usize], + ) + .unwrap(), + ); + } + _ => {} + } + } + } +} diff --git a/src/file/modifiers.rs b/src/file/modifiers.rs new file mode 100644 index 0000000..8a7ee95 --- /dev/null +++ b/src/file/modifiers.rs @@ -0,0 +1,35 @@ +#[repr(u32)] +#[derive(Default)] +pub enum InvokeType { + Static = 0x00, + Direct, + Virtual, + Super, + Interface, + Polymorphic, + #[default] + Custom, +} + +pub const ACC_PUBLIC: u32 = 0x0001; // class, field, method, ic +pub const ACC_PRIVATE: u32 = 0x0002; // field, method, ic +pub const ACC_PROTECTED: u32 = 0x0004; // field, method, ic +pub const ACC_STATIC: u32 = 0x0008; 
// field, method, ic +pub const ACC_FINAL: u32 = 0x0010; // class, field, method, ic +pub const ACC_SYNCHRONIZED: u32 = 0x0020; // method (only allowed on natives) +pub const ACC_SUPER: u32 = 0x0020; // class (not used in dex) +pub const ACC_VOLATILE: u32 = 0x0040; // field +pub const ACC_BRIDGE: u32 = 0x0040; // method (1.5) +pub const ACC_TRANSIENT: u32 = 0x0080; // field +pub const ACC_VARARGS: u32 = 0x0080; // method (1.5) +pub const ACC_NATIVE: u32 = 0x0100; // method +pub const ACC_INTERFACE: u32 = 0x0200; // class, ic +pub const ACC_ABSTRACT: u32 = 0x0400; // class, method, ic +pub const ACC_STRICT: u32 = 0x0800; // method +pub const ACC_SYNTHETIC: u32 = 0x1000; // class, field, method, ic +pub const ACC_ANNOTATION: u32 = 0x2000; // class, ic (1.5) +pub const ACC_ENUM: u32 = 0x4000; // class, field, ic (1.5) + +pub const ACC_CONSTRUCTOR: u32 = 0x00010000; // method (dex only) <(cl)init> +pub const ACC_DECLARED_SYNCHRONIZED: u32 = 0x00020000; // method (dex only) +pub const ACC_CLASSISPROXY: u32 = 0x00040000; // class (dex only) \ No newline at end of file diff --git a/src/file/verifier.rs b/src/file/verifier.rs new file mode 100644 index 0000000..65637d2 --- /dev/null +++ b/src/file/verifier.rs @@ -0,0 +1,178 @@ +use adler32; + +use crate::{dex_err, error::DexError, Result}; + +use super::{ + DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, +}; + +impl<'a> DexFile<'a> { + pub fn is_magic_valid(&self) -> bool { + &self.header.get_magic()[..4] == DEX_MAGIC + } + + pub fn is_version_valid(&self) -> bool { + let version_raw = &self.header.get_magic()[4..]; + DEX_MAGIC_VERSIONS.contains(&version_raw) + } + + pub fn verify(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { + check_header(dex, verify_checksum)?; + // REVISIT: maybe validate map list items + Ok(()) + } + + pub fn calculate_checksum(&self) -> u32 { + let size = self.file_size(); + let data = &self.mmap[12..size]; + adler32::adler32(data).unwrap() + } +} + +fn 
check_header(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { + let size = dex.file_size(); + if size < std::mem::size_of::
() { + return dex_err!(TruncatedFile); + } + + if !dex.is_magic_valid() { + return dex_err!(BadFileMagic); + } + + if !dex.is_version_valid() { + return dex_err!(UnknownDexVersion { version: dex.header.get_version() }); + } + + // check file size from header + let version = dex.header.get_version(); + let file_size = dex.header.file_size as usize; + let header_size = if version >= 41 { + std::mem::size_of::() + } else { + std::mem::size_of::
() + }; + + if file_size < header_size { + return dex_err!(FileSizeAtLeast { + actual: file_size, + expected: header_size + }); + } + if file_size > size { + return dex_err!(FileSizeAtMost { + actual: file_size, + expected: size + }); + } + + // check header size + if dex.header.header_size as usize != header_size { + return dex_err!(BadHeaderSize { + size: dex.header.header_size, + expected: header_size as u32 + }); + } + + // check endian + if dex.header.endian_tag != DEX_ENDIAN_CONSTANT { + return dex_err!(UnexpectedEndianess, dex.header.endian_tag); + } + + if verify_checksum { + let checksum = dex.calculate_checksum(); + if checksum != dex.header.checksum { + return dex_err!(BadChecksum { + actual: checksum, + expected: dex.header.checksum + }); + } + } + + let header = dex.header; + check_valid_offset_and_size(dex, header.link_off, header.link_size, "link")?; + check_valid_offset_and_size( + dex, + header.map_off, + std::mem::size_of::() as u32, + "map", + )?; + check_valid_offset_and_size( + dex, + header.string_ids_off, + header.string_ids_size, + "string-ids", + )?; + check_valid_offset_and_size(dex, header.type_ids_off, header.type_ids_size, "type-ids")?; + check_valid_offset_and_size( + dex, + header.proto_ids_off, + header.proto_ids_size, + "proto-ids", + )?; + check_valid_offset_and_size( + dex, + header.field_ids_off, + header.field_ids_size, + "field-ids", + )?; + check_valid_offset_and_size( + dex, + header.method_ids_off, + header.method_ids_size, + "method-ids", + )?; + check_valid_offset_and_size( + dex, + header.class_defs_off, + header.class_defs_size, + "class-defs", + )?; + check_valid_offset_and_size(dex, header.data_off, header.data_size, "data")?; + Ok(()) +} + +fn check_valid_offset_and_size( + dex: &DexFile<'_>, + offset: u32, + size: u32, + label: &'static str, +) -> Result<()> { + if size == 0 { + if offset != 0 { + return dex_err!(BadOffsetNoSize { + offset, + section: label + }); + } + + return Ok(()); + } + + let file_size = 
dex.file_size(); + let header_offset = std::mem::size_of::
() as u32; + if offset < header_offset { + return dex_err!(BadOffsetInHeader { + offset, + header_size: header_offset as usize, + section: label + }); + } + if offset as usize > file_size { + return dex_err!(BadOffsetTooLarge { + offset, + size: dex.file_size(), + section: label + }); + } + + if (file_size - offset as usize) < size as usize { + return dex_err!(BadSection { + offset: offset + size, + size: file_size as usize, + section: label + }); + } + + // TODO alignment checks + Ok(()) +} From a042f0771c66e2b7aeab73945631bd19e2e6739f Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 5 Feb 2025 22:26:54 +0100 Subject: [PATCH 09/46] CodeItem accessor and instructions --- + 2nd approach to instructions ported from ART --- src/file/code_item_accessors.rs | 69 +++++ src/file/instruction.rs | 454 ++++++++++++++++++++++++++++++++ src/file/mod.rs | 13 + 3 files changed, 536 insertions(+) create mode 100644 src/file/code_item_accessors.rs create mode 100644 src/file/instruction.rs diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs new file mode 100644 index 0000000..9691a99 --- /dev/null +++ b/src/file/code_item_accessors.rs @@ -0,0 +1,69 @@ +use crate::Result; + +use super::{CodeItem, DexFile, Instruction}; + +pub struct CodeItemAccessor<'a> { + code_item: &'a CodeItem, + insns: &'a [u16], +} + +impl<'a> CodeItemAccessor<'a> { + #[inline] + pub fn insns_size_in_code_units(&self) -> u32 { + self.insns.len() as u32 + } + + #[inline] + pub fn insns_size_in_bytes(&self) -> u32 { + self.insns.len() as u32 * 2 + } + + #[inline(always)] + pub fn insns(&self) -> &'a [u16] { + self.insns + } + + #[inline] + pub fn has_code(&self) -> bool { + !self.insns.is_empty() + } + + #[inline(always)] + pub fn from_code_item( + dex: &'a DexFile<'a>, + code_item: &'a CodeItem, + code_off: u32, + ) -> Result> { + let insns = match code_off { + 0 => &[], + _ => dex.get_insns_raw(code_off, code_item.insns_size)?, 
+ }; + Ok(CodeItemAccessor { code_item, insns }) + } + + pub fn code_item(&self) -> &'a CodeItem { + self.code_item + } + + pub fn registers_size(&self) -> u16 { + self.code_item.registers_size + } + + pub fn ins_size(&self) -> u16 { + self.code_item.ins_size + } + + pub fn outs_size(&self) -> u16 { + self.code_item.outs_size + } + + pub fn tries_size(&self) -> u16 { + self.code_item.tries_size + } + + pub fn insn_at(&self, pc: u32) -> Instruction<'a> { + debug_assert!(pc < self.insns_size_in_code_units()); + Instruction::at(&self.insns[pc as usize..]) + + } +} diff --git a/src/file/instruction.rs b/src/file/instruction.rs new file mode 100644 index 0000000..e0b1bbb --- /dev/null +++ b/src/file/instruction.rs @@ -0,0 +1,454 @@ +pub struct Instruction<'a>(&'a [u16]); + +impl<'a> Instruction<'a> { + #[inline(always)] + pub fn at(code: &[u16]) -> Instruction<'_> { + Instruction(code) + } + + #[inline(always)] + pub fn relative_at(&self, offset: u32) -> Instruction<'_> { + Instruction::at(&self.0[offset as usize..]) + } + + #[inline(always)] + pub fn fetch16(&self, offset: usize) -> u16 { + self.0[offset] + } + + #[inline(always)] + pub fn fetch32(&self, offset: usize) -> u32 { + self.fetch16(offset) as u32 | ((self.fetch16(offset + 1) as u32) << 16) + } + + const fn format_desc_of(opcode: Code) -> &'static InstructionDescriptor { + &Instruction::INSN_DESCRIPTORS[opcode as usize] + } + + pub const fn format_of(opcode: Code) -> &'static Format { + &Instruction::format_desc_of(opcode).format + } + + pub const fn index_type_of(opcode: Code) -> &'static IndexType { + &Instruction::format_desc_of(opcode).index_type + } + + pub const fn flags_of(opcode: Code) -> u8 { + Instruction::format_desc_of(opcode).flags + } + + pub const fn verify_flags_of(opcode: Code) -> u32 { + Instruction::format_desc_of(opcode).verify_flags + } + + #[inline] + const fn code_size_in_code_units_by_opcode(opcode: Code, format: Format) -> u8 { + let format_idx = format as u8; + if opcode as u8 == 
Code::NOP as u8 { + 0xFF // will point to complex type + } else if format_idx >= Format::k10x as u8 && format_idx <= Format::k10t as u8 { + 1 + } else if format_idx >= Format::k20t as u8 && format_idx <= Format::k22c as u8 { + 2 + } else if format_idx >= Format::k30t as u8 && format_idx <= Format::k3rc as u8 { + 3 + } else if format_idx >= Format::k45cc as u8 && format_idx <= Format::k4rcc as u8 { + 4 + } else if format_idx == Format::k51l as u8 { + 5 + } else { + 0xFF + } + } +} + +#[allow(non_camel_case_types)] +#[repr(u8)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Format { + k10x, // op + k12x, // op vA, vB + k11n, // op vA, #+B + k11x, // op vAA + k10t, // op +AA + k20t, // op +AAAA + k22x, // op vAA, vBBBB + k21t, // op vAA, +BBBB + k21s, // op vAA, #+BBBB + k21h, // op vAA, #+BBBB00000[00000000] + k21c, // op vAA, thing@BBBB + k23x, // op vAA, vBB, vCC + k22b, // op vAA, vBB, #+CC + k22t, // op vA, vB, +CCCC + k22s, // op vA, vB, #+CCCC + k22c, // op vA, vB, thing@CCCC + k32x, // op vAAAA, vBBBB + k30t, // op +AAAAAAAA + k31t, // op vAA, +BBBBBBBB + k31i, // op vAA, #+BBBBBBBB + k31c, // op vAA, thing@BBBBBBBB + k35c, // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG) + k3rc, // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB + + // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH (A: count) + // format: AG op BBBB FEDC HHHH + k45cc, + + // op {VCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count) + // format: AA op BBBB CCCC HHHH + k4rcc, // op {VCCCC .. 
v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count) + + k51l, // op vAA, #+BBBBBBBBBBBBBBBB + kInvalidFormat, +} + +pub enum IndexType { + Unknown = 0, + None, // has no index + TypeRef, // type reference index + StringRef, // string reference index + MethodRef, // method reference index + FieldRef, // field reference index + MethodAndProtoRef, // method and a proto reference index (for invoke-polymorphic) + CallSiteRef, // call site reference index + MethodHandleRef, // constant method handle reference index + ProtoRef, // prototype reference index +} + +#[rustfmt::skip] +#[allow(non_upper_case_globals)] +pub mod flags { + pub const Branch: u8 = 0x01; // conditional or unconditional branch + pub const Continue: u8 = 0x02; // flow can continue to next statement + pub const Switch: u8 = 0x04; // switch statement + pub const Throw: u8 = 0x08; // could cause an exception to be thrown + pub const Return: u8 = 0x10; // returns, no additional statements + pub const Invoke: u8 = 0x20; // a flavor of invoke + pub const Unconditional: u8 = 0x40; // unconditional branch + pub const Experimental: u8 = 0x80; // is an experimental opcode +} + +#[rustfmt::skip] +#[allow(non_upper_case_globals)] +pub mod verify_flags { + pub const VerifyNothing: u32 = 0x0000000; + pub const VerifyRegA: u32 = 0x0000001; + pub const VerifyRegAWide: u32 = 0x0000002; + pub const VerifyRegB: u32 = 0x0000004; + pub const VerifyRegBField: u32 = 0x0000008; + pub const VerifyRegBMethod: u32 = 0x0000010; + pub const VerifyRegBNewInstance: u32 = 0x0000020; + pub const VerifyRegBString: u32 = 0x0000040; + pub const VerifyRegBType: u32 = 0x0000080; + pub const VerifyRegBWide: u32 = 0x0000100; + pub const VerifyRegC: u32 = 0x0000200; + pub const VerifyRegCField: u32 = 0x0000400; + pub const VerifyRegCNewArray: u32 = 0x0000800; + pub const VerifyRegCType: u32 = 0x0001000; + pub const VerifyRegCWide: u32 = 0x0002000; + pub const VerifyArrayData: u32 = 0x0004000; + pub const VerifyBranchTarget: u32 = 0x0008000; + 
pub const VerifySwitchTargets: u32 = 0x0010000; + pub const VerifyVarArg: u32 = 0x0020000; + pub const VerifyVarArgNonZero: u32 = 0x0040000; + pub const VerifyVarArgRange: u32 = 0x0080000; + pub const VerifyVarArgRangeNonZero: u32 = 0x0100000; + pub const VerifyError: u32 = 0x0200000; + pub const VerifyRegHPrototype: u32 = 0x0400000; + pub const VerifyRegBCallSite: u32 = 0x0800000; + pub const VerifyRegBMethodHandle: u32 = 0x1000000; + pub const VerifyRegBPrototype: u32 = 0x2000000; +} + +pub struct InstructionDescriptor { + pub name: &'static str, + pub format: Format, + pub index_type: IndexType, + pub flags: u8, + pub size_in_code_units: u8, + pub opcode: Code, + pub verify_flags: u32, +} + +macro_rules! insn_desc_table { + ($({$code:ident, $name:literal, $format:ident, $idx_ty:ident, $flags:expr, $verify_flags:expr},)*) => { + impl Instruction<'_> { + const INSN_DESCRIPTORS: &'static [InstructionDescriptor] = &[ + $(InstructionDescriptor { + name: $name, + format: Format::$format, + index_type: IndexType::$idx_ty, + flags: $flags, + size_in_code_units: Instruction::code_size_in_code_units_by_opcode(Code::$code, Format::$format), + opcode: Code::$code, + verify_flags: $verify_flags + },)* + ]; + } + + #[repr(u8)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum Code { + $($code,)* + } + }; +} +insn_desc_table!( + /* 0x00 */ {NOP, "nop", k10x, None, flags::Continue, verify_flags::VerifyNothing}, + /* 0x01 */ {MOVE, "move", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x02 */ {MOVE_FROM16, "move/from16", k22x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x03 */ {MOVE_16, "move/16", k32x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x04 */ {MOVE_WIDE, "move-wide", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x05 */ {MOVE_WIDE_FROM16, 
"move-wide/from16", k22x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x06 */ {MOVE_WIDE_16, "move-wide/16", k32x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x07 */ {MOVE_OBJECT, "move-object", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x08 */ {MOVE_OBJECT_FROM16, "move-object/from16", k22x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x09 */ {MOVE_OBJECT_16, "move-object/16", k32x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x0a */ {MOVE_RESULT, "move-result", k11x, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0b */ {MOVE_RESULT_WIDE, "move-result-wide", k11x, None, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x0c */ {MOVE_RESULT_OBJECT, "move-result-object", k11x, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0d */ {MOVE_EXCEPTION, "move-exception", k11x, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0e */ {RETURN_VOID, "return-void", k10x, None, flags::Return, verify_flags::VerifyNothing}, + /* 0x0f */ {RETURN, "return", k11x, None, flags::Return, verify_flags::VerifyRegA}, + /* 0x10 */ {RETURN_WIDE, "return-wide", k11x, None, flags::Return, verify_flags::VerifyRegAWide}, + /* 0x11 */ {RETURN_OBJECT, "return-object", k11x, None, flags::Return, verify_flags::VerifyRegA}, + /* 0x12 */ {CONST_4, "const/4", k11n, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x13 */ {CONST_16, "const/16", k21s, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x14 */ {CONST, "const", k31i, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x15 */ {CONST_HIGH16, "const/high16", k21h, None, flags::Continue, verify_flags::VerifyRegA}, + /* 0x16 */ {CONST_WIDE_16, "const-wide/16", k21s, None, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x17 */ {CONST_WIDE_32, "const-wide/32", k31i, None, 
flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x18 */ {CONST_WIDE, "const-wide", k51l, None, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x19 */ {CONST_WIDE_HIGH16, "const-wide/high16", k21h, None, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x1a */ {CONST_STRING, "const-string", k21c, StringRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBString}, + /* 0x1b */ {CONST_STRING_JUMBO, "const-string/jumbo", k31c, StringRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBString}, + /* 0x1c */ {CONST_CLASS, "const-class", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBType}, + /* 0x1d */ {MONITOR_ENTER, "monitor-enter", k11x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA}, + /* 0x1e */ {MONITOR_EXIT, "monitor-exit", k11x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA}, + /* 0x1f */ {CHECK_CAST, "check-cast", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBType}, + /* 0x20 */ {INSTANCE_OF, "instance-of", k22c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCType}, + /* 0x21 */ {ARRAY_LENGTH, "array-length", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x22 */ {NEW_INSTANCE, "new-instance", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBNewInstance}, + /* 0x23 */ {NEW_ARRAY, "new-array", k22c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCNewArray}, + /* 0x24 */ {FILLED_NEW_ARRAY, "filled-new-array", k35c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegBType | verify_flags::VerifyVarArg}, + /* 0x25 */ {FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, TypeRef, 
flags::Continue | flags::Throw, verify_flags::VerifyRegBType | verify_flags::VerifyVarArgRange}, + /* 0x26 */ {FILL_ARRAY_DATA, "fill-array-data", k31t, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyArrayData}, + /* 0x27 */ {THROW, "throw", k11x, None, flags::Throw, verify_flags::VerifyRegA}, + /* 0x28 */ {GOTO, "goto", k10t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x29 */ {GOTO_16, "goto/16", k20t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x2a */ {GOTO_32, "goto/32", k30t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x2b */ {PACKED_SWITCH, "packed-switch", k31t, None, flags::Continue | flags::Switch, verify_flags::VerifyRegA | verify_flags::VerifySwitchTargets}, + /* 0x2c */ {SPARSE_SWITCH, "sparse-switch", k31t, None, flags::Continue | flags::Switch, verify_flags::VerifyRegA | verify_flags::VerifySwitchTargets}, + /* 0x2d */ {CMPL_FLOAT, "cmpl-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x2e */ {CMPG_FLOAT, "cmpg-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x2f */ {CMPL_DOUBLE, "cmpl-double", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x30 */ {CMPG_DOUBLE, "cmpg-double", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x31 */ {CMP_LONG, "cmp-long", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x32 */ {IF_EQ, "if-eq", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x33 */ {IF_NE, "if-ne", k22t, None, flags::Continue | 
flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x34 */ {IF_LT, "if-lt", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x35 */ {IF_GE, "if-ge", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x36 */ {IF_GT, "if-gt", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x37 */ {IF_LE, "if-le", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x38 */ {IF_EQZ, "if-eqz", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x39 */ {IF_NEZ, "if-nez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3a */ {IF_LTZ, "if-ltz", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3b */ {IF_GEZ, "if-gez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3c */ {IF_GTZ, "if-gtz", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3d */ {IF_LEZ, "if-lez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3e */ {UNUSED_3E, "unused-3e", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x3f */ {UNUSED_3F, "unused-3f", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x40 */ {UNUSED_40, "unused-40", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x41 */ {UNUSED_41, "unused-41", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x42 */ {UNUSED_42, "unused-42", k10x, Unknown, 0, 
verify_flags::VerifyError}, + /* 0x43 */ {UNUSED_43, "unused-43", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x44 */ {AGET, "aget", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x45 */ {AGET_WIDE, "aget-wide", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x46 */ {AGET_OBJECT, "aget-object", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x47 */ {AGET_BOOLEAN, "aget-boolean", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x48 */ {AGET_BYTE, "aget-byte", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x49 */ {AGET_CHAR, "aget-char", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4a */ {AGET_SHORT, "aget-short", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4b */ {APUT, "aput", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4c */ {APUT_WIDE, "aput-wide", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4d */ {APUT_OBJECT, "aput-object", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4e */ {APUT_BOOLEAN, "aput-boolean", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4f */ {APUT_BYTE, "aput-byte", k23x, None, flags::Continue | flags::Throw, 
verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x50 */ {APUT_CHAR, "aput-char", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x51 */ {APUT_SHORT, "aput-short", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x52 */ {IGET, "iget", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x53 */ {IGET_WIDE, "iget-wide", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x54 */ {IGET_OBJECT, "iget-object", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x55 */ {IGET_BOOLEAN, "iget-boolean", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x56 */ {IGET_BYTE, "iget-byte", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x57 */ {IGET_CHAR, "iget-char", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x58 */ {IGET_SHORT, "iget-short", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x59 */ {IPUT, "iput", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5a */ {IPUT_WIDE, "iput-wide", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5b */ {IPUT_OBJECT, "iput-object", 
k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5c */ {IPUT_BOOLEAN, "iput-boolean", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5d */ {IPUT_BYTE, "iput-byte", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5e */ {IPUT_CHAR, "iput-char", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x5f */ {IPUT_SHORT, "iput-short", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, + /* 0x60 */ {SGET, "sget", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x61 */ {SGET_WIDE, "sget-wide", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBField}, + /* 0x62 */ {SGET_OBJECT, "sget-object", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x63 */ {SGET_BOOLEAN, "sget-boolean", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x64 */ {SGET_BYTE, "sget-byte", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x65 */ {SGET_CHAR, "sget-char", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x66 */ {SGET_SHORT, "sget-short", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x67 */ {SPUT, "sput", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x68 */ 
{SPUT_WIDE, "sput-wide", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBField}, + /* 0x69 */ {SPUT_OBJECT, "sput-object", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x6a */ {SPUT_BOOLEAN, "sput-boolean", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x6b */ {SPUT_BYTE, "sput-byte", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x6c */ {SPUT_CHAR, "sput-char", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x6d */ {SPUT_SHORT, "sput-short", k21c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBField}, + /* 0x6e */ {INVOKE_VIRTUAL, "invoke-virtual", k35c, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgNonZero}, + /* 0x6f */ {INVOKE_SUPER, "invoke-super", k35c, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgNonZero}, + /* 0x70 */ {INVOKE_DIRECT, "invoke-direct", k35c, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgNonZero}, + /* 0x71 */ {INVOKE_STATIC, "invoke-static", k35c, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArg}, + /* 0x72 */ {INVOKE_INTERFACE, "invoke-interface", k35c, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgNonZero}, + /* 0x73 */ {UNUSED_73, "unused-73", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x74 */ {INVOKE_VIRTUAL_RANGE, "invoke-virtual/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, 
verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero}, + /* 0x75 */ {INVOKE_SUPER_RANGE, "invoke-super/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero}, + /* 0x76 */ {INVOKE_DIRECT_RANGE, "invoke-direct/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero}, + /* 0x77 */ {INVOKE_STATIC_RANGE, "invoke-static/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRange}, + /* 0x78 */ {INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero}, + /* 0x79 */ {UNUSED_79, "unused-79", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x7a */ {UNUSED_7A, "unused-7a", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0x7b */ {NEG_INT, "neg-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x7c */ {NOT_INT, "not-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x7d */ {NEG_LONG, "neg-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x7e */ {NOT_LONG, "not-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x7f */ {NEG_FLOAT, "neg-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x80 */ {NEG_DOUBLE, "neg-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x81 */ {INT_TO_LONG, "int-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x82 */ {INT_TO_FLOAT, "int-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB}, + /* 0x83 */ {INT_TO_DOUBLE, "int-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x84 */ {LONG_TO_INT, "long-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x85 */ {LONG_TO_FLOAT, "long-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x86 */ {LONG_TO_DOUBLE, "long-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x87 */ {FLOAT_TO_INT, "float-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x88 */ {FLOAT_TO_LONG, "float-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x89 */ {FLOAT_TO_DOUBLE, "float-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x8a */ {DOUBLE_TO_INT, "double-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x8b */ {DOUBLE_TO_LONG, "double-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x8c */ {DOUBLE_TO_FLOAT, "double-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x8d */ {INT_TO_BYTE, "int-to-byte", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x8e */ {INT_TO_CHAR, "int-to-char", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x8f */ {INT_TO_SHORT, "int-to-short", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x90 */ {ADD_INT, "add-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x91 */ {SUB_INT, "sub-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x92 */ {MUL_INT, "mul-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x93 */ {DIV_INT, "div-int", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x94 */ {REM_INT, "rem-int", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x95 */ {AND_INT, "and-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x96 */ {OR_INT, "or-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x97 */ {XOR_INT, "xor-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x98 */ {SHL_INT, "shl-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x99 */ {SHR_INT, "shr-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x9a */ {USHR_INT, "ushr-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x9b */ {ADD_LONG, "add-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9c */ {SUB_LONG, "sub-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9d */ {MUL_LONG, "mul-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9e */ {DIV_LONG, "div-long", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | 
verify_flags::VerifyRegCWide}, + /* 0x9f */ {REM_LONG, "rem-long", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa0 */ {AND_LONG, "and-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa1 */ {OR_LONG, "or-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa2 */ {XOR_LONG, "xor-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa3 */ {SHL_LONG, "shl-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa4 */ {SHR_LONG, "shr-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa5 */ {USHR_LONG, "ushr-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa6 */ {ADD_FLOAT, "add-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa7 */ {SUB_FLOAT, "sub-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa8 */ {MUL_FLOAT, "mul-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa9 */ {DIV_FLOAT, "div-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xaa */ {REM_FLOAT, "rem-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xab */ {ADD_DOUBLE, "add-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | 
verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xac */ {SUB_DOUBLE, "sub-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xad */ {MUL_DOUBLE, "mul-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xae */ {DIV_DOUBLE, "div-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xaf */ {REM_DOUBLE, "rem-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xb0 */ {ADD_INT_2ADDR, "add-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb1 */ {SUB_INT_2ADDR, "sub-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb2 */ {MUL_INT_2ADDR, "mul-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb3 */ {DIV_INT_2ADDR, "div-int/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb4 */ {REM_INT_2ADDR, "rem-int/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb5 */ {AND_INT_2ADDR, "and-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb6 */ {OR_INT_2ADDR, "or-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb7 */ {XOR_INT_2ADDR, "xor-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb8 */ {SHL_INT_2ADDR, "shl-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb9 */ {SHR_INT_2ADDR, "shr-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA 
| verify_flags::VerifyRegB}, + /* 0xba */ {USHR_INT_2ADDR, "ushr-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xbb */ {ADD_LONG_2ADDR, "add-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbc */ {SUB_LONG_2ADDR, "sub-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbd */ {MUL_LONG_2ADDR, "mul-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbe */ {DIV_LONG_2ADDR, "div-long/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbf */ {REM_LONG_2ADDR, "rem-long/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc0 */ {AND_LONG_2ADDR, "and-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc1 */ {OR_LONG_2ADDR, "or-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc2 */ {XOR_LONG_2ADDR, "xor-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc3 */ {SHL_LONG_2ADDR, "shl-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc4 */ {SHR_LONG_2ADDR, "shr-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc5 */ {USHR_LONG_2ADDR, "ushr-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc6 */ {ADD_FLOAT_2ADDR, "add-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xc7 */ {SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xc8 */ 
{MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xc9 */ {DIV_FLOAT_2ADDR, "div-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xca */ {REM_FLOAT_2ADDR, "rem-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xcb */ {ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcc */ {SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcd */ {MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xce */ {DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcf */ {REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xd0 */ {ADD_INT_LIT16, "add-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd1 */ {RSUB_INT, "rsub-int", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd2 */ {MUL_INT_LIT16, "mul-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd3 */ {DIV_INT_LIT16, "div-int/lit16", k22s, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd4 */ {REM_INT_LIT16, "rem-int/lit16", k22s, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd5 */ {AND_INT_LIT16, "and-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd6 */ {OR_INT_LIT16, "or-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB}, + /* 0xd7 */ {XOR_INT_LIT16, "xor-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd8 */ {ADD_INT_LIT8, "add-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd9 */ {RSUB_INT_LIT8, "rsub-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xda */ {MUL_INT_LIT8, "mul-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdb */ {DIV_INT_LIT8, "div-int/lit8", k22b, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdc */ {REM_INT_LIT8, "rem-int/lit8", k22b, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdd */ {AND_INT_LIT8, "and-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xde */ {OR_INT_LIT8, "or-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdf */ {XOR_INT_LIT8, "xor-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe0 */ {SHL_INT_LIT8, "shl-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe1 */ {SHR_INT_LIT8, "shr-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe2 */ {USHR_INT_LIT8, "ushr-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe3 */ {UNUSED_E3, "unused-e3", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xe4 */ {UNUSED_E4, "unused-e4", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xe5 */ {UNUSED_E5, "unused-e5", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xe6 */ {UNUSED_E6, "unused-e6", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xe7 */ {UNUSED_E7, "unused-e7", k10x, Unknown, 0, 
verify_flags::VerifyError}, + /* 0xe8 */ {UNUSED_E8, "unused-e8", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xe9 */ {UNUSED_E9, "unused-e9", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xea */ {UNUSED_EA, "unused-ea", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xeb */ {UNUSED_EB, "unused-eb", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xec */ {UNUSED_EC, "unused-ec", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xed */ {UNUSED_ED, "unused-ed", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xee */ {UNUSED_EE, "unused-ee", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xef */ {UNUSED_EF, "unused-ef", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf0 */ {UNUSED_F0, "unused-f0", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf1 */ {UNUSED_F1, "unused-f1", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf2 */ {UNUSED_F2, "unused-f2", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf3 */ {UNUSED_F3, "unused-f3", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf4 */ {UNUSED_F4, "unused-f4", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf5 */ {UNUSED_F5, "unused-f5", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf6 */ {UNUSED_F6, "unused-f6", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf7 */ {UNUSED_F7, "unused-f7", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf8 */ {UNUSED_F8, "unused-f8", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xf9 */ {UNUSED_F9, "unused-f9", k10x, Unknown, 0, verify_flags::VerifyError}, + /* 0xfa */ {INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, MethodAndProtoRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgNonZero | verify_flags::VerifyRegHPrototype}, + /* 0xfb */ {INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, MethodAndProtoRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero | 
verify_flags::VerifyRegHPrototype}, + /* 0xfc */ {INVOKE_CUSTOM, "invoke-custom", k35c, CallSiteRef, flags::Continue | flags::Throw, verify_flags::VerifyRegBCallSite | verify_flags::VerifyVarArg}, + /* 0xfd */ {INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, CallSiteRef, flags::Continue | flags::Throw, verify_flags::VerifyRegBCallSite | verify_flags::VerifyVarArgRange}, + /* 0xfe */ {CONST_METHOD_HANDLE, "const-method-handle", k21c, MethodHandleRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBMethodHandle}, + /* 0xff */ {CONST_METHOD_TYPE, "const-method-type", k21c, ProtoRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBPrototype}, +); diff --git a/src/file/mod.rs b/src/file/mod.rs index 3dbebde..5cd148f 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -12,6 +12,8 @@ pub mod modifiers; pub use modifiers::*; pub mod instruction; pub use instruction::*; +pub mod code_item_accessors; +pub use code_item_accessors::*; use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -298,6 +300,17 @@ impl<'a> DexFile<'a> { self.data_ptr(offset) } + #[inline(always)] + pub fn get_code_item_accessor(&self, offset: u32) -> Result> { + check_lt_result!(offset, self.file_size(), "code item offset"); + let code_item = self.non_null_data_ptr(offset)?; + CodeItemAccessor::from_code_item( + &self, + code_item, + offset + std::mem::size_of::() as u32, + ) + } + #[inline(always)] pub fn get_insns_raw(&self, code_off: u32, size_in_code_units: u32) -> Result<&'a [u16]> { check_lt_result!(code_off, self.file_size(), "code stream offset"); From 9fa71e8ff2825037cbc1580201c0c51fb625629c Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 6 Feb 2025 21:12:22 +0100 Subject: [PATCH 10/46] Register access for instructions --- + Added new error: OperandAccessError + A,B,C,H and VarArgs can be extracted using vreg module --- src/error.rs | 7 + 
src/file/instruction.rs | 357 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 360 insertions(+), 4 deletions(-) diff --git a/src/error.rs b/src/error.rs index 6ca6f00..4b274eb 100644 --- a/src/error.rs +++ b/src/error.rs @@ -77,6 +77,13 @@ pub enum DexError { file_size: usize, }, + #[error( + "Tries to access v{operand} of instruction {insn_name} which has no {operand} operand" + )] + OperandAccessError { + insn_name: &'static str, + operand: &'static str, + }, } #[macro_export] diff --git a/src/file/instruction.rs b/src/file/instruction.rs index e0b1bbb..2d2383f 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -7,8 +7,8 @@ impl<'a> Instruction<'a> { } #[inline(always)] - pub fn relative_at(&self, offset: u32) -> Instruction<'_> { - Instruction::at(&self.0[offset as usize..]) + pub fn relative_at(&self, offset: usize) -> Instruction<'a> { + Instruction::at(&self.0[offset..]) } #[inline(always)] @@ -41,11 +41,22 @@ impl<'a> Instruction<'a> { Instruction::format_desc_of(opcode).verify_flags } + pub const fn name_of(opcode: Code) -> &'static str { + Instruction::format_desc_of(opcode).name + } + + #[inline(always)] + pub const fn opcode_of(inst_data: u16) -> Code { + // this will always return a valid result as we are limiting the + // input to 0xFF + Instruction::INSN_DESCRIPTORS[(inst_data & 0xFF) as usize].opcode + } + #[inline] const fn code_size_in_code_units_by_opcode(opcode: Code, format: Format) -> u8 { let format_idx = format as u8; if opcode as u8 == Code::NOP as u8 { - 0xFF // will point to complex type + code_flags::Complex // will point to complex type } else if format_idx >= Format::k10x as u8 && format_idx <= Format::k10t as u8 { 1 } else if format_idx >= Format::k20t as u8 && format_idx <= Format::k22c as u8 { @@ -57,7 +68,7 @@ impl<'a> Instruction<'a> { } else if format_idx == Format::k51l as u8 { 5 } else { - 0xFF + code_flags::Custom } } } @@ -115,6 +126,20 @@ pub enum IndexType { ProtoRef, // prototype reference index 
} +#[rustfmt::skip] +#[allow(non_upper_case_globals)] +pub mod code_flags { + pub const Complex: u8 = 0xFF; + pub const Custom: u8 = 0xFE; +} + +#[rustfmt::skip] +#[allow(non_upper_case_globals)] +pub mod signatures { + pub const PackedSwitchSignature: u16 = 0x0100; + pub const SparseSwitchSignature: u16 = 0x0200; + pub const ArrayDataSignature: u16 = 0x0300; +} #[rustfmt::skip] #[allow(non_upper_case_globals)] pub mod flags { @@ -160,6 +185,330 @@ pub mod verify_flags { pub const VerifyRegBPrototype: u32 = 0x2000000; } +impl<'a> Instruction<'a> { + #[inline(always)] + const fn format_desc(&self) -> &'static InstructionDescriptor { + &Instruction::INSN_DESCRIPTORS[(self.0[0] & 0xFF) as usize] + } + + pub const fn opcode(&self) -> Code { + self.format_desc().opcode + } + + pub const fn format(&self) -> &'static Format { + &self.format_desc().format + } + + pub const fn name(&self) -> &'static str { + &self.format_desc().name + } + + pub fn next(&self) -> Instruction<'a> { + self.relative_at(self.size_in_code_units()) + } + + #[inline(always)] + pub fn size_in_code_units(&self) -> usize { + let size = Instruction::format_desc_of(self.opcode()).size_in_code_units; + match size { + code_flags::Complex => self.size_in_code_units_complex(), + code_flags::Custom => 1, /* TODO */ + _ => size as usize, + } + } + + pub fn size_in_code_units_complex(&self) -> usize { + let inst_data = self.fetch16(0); + debug_assert!(inst_data & 0xFF == 0); + match inst_data { + signatures::PackedSwitchSignature => 4 + self.fetch16(1) as usize * 2, + signatures::SparseSwitchSignature => 2 + self.fetch16(1) as usize * 4, + signatures::ArrayDataSignature => { + let element_size = self.fetch16(1) as usize; + let length = self.fetch32(2) as usize; + // The plus 1 is to round up for odd size and width. 
+ 4 + (element_size * length + 1) / 2 + } + _ => 1, + } + } + + pub fn verify_flags(&self) -> u32 { + Instruction::verify_flags_of(self.opcode()) + } +} + +pub struct VarArgs { + pub count: u8, + pub start_reg: u8, +} +// access to registers of all formats +#[allow(non_snake_case)] +pub mod vreg { + + use super::*; + use crate::{dex_err, error::DexError, Result}; + + // AA|op ... + fn inst_aa(inst: &Instruction<'_>) -> u8 { + (inst.fetch16(0) >> 8) as u8 + } + + // B|A|op ... + fn inst_a(inst: &Instruction<'_>) -> u8 { + (inst.fetch16(0) >> 8) as u8 & 0x0F + } + + // B|A|op ... + fn inst_b(inst: &Instruction<'_>) -> u8 { + (inst.fetch16(0) >> 12) as u8 + } + + //------------------------------------------------------------------------------ + // VRegA + //------------------------------------------------------------------------------ + #[inline] + pub fn has_a(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k10t + | Format::k10x + | Format::k11n + | Format::k11x + | Format::k12x + | Format::k20t + | Format::k21c + | Format::k21h + | Format::k21s + | Format::k21t + | Format::k22b + | Format::k22c + | Format::k22s + | Format::k22t + | Format::k22x + | Format::k23x + | Format::k30t + | Format::k31c + | Format::k31i + | Format::k31t + | Format::k32x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc + | Format::k51l => true, + _ => false, + } + } + + #[inline] + pub fn A(inst: &Instruction<'_>) -> Result { + Ok(match inst.format() { + // AA|op + Format::k10t + | Format::k10x + | Format::k11x + | Format::k21c + | Format::k21h + | Format::k21s + | Format::k21t + | Format::k22b + | Format::k22x + | Format::k23x + | Format::k31c + | Format::k31i + | Format::k31t + | Format::k3rc + | Format::k51l + | Format::k4rcc => inst_aa(inst) as i32, + // B|A|op + Format::k11n | Format::k12x | Format::k22c | Format::k22s | Format::k22t => { + inst_a(inst) as i32 + } + // op AAAA + Format::k32x | Format::k20t => inst.fetch16(1) as i32, + 
// op AAAAAAAA + Format::k30t => inst.fetch32(1) as i32, + // A|G|op + Format::k35c | Format::k45cc => inst_b(inst) as i32, + _ => { + return dex_err!(OperandAccessError { + insn_name: inst.name(), + operand: "A" + }) + } + }) + } + + //------------------------------------------------------------------------------ + // VRegB + //------------------------------------------------------------------------------ + #[inline] + pub fn has_b(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k11n + | Format::k12x + | Format::k21c + | Format::k21h + | Format::k21s + | Format::k21t + | Format::k22b + | Format::k22c + | Format::k22s + | Format::k22t + | Format::k22x + | Format::k23x + | Format::k31c + | Format::k31i + | Format::k31t + | Format::k32x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc + | Format::k51l => true, + _ => false, + } + } + + pub fn has_wide_b(inst: &Instruction<'_>) -> bool { + *inst.format() == Format::k51l + } + + #[inline] + pub fn wide_b(inst: &Instruction<'_>) -> u64 { + debug_assert!(*inst.format() == Format::k51l); + inst.fetch32(1) as u64 | ((inst.fetch32(3) as u64) << 32) + } + + #[inline] + pub fn B(inst: &Instruction<'_>) -> Result { + Ok(match inst.format() { + // B|A|op with #+B + Format::k11n => ((inst_b(inst) as i32) << 28) >> 28, + // op BBBB + Format::k21c + | Format::k21t + | Format::k21s + | Format::k21h + | Format::k22x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc => inst.fetch16(1) as i32, + // B|A|op + Format::k12x | Format::k22c | Format::k22s | Format::k22t => inst_b(inst) as i32, + // op CC|BB + Format::k22b | Format::k23x => (inst.fetch16(1) & 0xFF) as i32, + // op BBBBBBBB + Format::k31c | Format::k31i | Format::k31t => inst.fetch32(1) as i32, + // op AAAA BBBB + Format::k32x => inst.fetch16(2) as i32, + // op BBBBBBBBBBBBBBBBB + Format::k51l => wide_b(inst) as i32, + _ => { + return dex_err!(OperandAccessError { + insn_name: inst.name(), + operand: 
"B" + }) + } + }) + } + + //------------------------------------------------------------------------------ + // VRegC + //------------------------------------------------------------------------------ + #[inline] + pub fn has_c(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k22b + | Format::k22c + | Format::k22s + | Format::k22t + | Format::k23x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc => true, + _ => false, + } + } + + #[inline] + pub fn C(inst: &Instruction<'_>) -> Result { + Ok(match inst.format() { + // op CCCC + Format::k22c | Format::k22s | Format::k22t => inst.fetch16(1) as i32, + // op CC|BB + Format::k22b | Format::k23x => ((inst.fetch16(1) >> 8) & 0xFF) as i32, + // op BBBB CCCC + Format::k3rc | Format::k4rcc => inst.fetch16(2) as i32, + // op BBBB HH|CC + Format::k35c | Format::k45cc => (inst.fetch16(2) & 0x0F) as i32, + _ => { + return dex_err!(OperandAccessError { + insn_name: inst.name(), + operand: "C" + }) + } + }) + } + + //------------------------------------------------------------------------------ + // VRegH + //------------------------------------------------------------------------------ + #[inline] + pub fn has_h(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k45cc | Format::k4rcc => true, + _ => false, + } + } + + #[inline] + pub fn H(inst: &Instruction<'_>) -> Result { + Ok(match &inst.format_desc().format { + Format::k45cc | Format::k4rcc => inst.fetch16(3) as i32, + _ => { + return dex_err!(OperandAccessError { + insn_name: inst.name(), + operand: "H" + }) + } + }) + } + + //------------------------------------------------------------------------------ + // VarArgs + //------------------------------------------------------------------------------ + #[inline] + pub fn has_var_args(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k35c | Format::k45cc => true, + _ => false, + } + } + + #[inline] + pub fn 
var_args(inst: &Instruction<'_>) -> VarArgs { + let reg_list = inst.fetch16(2); + let count = inst_b(inst); + // TODO: why only 5? + debug_assert!( + count <= 5, + "Invalid arg count in {:?} ({count})", + inst.format() + ); + + VarArgs { + count, + start_reg: reg_list as u8, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////// +// instruction descriptors +//////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct InstructionDescriptor { pub name: &'static str, pub format: Format, From c39b8b5e5448006cc37b56aedd54144ab392b5dc Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 7 Feb 2025 22:40:32 +0100 Subject: [PATCH 11/46] Implementation for dumping instructions --- + Instruction iterator and IntoIter support CodeItemAccessor + DexFile now implements methods for describing fields, methods, types and strings --- src/desc_names.rs | 32 +++ src/file/class_accessor.rs | 13 +- src/file/code_item_accessors.rs | 59 ++++- src/file/dump.rs | 396 ++++++++++++++++++++++++++++++++ src/file/instruction.rs | 56 ++++- src/file/mod.rs | 46 ++-- src/lib.rs | 2 + 7 files changed, 579 insertions(+), 25 deletions(-) create mode 100644 src/desc_names.rs create mode 100644 src/file/dump.rs diff --git a/src/desc_names.rs b/src/desc_names.rs new file mode 100644 index 0000000..aba7c1c --- /dev/null +++ b/src/desc_names.rs @@ -0,0 +1,32 @@ +pub fn pretty_desc(desc: &str) -> String { + let dim = desc.chars().filter(|c| *c == '[').count(); + let name = &desc[dim..]; + let mut output = String::new(); + + if name.starts_with("L") { + let end_idx = if name.ends_with(";") { + name.len() - 1 + } else { + name.len() + }; + output.push_str(&name[1..end_idx].replace("/", ".")); + } else { + output.push_str(match name.as_bytes()[0] { + b'B' => "byte", + b'C' => "char", + b'D' => "double", + b'F' => "float", + b'I' => 
"int", + b'J' => "long", + b'S' => "short", + b'Z' => "boolean", + b'V' => "void", + _ => name, + }); + } + + if dim > 0 { + output.push_str(&"[]".repeat(dim)); + } + output +} diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 322e07d..ede5189 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -1,4 +1,4 @@ -use super::{modifiers, ClassDef, DexFile, FieldId, InvokeType, MethodId, ACC_STATIC}; +use super::{ClassDef, DexFile, FieldId, InvokeType, MethodId, ACC_STATIC}; use crate::{ file::{ACC_CONSTRUCTOR, ACC_INTERFACE}, leb128::decode_leb128_off, @@ -43,7 +43,7 @@ impl<'a> Method<'a> { debug_assert!(self.access_flags & ACC_STATIC == 0x00); if class_access_flags & ACC_INTERFACE != 0 { InvokeType::Interface - } else if (self.access_flags & ACC_CONSTRUCTOR != 0) { + } else if self.access_flags & ACC_CONSTRUCTOR != 0 { InvokeType::Super } else { InvokeType::Virtual @@ -160,9 +160,12 @@ impl<'a> ClassAccessor<'a> { static_fields_off: 0, }; accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos); - accessor.num_instance_fields = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); - accessor.num_direct_methods = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); - accessor.num_virtual_methods = decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.num_instance_fields = + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.num_direct_methods = + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + accessor.num_virtual_methods = + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); accessor.static_fields_off = accessor.ptr_pos as u32; accessor } diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index 9691a99..cc14028 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -61,9 +61,66 
@@ impl<'a> CodeItemAccessor<'a> { self.code_item.tries_size } - pub fn insn_at(&self, pc: u32) -> Instruction<'a> { + pub fn inst_at(&self, pc: u32) -> Instruction<'a> { debug_assert!(pc < self.insns_size_in_code_units()); Instruction::at(&self.insns[pc as usize..]) + } +} + +impl<'a> IntoIterator for CodeItemAccessor<'a> { + type Item = Instruction<'a>; + type IntoIter = DexInstructionIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + // iterator will be valid on empty input + DexInstructionIterator::new(self.insns) + } +} + +pub struct DexInstructionIterator<'a> { + instructions: &'a [u16], + pc: usize, +} + +impl<'a> DexInstructionIterator<'a> { + pub fn new(instructions: &'a [u16]) -> Self { + Self { + instructions, + pc: 0, + } + } + + pub fn inst(&self) -> Instruction<'a> { + debug_assert!(self.pc < self.instructions.len()); + Instruction::at(&self.instructions[self.pc..]) + } + + // REVISIT: make mutable? + pub fn dex_pc(&self) -> usize { + self.pc + } + + pub fn advance(&mut self) { + if self.pc >= self.instructions.len() { + return; + } + + let size = self.inst().size_in_code_units(); + self.pc += size; + debug_assert!(self.pc <= self.instructions.len()); + } +} + +impl<'a> Iterator for DexInstructionIterator<'a> { + type Item = Instruction<'a>; + fn next(&mut self) -> Option { + if self.pc < self.instructions.len() { + let inst = self.inst(); + self.pc += inst.size_in_code_units(); + Some(inst) + } else { + None + } } } diff --git a/src/file/dump.rs b/src/file/dump.rs new file mode 100644 index 0000000..13addf6 --- /dev/null +++ b/src/file/dump.rs @@ -0,0 +1,396 @@ +use crate::{ + desc_names::pretty_desc, + file::{ProtoIndex, TypeIndex}, + Result, +}; + +use super::{vreg, Code, DexFile, Format, Instruction, MethodId, StringId, TypeId}; + +impl<'a> DexFile<'a> { + pub fn pretty_field(&self, field_idx: u32, with_type: bool) -> String { + match self.pretty_field_opt(field_idx, with_type) { + Ok(s) => s, + Err(_) => format!("<>"), + } + } + + pub fn 
pretty_field_opt(&self, field_idx: u32, with_type: bool) -> Result { + let field_id = self.get_field_id(field_idx)?; + let mut result = String::new(); + if with_type { + result.push_str(&self.pretty_type_opt_at(field_id.type_idx)?); + result.push_str(" "); + } + + result.push_str(&self.pretty_type_opt_at(field_id.class_idx)?); + result.push_str("."); + + result.push_str(&self.get_utf16_str_lossy_at(field_id.name_idx)?); + Ok(result) + } + + pub fn pretty_type_at(&self, type_idx: TypeIndex) -> String { + match self.pretty_type_opt_at(type_idx) { + Ok(s) => s, + Err(_) => format!("<>"), + } + } + + pub fn pretty_type_opt_at(&self, type_idx: TypeIndex) -> Result { + self.pretty_type_opt(self.get_type_id(type_idx)?) + } + + pub fn pretty_type_opt(&self, type_id: &TypeId) -> Result { + Ok(pretty_desc(&self.get_type_desc(type_id)?)) + } + + pub fn pretty_utf16(&self, string_id: &StringId) -> String { + match self.get_utf16_str_lossy(string_id) { + Ok(str_data) => str_data, + Err(_) => format!("<>", string_id.string_data_off), + } + } + + pub fn pretty_utf16_at(&self, idx: u32) -> String { + self.pretty_utf16(&StringId { + string_data_off: idx, + }) + } + + pub fn pretty_method_at(&self, method_idx: u32, with_sig: bool) -> String { + match self.pretty_method_opt_at(method_idx, with_sig) { + Ok(s) => s, + Err(_) => format!("<>"), + } + } + + pub fn pretty_method_opt_at(&self, idx: u32, with_sig: bool) -> Result { + self.pretty_method_opt(self.get_method_id(idx)?, with_sig) + } + + pub fn pretty_method_opt(&self, method_id: &MethodId, with_sig: bool) -> Result { + let mut result = String::new(); + let proto_id = if with_sig { + Some(self.get_proto_id(method_id.proto_idx)?) 
+ } else { + None + }; + + if let Some(proto_id) = proto_id { + result.push_str(&self.pretty_type_at(proto_id.return_type_idx)); + result.push(' '); + } + + result.push_str(&self.pretty_type_at(method_id.class_idx)); + result.push('.'); + result.push_str(&self.get_utf16_str_lossy_at(method_id.name_idx)?); + + if let Some(proto_id) = proto_id { + result.push('('); + if let Some(params) = self.get_type_list(proto_id.parameters_off)? { + // REVISIT: we could use map().collect().join() here + for (i, param) in params.iter().enumerate() { + if i > 0 { + result.push_str(", "); + } + result.push_str(&self.pretty_type_at(param.type_idx)); + } + } + result.push(')'); + } + Ok(result) + } +} + +impl<'a> Instruction<'a> { + pub fn to_string(&self, dex_file: Option<&DexFile<'_>>) -> Result { + let opcode = self.name(); + Ok(match self.format() { + &Format::k10x => format!("{opcode}"), + Format::k12x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), + Format::k11n => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), + Format::k11x => format!("{opcode} v{}", vreg::A(self)?), + Format::k10t => format!("{opcode} {:+}", vreg::A(self)?), + Format::k20t => format!("{opcode} {:+}", vreg::A(self)?), + Format::k22x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), + Format::k21t => format!("{opcode} v{}, {:+}", vreg::A(self)?, vreg::B(self)?), + Format::k21s => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), + Format::k21h => { + // op vAA, #+BBBB0000[00000000] + if self.opcode() == Code::CONST_HIGH16 { + let value = (vreg::B(self)? as u32) << 16; + format!( + "{opcode} v{}, #int {:+} // {:#x}", + vreg::A(self)?, + value, + value + ) + } else { + let value = (vreg::B(self)? 
as u64) << 48; + format!( + "{opcode} v{}, #long {:+} // {:#x}", + vreg::A(self)?, + value, + value + ) + } + } + Format::k21c => { + // op vAA, type@BBBB check-cast + // op vAA, field@BBBB const-class + // op vAA, method_handle@BBBB const-method-handle + // op vAA, proto@BBBB const-method-type + // op vAA, string@BBBB const-string + match (dex_file, self.opcode()) { + (Some(dex), Code::CONST_STRING) => { + let index = vreg::B(self)?; + format!( + "{opcode} v{}, {:?} // string@{}", + vreg::A(self)?, + dex.pretty_utf16_at(index as u32), + index + ) + } + (Some(dex), Code::CHECK_CAST | Code::CONST_CLASS | Code::NEW_INSTANCE) => { + let type_idx = vreg::B(self)? as TypeIndex; + format!( + "{opcode} v{}, {} // type@{}", + vreg::A(self)?, + dex.pretty_type_at(type_idx), + type_idx + ) + } + ( + Some(dex), + Code::SGET + | Code::SGET_WIDE + | Code::SGET_OBJECT + | Code::SGET_BOOLEAN + | Code::SGET_BYTE + | Code::SGET_CHAR + | Code::SGET_SHORT + | Code::SPUT + | Code::SPUT_WIDE + | Code::SPUT_OBJECT + | Code::SPUT_BOOLEAN + | Code::SPUT_BYTE + | Code::SPUT_CHAR + | Code::SPUT_SHORT, + ) => { + let field_idx = vreg::B(self)? as u32; + format!( + "{opcode} v{}, {} // field@{}", + vreg::A(self)?, + dex.pretty_field(field_idx, true), + field_idx + ) + } + _ => format!("{opcode} v{}, thing@{}", vreg::A(self)?, vreg::B(self)?), + } + } + #[rustfmt::skip] + &Format::k23x => format!("{opcode} v{}, v{}, v{}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), + #[rustfmt::skip] + Format::k22b => format!("{opcode} v{}, v{}, #{:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), + #[rustfmt::skip] + Format::k22t => format!("{opcode} v{}, v{}, {:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), + #[rustfmt::skip] + Format::k22s => format!("{opcode} v{}, v{}, #{:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), + Format::k22c => { + let index = vreg::C(self)? 
as u32; + match (dex_file, self.opcode()) { + ( + Some(dex), + Code::IGET + | Code::IGET_WIDE + | Code::IGET_OBJECT + | Code::IGET_BOOLEAN + | Code::IGET_BYTE + | Code::IGET_CHAR + | Code::IGET_SHORT + | Code::IPUT + | Code::IPUT_WIDE + | Code::IPUT_OBJECT + | Code::IPUT_BOOLEAN + | Code::IPUT_BYTE + | Code::IPUT_CHAR + | Code::IPUT_SHORT, + ) => { + format!( + "{opcode} v{}, v{}, {} // field@{}", + vreg::A(self)?, + vreg::B(self)?, + dex.pretty_field(index, true), + index + ) + } + (Some(dex), Code::NEW_ARRAY | Code::INSTANCE_OF) => { + format!( + "{opcode} v{}, v{}, {} // type@{}", + vreg::A(self)?, + vreg::B(self)?, + dex.pretty_type_at(index as TypeIndex), + index + ) + } + _ => { + format!( + "{opcode} v{}, v{}, thing@{}", + vreg::A(self)?, + vreg::B(self)?, + index, + ) + } + } + } + Format::k30t => format!("{opcode} {:+}", vreg::A(self)?), + Format::k32x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), + Format::k31i => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), + Format::k31t => format!("{opcode} v{}, {:+}", vreg::A(self)?, vreg::B(self)?), + Format::k31c => { + let index = vreg::B(self)? as u32; + if let (Some(dex), Code::CONST_STRING_JUMBO) = (dex_file, self.opcode()) { + format!( + "{opcode} v{}, {:?} // string@{}", + vreg::A(self)?, + dex.pretty_utf16_at(index), + index + ) + } else { + format!("{opcode} v{}, thing@{}", vreg::A(self)?, index,) + } + } + Format::k35c => { + let var_args = vreg::var_args(self); + let args_str = var_args + .arg + .iter() + .map(|reg| format!("v{}", reg)) + .collect::>() + .join(", "); + let index = vreg::B(self)? 
as u32; + match (dex_file, self.opcode()) { + (Some(dex), Code::FILLED_NEW_ARRAY) => { + format!( + "{opcode} {{{args_str}}}, {} // type@{}", + dex.pretty_type_at(index as TypeIndex), + index + ) + } + ( + Some(dex), + Code::INVOKE_VIRTUAL + | Code::INVOKE_SUPER + | Code::INVOKE_DIRECT + | Code::INVOKE_STATIC + | Code::INVOKE_INTERFACE, + ) => { + format!( + "{opcode} {{{args_str}}}, {} // method@{}", + dex.pretty_method_at(index, true), + index + ) + } + (_, Code::INVOKE_CUSTOM) => { + format!("{opcode} {{{args_str}}}, // call_site@{}", index) + } + _ => { + format!("{opcode} {{{args_str}}}, thing@{}", index,) + } + } + } + Format::k3rc => { + let var_range = vreg::args_range(self)?; + let index = vreg::B(self)? as u32; + match (dex_file, self.opcode()) { + ( + Some(dex), + Code::INVOKE_VIRTUAL_RANGE + | Code::INVOKE_SUPER_RANGE + | Code::INVOKE_DIRECT_RANGE + | Code::INVOKE_STATIC_RANGE + | Code::INVOKE_INTERFACE_RANGE, + ) => { + format!( + "{opcode} {{v{} .. v{}}}, {} // method@{}", + var_range.start(), + var_range.end(), + dex.pretty_method_at(index, true), + index + ) + } + (_, Code::INVOKE_CUSTOM_RANGE) => { + format!( + "{opcode} {{v{} .. v{}}}, // call_site@{}", + var_range.start(), + var_range.end(), + index + ) + } + _ => { + format!( + "{opcode} {{v{} .. v{}}} thing@{}", + var_range.start(), + var_range.end(), + index + ) + } + } + } + Format::k45cc => { + let var_args = vreg::var_args(self); + let args_str = var_args + .arg + .iter() + .map(|reg| format!("v{}", reg)) + .collect::>() + .join(", "); + let method_idx = vreg::B(self)? as u32; + let proto_idx = vreg::H(self)? 
as u32; + if let Some(dex) = dex_file { + format!( + "{opcode} {{{args_str}}}, {}, {} // method@{}, proto@{}", + dex.pretty_method_at(method_idx, true), + dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, + method_idx, + proto_idx + ) + } else { + format!( + "{opcode} {{{args_str}}}, method@{}, proto@{}", + method_idx, proto_idx + ) + } + } + Format::k4rcc => { + let args_range = vreg::args_range(self)?; + let method_idx = vreg::B(self)? as u32; + let proto_idx = vreg::H(self)? as u32; + match (dex_file, self.opcode()) { + (Some(dex), Code::INVOKE_POLYMORPHIC_RANGE) => { + format!( + "{opcode} {{v{} .. v{}}}, {}, {} // method@{}, proto@{}", + args_range.start(), + args_range.end(), + dex.pretty_method_at(method_idx, true), + dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, + method_idx, + proto_idx + ) + } + _ => { + format!( + "{opcode} {{v{} .. v{}}}, method@{}, proto@{}", + args_range.start(), + args_range.end(), + method_idx, + proto_idx + ) + } + } + } + Format::k51l => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::wide_b(self)), + Format::kInvalidFormat => "".to_string(), + }) + } +} diff --git a/src/file/instruction.rs b/src/file/instruction.rs index 2d2383f..a0af43e 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -153,6 +153,7 @@ pub mod flags { pub const Experimental: u8 = 0x80; // is an experimental opcode } +// These flags may be used later to verify instructions #[rustfmt::skip] #[allow(non_upper_case_globals)] pub mod verify_flags { @@ -191,14 +192,17 @@ impl<'a> Instruction<'a> { &Instruction::INSN_DESCRIPTORS[(self.0[0] & 0xFF) as usize] } + #[inline(always)] pub const fn opcode(&self) -> Code { self.format_desc().opcode } + #[inline(always)] pub const fn format(&self) -> &'static Format { &self.format_desc().format } + #[inline(always)] pub const fn name(&self) -> &'static str { &self.format_desc().name } @@ -240,12 +244,23 @@ impl<'a> Instruction<'a> { pub struct VarArgs { pub count: u8, - pub start_reg: u8, + pub 
arg: Vec, +} + +impl VarArgs { + pub fn new(count: u8) -> VarArgs { + VarArgs { + count, + arg: vec![0; count as usize], + } + } } // access to registers of all formats #[allow(non_snake_case)] pub mod vreg { + use std::ops::RangeInclusive; + use super::*; use crate::{dex_err, error::DexError, Result}; @@ -492,17 +507,48 @@ pub mod vreg { pub fn var_args(inst: &Instruction<'_>) -> VarArgs { let reg_list = inst.fetch16(2); let count = inst_b(inst); - // TODO: why only 5? + let mut var_args = VarArgs::new(count); + + // NOTE only five as maximum debug_assert!( count <= 5, "Invalid arg count in {:?} ({count})", inst.format() ); - VarArgs { - count, - start_reg: reg_list as u8, + if count > 4 { + var_args.arg[4] = inst_a(inst); + } + if count > 3 { + var_args.arg[3] = ((reg_list >> 12) & 0x0F) as u8; } + if count > 2 { + var_args.arg[2] = ((reg_list >> 8) & 0x0F) as u8; + } + if count > 1 { + var_args.arg[1] = ((reg_list >> 4) & 0x0F) as u8; + } + if count > 0 { + var_args.arg[0] = (reg_list & 0x0F) as u8; + } + var_args + } + + //------------------------------------------------------------------------------ + // ArgsRange + //------------------------------------------------------------------------------ + #[inline] + pub fn has_args_range(inst: &Instruction<'_>) -> bool { + match &inst.format_desc().format { + Format::k3rc | Format::k4rcc => true, + _ => false, + } + } + + pub fn args_range(inst: &Instruction<'_>) -> Result> { + let first_reg = vreg::C(inst)? as u16; + let last_reg = first_reg + (vreg::A(inst)? 
- 1) as u16; + Ok(first_reg..=last_reg) } } diff --git a/src/file/mod.rs b/src/file/mod.rs index 5cd148f..00ce65a 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -14,6 +14,7 @@ pub mod instruction; pub use instruction::*; pub mod code_item_accessors; pub use code_item_accessors::*; +pub mod dump; use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -249,7 +250,6 @@ impl<'a> DexFile<'a> { self.get_utf16_str(string_id) } - // -- types #[inline(always)] pub fn get_type_id(&self, idx: TypeIndex) -> Result<&'a TypeId> { check_lt_result!(idx as u32, self.num_type_ids(), TypeId); @@ -266,13 +266,14 @@ impl<'a> DexFile<'a> { self.type_ids } - pub fn get_type_desc(&self, type_id: &TypeId) -> Result<(u32, &'a [u8])> { - self.get_string_data(self.get_string_id(type_id.descriptor_idx)?) + #[inline(always)] + pub fn get_type_desc(&self, type_id: &TypeId) -> Result { + self.get_utf16_str_lossy_at(type_id.descriptor_idx) } - pub fn get_type_desc_at(&self, idx: TypeIndex) -> Result<(u32, &'a [u8])> { - let type_id = self.get_type_id(idx)?; - self.get_string_data(self.get_string_id(type_id.descriptor_idx)?) + #[inline(always)] + pub fn get_type_desc_at(&self, idx: TypeIndex) -> Result { + self.get_type_desc(self.get_type_id(idx)?) 
} pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { @@ -329,10 +330,14 @@ impl<'a> DexFile<'a> { self.field_ids } + pub fn get_field_name(&self, field_id: &FieldId) -> Result { + self.get_utf16_str_lossy_at(field_id.name_idx) + } + // Proto related methods - pub fn get_proto_id(&self, idx: u32) -> &'a ProtoId { - check_lt!(idx, self.header.proto_ids_size, ProtoId); - &self.proto_ids[idx as usize] + pub fn get_proto_id(&self, idx: ProtoIndex) -> Result<&'a ProtoId> { + check_lt_result!(idx, self.header.proto_ids_size, ProtoId); + Ok(&self.proto_ids[idx as usize]) } pub fn num_proto_ids(&self) -> u32 { @@ -343,6 +348,24 @@ impl<'a> DexFile<'a> { self.proto_ids } + pub fn get_shorty_at(&self, idx: ProtoIndex) -> Result { + let proto_id = self.get_proto_id(idx)?; + self.get_shorty(proto_id) + } + + pub fn get_shorty_lossy_at(&self, idx: ProtoIndex) -> Result { + let proto_id = self.get_proto_id(idx)?; + self.get_shorty_lossy(proto_id) + } + + pub fn get_shorty(&self, proto_id: &ProtoId) -> Result { + self.get_utf16_str_at(proto_id.shorty_idx) + } + + pub fn get_shorty_lossy(&self, proto_id: &ProtoId) -> Result { + self.get_utf16_str_lossy_at(proto_id.shorty_idx) + } + // method ids related methods #[inline(always)] pub fn get_method_id(&self, idx: u32) -> Result<&'a MethodId> { @@ -377,11 +400,6 @@ impl<'a> DexFile<'a> { self.class_defs } - #[inline] - pub fn get_class_desc(&self, class_def: &ClassDef) -> Result<(u32, &'a [u8])> { - self.get_type_desc_at(class_def.class_idx) - } - #[inline] pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { self.get_type_desc_utf16_lossy_at(class_def.class_idx) diff --git a/src/lib.rs b/src/lib.rs index 23aa1b7..a06f313 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,4 +5,6 @@ pub mod file; pub mod leb128; pub mod utf; +pub mod desc_names; + pub type Result = result::Result; From cc24a59273c680083b5ef0fd2318a563fcead84e Mon Sep 17 00:00:00 2001 From: MatrixEditor 
<58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 11:52:02 +0100 Subject: [PATCH 12/46] Rewrite DexFile internal buffer to support inmemory data --- + location changed to enum --- src/file/code_item_accessors.rs | 11 ++- src/file/container.rs | 92 +++++++++++++++++++ src/file/instruction.rs | 5 +- src/file/mod.rs | 156 +++++++++++++++++--------------- src/file/verifier.rs | 24 +++-- 5 files changed, 201 insertions(+), 87 deletions(-) create mode 100644 src/file/container.rs diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index cc14028..699e30d 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -1,6 +1,6 @@ use crate::Result; -use super::{CodeItem, DexFile, Instruction}; +use super::{CodeItem, DexContainer, DexFile, Instruction}; pub struct CodeItemAccessor<'a> { code_item: &'a CodeItem, @@ -29,11 +29,14 @@ impl<'a> CodeItemAccessor<'a> { } #[inline(always)] - pub fn from_code_item( - dex: &'a DexFile<'a>, + pub fn from_code_item( + dex: &'a DexFile<'a, C>, code_item: &'a CodeItem, code_off: u32, - ) -> Result> { + ) -> Result> + where + C: DexContainer<'a>, + { let insns = match code_off { 0 => &[], _ => dex.get_insns_raw(code_off, code_item.insns_size)?, diff --git a/src/file/container.rs b/src/file/container.rs new file mode 100644 index 0000000..da2fc7a --- /dev/null +++ b/src/file/container.rs @@ -0,0 +1,92 @@ +use std::ops::Deref; + +use memmap2::MmapAsRawDesc; + +use crate::Result; + +use super::MmapDexFile; + +pub trait DexContainer<'a>: AsRef<[u8]> + Deref + 'a { + fn data(&'a self) -> &'a [u8] { + self.as_ref() + } + + fn file_size(&'a self) -> usize { + self.data().len() + } +} + +impl<'a> DexContainer<'a> for memmap2::Mmap {} + +pub struct InMemoryDexContainer<'a>(&'a [u8]); + +impl<'a> InMemoryDexContainer<'a> { + pub fn new(data: &'a [u8]) -> Self { + Self(data) + } +} + +impl<'a> Deref for InMemoryDexContainer<'a> { + type Target = [u8]; + fn deref(&self) -> &'a 
Self::Target { + &self.0 + } +} + +impl<'a> AsRef<[u8]> for InMemoryDexContainer<'a> { + fn as_ref(&self) -> &'a [u8] { + &self.0 + } +} + +impl<'a> DexContainer<'a> for InMemoryDexContainer<'a> {} + +impl<'a> DexContainer<'a> for &'a [u8] {} + +pub struct DexFileContainer { + mmap: memmap2::Mmap, + location: String, + pub verify: bool, + pub verify_checksum: bool, +} + +impl DexFileContainer { + pub fn new(file: T) -> Self + where + T: MmapAsRawDesc, + { + Self { + mmap: unsafe { memmap2::Mmap::map(file).unwrap() }, + verify: false, + verify_checksum: false, + location: "[anonymous]".to_string(), + } + } + + pub fn location(&mut self, location: String) -> &mut Self { + self.location = location; + self + } + + pub fn verify(mut self, verify: bool) -> Self { + self.verify = verify; + self + } + + pub fn verify_checksum(mut self, verify_checksum: bool) -> Self { + self.verify_checksum = verify_checksum; + self + } + + pub fn open<'a>(&'a self) -> Result> { + MmapDexFile::open(self) + } + + pub fn get_location(&self) -> &str { + &self.location + } + + pub fn data(&self) -> &memmap2::Mmap { + &self.mmap + } +} diff --git a/src/file/instruction.rs b/src/file/instruction.rs index a0af43e..a45bf50 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -8,16 +8,19 @@ impl<'a> Instruction<'a> { #[inline(always)] pub fn relative_at(&self, offset: usize) -> Instruction<'a> { + debug_assert!(offset < self.0.len()); Instruction::at(&self.0[offset..]) } #[inline(always)] pub fn fetch16(&self, offset: usize) -> u16 { + debug_assert!(offset < self.0.len()); self.0[offset] } #[inline(always)] pub fn fetch32(&self, offset: usize) -> u32 { + debug_assert!(offset + 1 < self.0.len()); self.fetch16(offset) as u32 | ((self.fetch16(offset + 1) as u32) << 16) } @@ -216,7 +219,7 @@ impl<'a> Instruction<'a> { let size = Instruction::format_desc_of(self.opcode()).size_in_code_units; match size { code_flags::Complex => self.size_in_code_units_complex(), - code_flags::Custom => 
1, /* TODO */ + code_flags::Custom => 1, /* TODO? */ _ => size as usize, } } diff --git a/src/file/mod.rs b/src/file/mod.rs index 00ce65a..55ec78a 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -1,4 +1,4 @@ -use memmap2::{Mmap, MmapAsRawDesc}; +use memmap2::Mmap; use plain::Plain; pub mod structs; @@ -14,7 +14,9 @@ pub mod instruction; pub use instruction::*; pub mod code_item_accessors; pub use code_item_accessors::*; +pub mod container; pub mod dump; +pub use container::*; use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -29,56 +31,32 @@ pub const DEX_MAGIC_VERSIONS: &[&[u8]] = &[ pub const DEX_ENDIAN_CONSTANT: u32 = 0x12345678; -pub struct DexFileContainer { - mmap: memmap2::Mmap, - location: String, - pub verify: bool, - pub verify_checksum: bool, +#[derive(Debug)] +pub enum DexLocation { + InMemory, + Path(String), } -impl DexFileContainer { - pub fn new(file: T) -> Self - where - T: MmapAsRawDesc, - { - Self { - mmap: unsafe { memmap2::Mmap::map(file).unwrap() }, - verify: false, - verify_checksum: false, - location: "[anonymous]".to_string(), - } - } - - pub fn location(&mut self, location: String) -> &mut Self { - self.location = location; - self - } - - pub fn verify(mut self, verify: bool) -> Self { - self.verify = verify; - self - } - - pub fn verify_checksum(mut self, verify_checksum: bool) -> Self { - self.verify_checksum = verify_checksum; - self - } - - pub fn open(&self) -> Result> { - DexFile::open(self) - } - - pub fn get_location(&self) -> &str { - &self.location +impl From<&'static str> for DexLocation { + fn from(s: &'static str) -> Self { + DexLocation::Path(s.to_string()) } +} - pub fn data(&self) -> &memmap2::Mmap { - &self.mmap +impl ToString for DexLocation { + fn to_string(&self) -> String { + match self { + DexLocation::InMemory => "[in-memory]".to_string(), + DexLocation::Path(path) => path.to_string(), + } } } -pub struct DexFile<'a> { - mmap: &'a memmap2::Mmap, +pub type InMemoryDexFile<'a> = 
DexFile<'a, InMemoryDexContainer<'a>>; +pub type MmapDexFile<'a> = DexFile<'a, Mmap>; + +pub struct DexFile<'a, T: DexContainer<'a> = Mmap> { + mmap: &'a T, header: &'a Header, string_ids: &'a [StringId], @@ -92,7 +70,7 @@ pub struct DexFile<'a> { hiddenapi_data: Option<&'a HiddenapiClassData<'a>>, - location: String, + location: DexLocation, } macro_rules! check_lt { @@ -120,10 +98,21 @@ macro_rules! check_lt_result { }; } -impl<'a> DexFile<'a> { - pub fn get_section(base: &'a Mmap, offset: u32, len: u32) -> &'a [T] { +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + #[inline] + fn header_available(base: &'a C) -> bool { let size = base.len(); - if size < std::mem::size_of::
() || len == 0 { + size >= std::mem::size_of::
() && plain::is_aligned::
(base) + } + + pub fn get_section(base: &'a C, offset: u32, len: u32) -> &'a [T] { + if len == 0 { + return &[]; + } + // sanity checks so that this funtion will always return a valid slice + let size = base.len(); + let section_size = len as usize * std::mem::size_of::(); + if (offset as usize + section_size) >= size || offset as usize >= size { return &[]; } @@ -134,8 +123,16 @@ impl<'a> DexFile<'a> { } } - pub fn from_raw_parts(base: &'a Mmap, location: &str) -> Self { - let header = Header::from_bytes(&base).unwrap(); + pub fn from_raw_parts(base: &'a C, location: DexLocation) -> Result> { + if !DexFile::header_available(base) { + return dex_err!(TruncatedFile); + } + + let header = match Header::from_bytes(&base) { + Ok(header) => header, + // REVISIT: we already checked the header + Err(_) => return dex_err!(TruncatedFile), + }; let mut dex = Self { mmap: base, header, @@ -148,25 +145,21 @@ impl<'a> DexFile<'a> { method_handles: &[], call_site_ids: &[], hiddenapi_data: None, - location: location.to_string(), + location, }; - if dex.file_size() < std::mem::size_of::
() { - return dex; // don't parse data - } - dex.init_sections_from_maplist(); - dex + Ok(dex) } - pub fn open(container: &DexFileContainer) -> Result> { + pub fn open(container: &DexFileContainer) -> Result> { let loc = container.get_location(); let size = container.data().len(); if size < std::mem::size_of::
() { return dex_err!(DexFileError, "Invalid or truncated file {:?}", loc); } - let dex = DexFile::from_raw_parts(container.data(), &loc); + let dex = DexFile::from_raw_parts(container.data(), DexLocation::Path(loc.to_string()))?; dex.init()?; if container.verify { DexFile::verify(&dex, container.verify_checksum)?; @@ -187,7 +180,7 @@ impl<'a> DexFile<'a> { } } - pub fn get_location(&self) -> &str { + pub fn get_location(&self) -> &DexLocation { &self.location } @@ -302,11 +295,11 @@ impl<'a> DexFile<'a> { } #[inline(always)] - pub fn get_code_item_accessor(&self, offset: u32) -> Result> { + pub fn get_code_item_accessor(&'a self, offset: u32) -> Result> { check_lt_result!(offset, self.file_size(), "code item offset"); let code_item = self.non_null_data_ptr(offset)?; CodeItemAccessor::from_code_item( - &self, + self, code_item, offset + std::mem::size_of::() as u32, ) @@ -544,16 +537,30 @@ impl<'a> DexFile<'a> { Ok(()) } + #[inline] + fn maplist_available(&self) -> bool { + if self.header.map_off == 0x00 { + return false; + } + + let size = self.file_size(); + let end = (self.header.map_off as usize) + std::mem::size_of::(); + end as usize > size || !plain::is_aligned::(&self.mmap[0..end as usize]) + } + fn init_sections_from_maplist(&mut self) { - if self.header.map_off == 0x00 - || self.header.map_off as usize > self.file_size() - std::mem::size_of::() - { + if !self.maplist_available() { // bad offset return; } let map_list_size_off = self.header.map_off; - let map_list_off = self.header.map_off + std::mem::size_of::() as u32; + let map_list_off = (self.header.map_off as usize) + std::mem::size_of::(); + if map_list_off >= self.file_size() as usize { + // bad offset + return; + } + let count: &u32 = match self.non_null_data_ptr(map_list_size_off) { Ok(v) => v, Err(_) => { @@ -571,22 +578,23 @@ impl<'a> DexFile<'a> { } // we should unwrap this here - let items = match self.non_null_array_data_ptr::(map_list_off, *count as usize) { - Ok(v) => v, - Err(_) => { - // 
bad file will be reported through verifier - return; - } - }; + let items = + match self.non_null_array_data_ptr::(map_list_off as u32, *count as usize) { + Ok(v) => v, + Err(_) => { + // bad file will be reported through verifier + return; + } + }; for map_item in items { match map_item.type_ { MapItemType::MethodHandleItem => { self.method_handles = - DexFile::get_section(&self.mmap, map_item.off, map_item.size) + DexFile::get_section(self.mmap, map_item.off, map_item.size) } MapItemType::CallSiteIdItem => { self.call_site_ids = - DexFile::get_section(&self.mmap, map_item.off, map_item.size) + DexFile::get_section(self.mmap, map_item.off, map_item.size) } MapItemType::HiddenapiClassData => { let item_off = map_item.off as usize; diff --git a/src/file/verifier.rs b/src/file/verifier.rs index 65637d2..e13916b 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -3,10 +3,10 @@ use adler32; use crate::{dex_err, error::DexError, Result}; use super::{ - DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, + DexContainer, DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, }; -impl<'a> DexFile<'a> { +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn is_magic_valid(&self) -> bool { &self.header.get_magic()[..4] == DEX_MAGIC } @@ -16,7 +16,7 @@ impl<'a> DexFile<'a> { DEX_MAGIC_VERSIONS.contains(&version_raw) } - pub fn verify(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { + pub fn verify(dex: &DexFile<'a, C>, verify_checksum: bool) -> Result<()> { check_header(dex, verify_checksum)?; // REVISIT: maybe validate map list items Ok(()) @@ -29,7 +29,10 @@ impl<'a> DexFile<'a> { } } -fn check_header(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { +fn check_header<'a, C>(dex: &DexFile<'a, C>, verify_checksum: bool) -> Result<()> +where + C: DexContainer<'a>, +{ let size = dex.file_size(); if size < std::mem::size_of::
() { return dex_err!(TruncatedFile); @@ -40,7 +43,9 @@ fn check_header(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { } if !dex.is_version_valid() { - return dex_err!(UnknownDexVersion { version: dex.header.get_version() }); + return dex_err!(UnknownDexVersion { + version: dex.header.get_version() + }); } // check file size from header @@ -131,12 +136,15 @@ fn check_header(dex: &DexFile<'_>, verify_checksum: bool) -> Result<()> { Ok(()) } -fn check_valid_offset_and_size( - dex: &DexFile<'_>, +fn check_valid_offset_and_size<'a, C>( + dex: &DexFile<'a, C>, offset: u32, size: u32, label: &'static str, -) -> Result<()> { +) -> Result<()> +where + C: DexContainer<'a>, +{ if size == 0 { if offset != 0 { return dex_err!(BadOffsetNoSize { From ac4ea430d5c1197649b72aea54fc0687524f296a Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 11:52:37 +0100 Subject: [PATCH 13/46] Fuzzing test for main DEX parsing method --- fuzz/.gitignore | 4 ++++ fuzz/Cargo.toml | 21 +++++++++++++++++++++ fuzz/fuzz_targets/from_raw_parts.rs | 14 ++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/from_raw_parts.rs diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..4ee7be8 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "dexrs-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.dexrs] +path = ".." 
+ +[[bin]] +name = "from_raw_parts" +path = "fuzz_targets/from_raw_parts.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/from_raw_parts.rs b/fuzz/fuzz_targets/from_raw_parts.rs new file mode 100644 index 0000000..647c232 --- /dev/null +++ b/fuzz/fuzz_targets/from_raw_parts.rs @@ -0,0 +1,14 @@ +#![no_main] +#![allow(non_snake_case)] + +use dexrs::file::{DexFile, DexLocation, InMemoryDexContainer}; + +extern crate libfuzzer_sys; +extern crate dexrs; + +libfuzzer_sys::fuzz_target!(|data: &[u8]| { + // this must not panic + if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { + let _ = dex; + } +}); From b1b8868ec22bccf5263feb68220c691ae76e66db Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 11:57:37 +0100 Subject: [PATCH 14/46] updated dump.rs to match updated DexFile implementation --- src/file/dump.rs | 60 +++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/src/file/dump.rs b/src/file/dump.rs index 13addf6..034864b 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -4,20 +4,35 @@ use crate::{ Result, }; -use super::{vreg, Code, DexFile, Format, Instruction, MethodId, StringId, TypeId}; +use super::{vreg, Code, DexContainer, DexFile, Format, Instruction, MethodId, StringId, TypeId}; -impl<'a> DexFile<'a> { - pub fn pretty_field(&self, field_idx: u32, with_type: bool) -> String { - match self.pretty_field_opt(field_idx, with_type) { +pub mod pretty_opts { + + #[derive(Copy, Clone, PartialEq, Eq)] + pub enum Field { + WithType, + NoType, + } + + #[derive(Copy, Clone, PartialEq, Eq)] + pub enum Method { + WithSig, + NoSig, + } +} + +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + pub fn pretty_field(&self, field_idx: u32, opts: pretty_opts::Field) -> String { + match self.pretty_field_opt(field_idx, opts) { Ok(s) => s, Err(_) => format!("<>"), } } - pub fn pretty_field_opt(&self, field_idx: 
u32, with_type: bool) -> Result { + pub fn pretty_field_opt(&self, field_idx: u32, opts: pretty_opts::Field) -> Result { let field_id = self.get_field_id(field_idx)?; let mut result = String::new(); - if with_type { + if opts == pretty_opts::Field::WithType { result.push_str(&self.pretty_type_opt_at(field_id.type_idx)?); result.push_str(" "); } @@ -57,23 +72,26 @@ impl<'a> DexFile<'a> { }) } - pub fn pretty_method_at(&self, method_idx: u32, with_sig: bool) -> String { - match self.pretty_method_opt_at(method_idx, with_sig) { + pub fn pretty_method_at(&self, method_idx: u32, opts: pretty_opts::Method) -> String { + match self.pretty_method_opt_at(method_idx, opts) { Ok(s) => s, Err(_) => format!("<>"), } } - pub fn pretty_method_opt_at(&self, idx: u32, with_sig: bool) -> Result { - self.pretty_method_opt(self.get_method_id(idx)?, with_sig) + pub fn pretty_method_opt_at(&self, idx: u32, opts: pretty_opts::Method) -> Result { + self.pretty_method_opt(self.get_method_id(idx)?, opts) } - pub fn pretty_method_opt(&self, method_id: &MethodId, with_sig: bool) -> Result { + pub fn pretty_method_opt( + &self, + method_id: &MethodId, + opts: pretty_opts::Method, + ) -> Result { let mut result = String::new(); - let proto_id = if with_sig { - Some(self.get_proto_id(method_id.proto_idx)?) 
- } else { - None + let proto_id = match opts { + pretty_opts::Method::WithSig => Some(self.get_proto_id(method_id.proto_idx)?), + pretty_opts::Method::NoSig => None, }; if let Some(proto_id) = proto_id { @@ -181,7 +199,7 @@ impl<'a> Instruction<'a> { format!( "{opcode} v{}, {} // field@{}", vreg::A(self)?, - dex.pretty_field(field_idx, true), + dex.pretty_field(field_idx, pretty_opts::Field::WithType), field_idx ) } @@ -220,7 +238,7 @@ impl<'a> Instruction<'a> { "{opcode} v{}, v{}, {} // field@{}", vreg::A(self)?, vreg::B(self)?, - dex.pretty_field(index, true), + dex.pretty_field(index, pretty_opts::Field::WithType), index ) } @@ -287,7 +305,7 @@ impl<'a> Instruction<'a> { ) => { format!( "{opcode} {{{args_str}}}, {} // method@{}", - dex.pretty_method_at(index, true), + dex.pretty_method_at(index, pretty_opts::Method::WithSig), index ) } @@ -315,7 +333,7 @@ impl<'a> Instruction<'a> { "{opcode} {{v{} .. v{}}}, {} // method@{}", var_range.start(), var_range.end(), - dex.pretty_method_at(index, true), + dex.pretty_method_at(index, pretty_opts::Method::WithSig), index ) } @@ -350,7 +368,7 @@ impl<'a> Instruction<'a> { if let Some(dex) = dex_file { format!( "{opcode} {{{args_str}}}, {}, {} // method@{}, proto@{}", - dex.pretty_method_at(method_idx, true), + dex.pretty_method_at(method_idx, pretty_opts::Method::WithSig), dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, method_idx, proto_idx @@ -372,7 +390,7 @@ impl<'a> Instruction<'a> { "{opcode} {{v{} .. 
v{}}}, {}, {} // method@{}, proto@{}", args_range.start(), args_range.end(), - dex.pretty_method_at(method_idx, true), + dex.pretty_method_at(method_idx, pretty_opts::Method::WithSig), dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, method_idx, proto_idx From 7e3a65df5358f2d95ccb3be73b990bc9911cdebc Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 11:58:59 +0100 Subject: [PATCH 15/46] renamed pretty_opts to prettify --- src/file/dump.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/file/dump.rs b/src/file/dump.rs index 034864b..d4209d1 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -6,7 +6,7 @@ use crate::{ use super::{vreg, Code, DexContainer, DexFile, Format, Instruction, MethodId, StringId, TypeId}; -pub mod pretty_opts { +pub mod prettify { #[derive(Copy, Clone, PartialEq, Eq)] pub enum Field { @@ -22,17 +22,17 @@ pub mod pretty_opts { } impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - pub fn pretty_field(&self, field_idx: u32, opts: pretty_opts::Field) -> String { + pub fn pretty_field(&self, field_idx: u32, opts: prettify::Field) -> String { match self.pretty_field_opt(field_idx, opts) { Ok(s) => s, Err(_) => format!("<>"), } } - pub fn pretty_field_opt(&self, field_idx: u32, opts: pretty_opts::Field) -> Result { + pub fn pretty_field_opt(&self, field_idx: u32, opts: prettify::Field) -> Result { let field_id = self.get_field_id(field_idx)?; let mut result = String::new(); - if opts == pretty_opts::Field::WithType { + if opts == prettify::Field::WithType { result.push_str(&self.pretty_type_opt_at(field_id.type_idx)?); result.push_str(" "); } @@ -72,26 +72,26 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { }) } - pub fn pretty_method_at(&self, method_idx: u32, opts: pretty_opts::Method) -> String { + pub fn pretty_method_at(&self, method_idx: u32, opts: prettify::Method) -> String { match self.pretty_method_opt_at(method_idx, 
opts) { Ok(s) => s, Err(_) => format!("<>"), } } - pub fn pretty_method_opt_at(&self, idx: u32, opts: pretty_opts::Method) -> Result { + pub fn pretty_method_opt_at(&self, idx: u32, opts: prettify::Method) -> Result { self.pretty_method_opt(self.get_method_id(idx)?, opts) } pub fn pretty_method_opt( &self, method_id: &MethodId, - opts: pretty_opts::Method, + opts: prettify::Method, ) -> Result { let mut result = String::new(); let proto_id = match opts { - pretty_opts::Method::WithSig => Some(self.get_proto_id(method_id.proto_idx)?), - pretty_opts::Method::NoSig => None, + prettify::Method::WithSig => Some(self.get_proto_id(method_id.proto_idx)?), + prettify::Method::NoSig => None, }; if let Some(proto_id) = proto_id { @@ -199,7 +199,7 @@ impl<'a> Instruction<'a> { format!( "{opcode} v{}, {} // field@{}", vreg::A(self)?, - dex.pretty_field(field_idx, pretty_opts::Field::WithType), + dex.pretty_field(field_idx, prettify::Field::WithType), field_idx ) } @@ -238,7 +238,7 @@ impl<'a> Instruction<'a> { "{opcode} v{}, v{}, {} // field@{}", vreg::A(self)?, vreg::B(self)?, - dex.pretty_field(index, pretty_opts::Field::WithType), + dex.pretty_field(index, prettify::Field::WithType), index ) } @@ -305,7 +305,7 @@ impl<'a> Instruction<'a> { ) => { format!( "{opcode} {{{args_str}}}, {} // method@{}", - dex.pretty_method_at(index, pretty_opts::Method::WithSig), + dex.pretty_method_at(index, prettify::Method::WithSig), index ) } @@ -333,7 +333,7 @@ impl<'a> Instruction<'a> { "{opcode} {{v{} .. 
v{}}}, {} // method@{}", var_range.start(), var_range.end(), - dex.pretty_method_at(index, pretty_opts::Method::WithSig), + dex.pretty_method_at(index, prettify::Method::WithSig), index ) } @@ -368,7 +368,7 @@ impl<'a> Instruction<'a> { if let Some(dex) = dex_file { format!( "{opcode} {{{args_str}}}, {}, {} // method@{}, proto@{}", - dex.pretty_method_at(method_idx, pretty_opts::Method::WithSig), + dex.pretty_method_at(method_idx, prettify::Method::WithSig), dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, method_idx, proto_idx @@ -390,7 +390,7 @@ impl<'a> Instruction<'a> { "{opcode} {{v{} .. v{}}}, {}, {} // method@{}, proto@{}", args_range.start(), args_range.end(), - dex.pretty_method_at(method_idx, pretty_opts::Method::WithSig), + dex.pretty_method_at(method_idx, prettify::Method::WithSig), dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, method_idx, proto_idx From b202c67f772c87fd7c3feb676b104d327f0b3170 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 12:37:26 +0100 Subject: [PATCH 16/46] TryItem implementation and mutable dex file preparations --- src/file/code_item_accessors.rs | 11 ++++- src/file/container.rs | 20 ++++++-- src/file/mod.rs | 86 +++++++++++++++++++++++++++------ src/file/structs.rs | 12 ++++- 4 files changed, 108 insertions(+), 21 deletions(-) diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index 699e30d..ac3f702 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -3,6 +3,7 @@ use crate::Result; use super::{CodeItem, DexContainer, DexFile, Instruction}; pub struct CodeItemAccessor<'a> { + code_off: u32, code_item: &'a CodeItem, insns: &'a [u16], } @@ -41,7 +42,15 @@ impl<'a> CodeItemAccessor<'a> { 0 => &[], _ => dex.get_insns_raw(code_off, code_item.insns_size)?, }; - Ok(CodeItemAccessor { code_item, insns }) + Ok(CodeItemAccessor { + code_off, + code_item, + insns, + }) + } + + pub fn code_off(&self) -> u32 
{ + self.code_off } pub fn code_item(&self) -> &'a CodeItem { diff --git a/src/file/container.rs b/src/file/container.rs index da2fc7a..7919f71 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -1,6 +1,5 @@ -use std::ops::Deref; - -use memmap2::MmapAsRawDesc; +use std::ops::{Deref, DerefMut}; +use memmap2::{MmapAsRawDesc, MmapMut}; use crate::Result; @@ -16,6 +15,12 @@ pub trait DexContainer<'a>: AsRef<[u8]> + Deref + 'a { } } +pub trait DexContainerMut<'a>: DexContainer<'a> + DerefMut { + fn data_mut(&'a mut self) -> &'a mut [u8] { + self.deref_mut() + } +} + impl<'a> DexContainer<'a> for memmap2::Mmap {} pub struct InMemoryDexContainer<'a>(&'a [u8]); @@ -90,3 +95,12 @@ impl DexFileContainer { &self.mmap } } + +impl DexContainer<'_> for MmapMut {} +impl DexContainerMut<'_> for MmapMut {} + +impl<'a> DexContainer<'a> for &'a mut [u8] {} +impl<'a> DexContainerMut<'a> for &'a mut [u8] {} + +impl DexContainer<'_> for Vec {} +impl DexContainerMut<'_> for Vec {} diff --git a/src/file/mod.rs b/src/file/mod.rs index 55ec78a..b8ca20c 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -73,19 +73,6 @@ pub struct DexFile<'a, T: DexContainer<'a> = Mmap> { location: DexLocation, } -macro_rules! check_lt { - ($idx:expr, $count:expr, $item_ty:tt) => { - if $idx >= $count { - panic!( - "Index({}) of {} is bigger than maximum({})", - $idx, - stringify!($item_ty), - $count - ); - } - }; -} - macro_rules! check_lt_result { ($idx:expr, $count:expr, $item_ty:tt) => { if ($idx as usize) >= ($count as usize) { @@ -98,6 +85,11 @@ macro_rules! 
check_lt_result { }; } +// writer +impl<'a, C: DexContainerMut<'a>> DexFile<'a, C> { + //TODO +} + impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline] fn header_available(base: &'a C) -> bool { @@ -360,6 +352,9 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } // method ids related methods + //------------------------------------------------------------------------------ + // Method Ids + //------------------------------------------------------------------------------ #[inline(always)] pub fn get_method_id(&self, idx: u32) -> Result<&'a MethodId> { check_lt_result!(idx, self.header.method_ids_size, MethodId); @@ -378,11 +373,70 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // classdef related methods #[inline(always)] - pub fn get_class_def(&self, idx: u32) -> &'a ClassDef { - check_lt!(idx, self.header.class_defs_size, ClassDef); - &self.class_defs[idx as usize] + pub fn get_class_def(&self, idx: u32) -> Result<&'a ClassDef> { + check_lt_result!(idx, self.header.class_defs_size, ClassDef); + Ok(&self.class_defs[idx as usize]) + } + + //------------------------------------------------------------------------------ + // Method Handles + //------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_method_handle(&self, idx: u32) -> Result<&'a MethodHandleItem> { + check_lt_result!(idx, self.method_handles.len(), MethodHandleItem); + Ok(&self.method_handles[idx as usize]) + } + + #[inline(always)] + pub fn num_method_handles(&self) -> u32 { + self.method_handles.len() as u32 + } + + #[inline(always)] + pub fn get_method_handles(&self) -> &'a [MethodHandleItem] { + self.method_handles + } + + //------------------------------------------------------------------------------ + // CallSites + //------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_call_site_id(&self, idx: u32) -> Result<&'a CallSiteIdItem> { + check_lt_result!(idx, 
self.call_site_ids.len(), CallSiteIdItem); + Ok(&self.call_site_ids[idx as usize]) + } + + #[inline(always)] + pub fn num_call_site_ids(&self) -> u32 { + self.call_site_ids.len() as u32 + } + + #[inline(always)] + pub fn get_call_site_ids(&self) -> &'a [CallSiteIdItem] { + self.call_site_ids + } + + //------------------------------------------------------------------------------ + // TryItem + //------------------------------------------------------------------------------ + pub fn get_try_item(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { + let offset = (ca.code_off() as usize) + + std::mem::size_of::() + + ca.insns_size_in_code_units() as usize; + // must be 4-byte aligned + let offset = (offset + 3) & !3; + self.get_try_items_raw(offset as u32, ca.tries_size() as u16) + } + + #[inline] + pub fn get_try_items_raw(&'a self, tries_off: u32, tries_size: u16) -> Result<&'a [TryItem]> { + check_lt_result!(tries_off, self.file_size(), TryItem); + self.non_null_array_data_ptr(tries_off, tries_size as usize) } + //------------------------------------------------------------------------------ + // ClassDefs + //------------------------------------------------------------------------------ #[inline(always)] pub fn num_class_defs(&self) -> u32 { self.header.class_defs_size diff --git a/src/file/structs.rs b/src/file/structs.rs index 28f9377..cf3e400 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -178,4 +178,14 @@ pub struct CodeItem { } -unsafe impl plain::Plain for CodeItem {} \ No newline at end of file +unsafe impl plain::Plain for CodeItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct TryItem { + pub start_addr: u32, + pub insn_count: u16, + pub handler_off: u16, +} + +unsafe impl plain::Plain for TryItem {} \ No newline at end of file From 7e8119d9cd9e2e8c2061f6f8bde498544e1ae820 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 16:19:48 +0100 Subject: [PATCH 17/46] Add 
benchmark support --- + Changed leb128 parsing to return a Result + Removed unused DexFile field in Method, ClassDataAccessor and Field structs + ClassDataAccessor can be used on all DexFile variants --- Cargo.toml | 7 ++ benches/parse.rs | 34 ++++++ fuzz/Cargo.toml | 7 ++ fuzz/fuzz_targets/class_accessor.rs | 21 ++++ fuzz/fuzz_targets/from_raw_parts.rs | 8 +- src/error.rs | 19 ++- src/file/class_accessor.rs | 176 +++++++++++++++------------- src/file/mod.rs | 15 ++- src/file/structs.rs | 41 ++++++- src/leb128.rs | 28 ++--- 10 files changed, 254 insertions(+), 102 deletions(-) create mode 100644 benches/parse.rs create mode 100644 fuzz/fuzz_targets/class_accessor.rs diff --git a/Cargo.toml b/Cargo.toml index 6f0ec3d..8daa2be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,10 @@ openssl = "0.10.64" plain = "0.2.3" thiserror = "2.0.11" varint-simd = "0.4.1" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } + +[[bench]] +name = "parse" +harness = false \ No newline at end of file diff --git a/benches/parse.rs b/benches/parse.rs new file mode 100644 index 0000000..a05d5a7 --- /dev/null +++ b/benches/parse.rs @@ -0,0 +1,34 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use dexrs::file::{DexFile, DexLocation, InMemoryDexContainer}; +use std::hint::black_box; + +fn parse_and_verify_small_file(c: &mut Criterion) { + let data = include_bytes!("../tests/prime/prime.dex"); + c.bench_function("parse_and_verify_small_file", |b| { + b.iter(|| { + let buf = black_box(data); + let container = InMemoryDexContainer::new(buf); + if let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { + if DexFile::verify(&dex, true).is_ok() { + black_box(dex); + } + } + }) + }); +} + +fn parse_small_file(c: &mut Criterion) { + let data = include_bytes!("../tests/prime/prime.dex"); + c.bench_function("parse_small_file", |b| { + b.iter(|| { + let buf = black_box(data); + let container = InMemoryDexContainer::new(buf); + if 
let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { + black_box(dex); + } + }) + }); +} + +criterion_group!(benches, parse_and_verify_small_file, parse_small_file); +criterion_main!(benches); diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 4ee7be8..3989198 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -19,3 +19,10 @@ path = "fuzz_targets/from_raw_parts.rs" test = false doc = false bench = false + +[[bin]] +name = "class_accessor" +path = "fuzz_targets/class_accessor.rs" +test = false +doc = false +bench = false \ No newline at end of file diff --git a/fuzz/fuzz_targets/class_accessor.rs b/fuzz/fuzz_targets/class_accessor.rs new file mode 100644 index 0000000..8539f54 --- /dev/null +++ b/fuzz/fuzz_targets/class_accessor.rs @@ -0,0 +1,21 @@ +#![no_main] +#![allow(non_snake_case)] + +use dexrs::file::{DexFile, DexLocation}; + +extern crate dexrs; +extern crate libfuzzer_sys; + +libfuzzer_sys::fuzz_target!(|data: &[u8]| { + // this must not panic + if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { + // we skip verification to test class_accessor here + if let Ok(class_def) = dex.get_class_def(0) { + if let Ok(Some(ca)) = dex.get_class_accessor(&class_def) { + // must not panic + let _fields = ca.get_fields(); + let _methods = ca.get_methods(); + } + } + } +}); diff --git a/fuzz/fuzz_targets/from_raw_parts.rs b/fuzz/fuzz_targets/from_raw_parts.rs index 647c232..8242b02 100644 --- a/fuzz/fuzz_targets/from_raw_parts.rs +++ b/fuzz/fuzz_targets/from_raw_parts.rs @@ -1,14 +1,16 @@ #![no_main] #![allow(non_snake_case)] -use dexrs::file::{DexFile, DexLocation, InMemoryDexContainer}; +use dexrs::file::{DexFile, DexLocation}; -extern crate libfuzzer_sys; extern crate dexrs; +extern crate libfuzzer_sys; libfuzzer_sys::fuzz_target!(|data: &[u8]| { // this must not panic if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { - let _ = dex; + if DexFile::verify(&dex, true).is_ok() { + let _ = dex; + } } }); 
diff --git a/src/error.rs b/src/error.rs index 4b274eb..155f5ec 100644 --- a/src/error.rs +++ b/src/error.rs @@ -63,7 +63,7 @@ pub enum DexError { }, #[error("Bad string data({0}) does not end with a null byte!")] - BadStringData(usize), + BadStringDataMissingNullByte(usize), #[error("{0}")] Mutf8DecodeError(#[from] std::string::FromUtf16Error), @@ -84,6 +84,23 @@ pub enum DexError { insn_name: &'static str, operand: &'static str, }, + + #[error("Failed to parse varint: {0}")] + VarIntError(#[from] varint_simd::VarIntDecodeError), + + #[error("Bad string data({offset}) contains invalid LEB128({kind:?}) which can't be converted to a valid u32")] + BadStringData { + offset: usize, + #[source] + kind: varint_simd::VarIntDecodeError, + }, + + #[error("Encountered invalid encoded index that would overflow: index({index}) + next index({next_index}) > u32::MAX for {item_ty}")] + BadEncodedIndex { + index: u32, + next_index: u32, + item_ty: &'static str, + }, } #[macro_export] diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index ede5189..3ba2e02 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -1,29 +1,27 @@ -use super::{ClassDef, DexFile, FieldId, InvokeType, MethodId, ACC_STATIC}; +use super::{ClassDef, DexContainer, DexFile, InvokeType, ACC_STATIC}; use crate::{ + dex_err, + error::DexError, file::{ACC_CONSTRUCTOR, ACC_INTERFACE}, leb128::decode_leb128_off, Result, }; -pub trait ClassItemBase<'a>: Copy + Clone { - fn read(&mut self, data: &'a [u8], pos: &mut usize); - - fn init(dex: &'a DexFile<'a>) -> Self; +pub trait ClassItemBase: Copy + Clone + Default { + fn read(&mut self, data: &[u8], pos: &mut usize) -> Result<()>; fn next_section(&mut self); } #[derive(Copy, Clone)] -pub struct Method<'a> { - dex: &'a DexFile<'a>, - +pub struct Method { pub index: u32, pub access_flags: u32, pub code_offset: u32, pub is_static_or_direct: bool, } -impl<'a> Method<'a> { +impl<'a> Method { #[inline] pub fn 
get_direct_invoke_type(&self) -> InvokeType { if self.access_flags & ACC_STATIC != 0 { @@ -33,11 +31,6 @@ impl<'a> Method<'a> { } } - #[inline(always)] - pub fn get_method_id(&self) -> Result<&'a MethodId> { - self.dex.get_method_id(self.index) - } - #[inline(always)] pub fn get_virtual_invoke_type(&self, class_access_flags: u32) -> InvokeType { debug_assert!(self.access_flags & ACC_STATIC == 0x00); @@ -51,70 +44,84 @@ impl<'a> Method<'a> { } } -impl<'a> ClassItemBase<'a> for Method<'a> { - fn read(&mut self, data: &'a [u8], pos: &mut usize) { - self.index += decode_leb128_off::(&data[*pos..], pos); - self.access_flags = decode_leb128_off::(&data[*pos..], pos); - self.code_offset = decode_leb128_off::(&data[*pos..], pos); +impl<'a> ClassItemBase for Method { + fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { + let target = self.index as usize; + let value = decode_leb128_off::(&data[*pos..], pos)?; + if target + value as usize > u32::MAX as usize { + return dex_err!(BadEncodedIndex { + index: self.index, + next_index: value, + item_ty: "Method" + }); + } + self.index += value; + self.access_flags = decode_leb128_off::(&data[*pos..], pos)?; + self.code_offset = decode_leb128_off::(&data[*pos..], pos)?; + Ok(()) + } + + fn next_section(&mut self) { + self.is_static_or_direct = true; } +} - fn init(dex: &'a DexFile<'a>) -> Self { +impl Default for Method { + fn default() -> Self { Self { - dex, index: 0, access_flags: 0, code_offset: 0, is_static_or_direct: false, } } - - fn next_section(&mut self) { - self.is_static_or_direct = true; - } } - #[derive(Copy, Clone)] -pub struct Field<'a> { - dex: &'a DexFile<'a>, - +pub struct Field { pub index: u32, pub access_flags: u32, pub is_static: bool, } -impl<'a> Field<'a> { +impl<'a> Field { #[inline(always)] pub fn is_static(&self) -> bool { self.is_static } +} - pub fn get_field_id(&self) -> Result<&'a FieldId> { - self.dex.get_field_id(self.index) +impl<'a> ClassItemBase for Field { + fn read(&mut self, 
data: &'_ [u8], pos: &mut usize) -> Result<()> { + let target = self.index as usize; + let value = decode_leb128_off::(&data[*pos..], pos)?; + if target + value as usize > u32::MAX as usize { + return dex_err!(BadEncodedIndex { + index: self.index, + next_index: value, + item_ty: "Field" + }); + } + self.index += value; + self.access_flags = decode_leb128_off::(&data[*pos..], pos)?; + Ok(()) } -} -impl<'a> ClassItemBase<'a> for Field<'a> { - fn read(&mut self, data: &'a [u8], pos: &mut usize) { - self.index += decode_leb128_off::(&data[*pos..], pos); - self.access_flags = decode_leb128_off::(&data[*pos..], pos); + fn next_section(&mut self) { + self.is_static = false; } +} - fn init(dex: &'a DexFile<'a>) -> Self { +impl Default for Field { + fn default() -> Self { Self { - dex, index: 0, access_flags: 0, is_static: true, } } - - fn next_section(&mut self) { - self.is_static = false; - } } pub struct ClassAccessor<'a> { - dex: &'a DexFile<'a>, ptr_pos: usize, class_data: &'a [u8], @@ -127,30 +134,38 @@ pub struct ClassAccessor<'a> { static_fields_off: u32, } -impl<'a> DexFile<'a> { - pub fn get_class_accessor(&self, class_def: &ClassDef) -> Option> { +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + pub fn get_class_accessor(&self, class_def: &ClassDef) -> Result>> { match class_def.class_data_off { - 0 => None, - off => Some(ClassAccessor::from_raw(self, &self.mmap[off as usize..])), + 0 => Ok(None), + off => { + if off as usize >= self.file_size() { + return dex_err!(BadOffsetTooLarge { + offset: off, + size: self.file_size(), + section: "class_data_off" + }); + } + Ok(Some(ClassAccessor::from_raw(&self.mmap[off as usize..])?)) + } } } } -type FieldVisitor = fn(&Field<'_>) -> Result<()>; -type MethodVisitor = fn(&Method<'_>) -> Result<()>; +type FieldVisitor = fn(&Field) -> Result<()>; +type MethodVisitor = fn(&Method) -> Result<()>; -fn null_method_visitor(_method: &Method<'_>) -> Result<()> { +fn null_method_visitor(_method: &Method) -> Result<()> { Ok(()) } -fn 
null_field_visitor(_field: &Field<'_>) -> Result<()> { +fn null_field_visitor(_field: &Field) -> Result<()> { Ok(()) } impl<'a> ClassAccessor<'a> { - pub fn from_raw(dex: &'a DexFile<'a>, class_data: &'a [u8]) -> Self { + pub fn from_raw(class_data: &'a [u8]) -> Result { let mut accessor = Self { - dex, ptr_pos: 0, class_data, num_direct_methods: 0, @@ -159,15 +174,15 @@ impl<'a> ClassAccessor<'a> { num_instance_fields: 0, static_fields_off: 0, }; - accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos); + accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; accessor.num_instance_fields = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; accessor.num_direct_methods = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; accessor.num_virtual_methods = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos); + decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; accessor.static_fields_off = accessor.ptr_pos as u32; - accessor + Ok(accessor) } #[inline(always)] @@ -216,7 +231,7 @@ impl<'a> ClassAccessor<'a> { direct_method_visitor: MethodVisitor, virtual_method_visitor: MethodVisitor, ) -> Result<()> { - let mut field = Field::init(self.dex); + let mut field = Field::default(); let mut offset = self.static_fields_off as usize; if offset == 0 { panic!("Static fields offset is zero which means there is no class data associated with this class"); @@ -237,7 +252,7 @@ impl<'a> ClassAccessor<'a> { &mut field, )?; - let mut method = Method::init(self.dex); + let mut method = Method::default(); self.visit_members( self.num_direct_methods, &mut offset, @@ -254,9 +269,8 @@ impl<'a> ClassAccessor<'a> { } #[inline(always)] - pub fn get_fields(&self) -> DataIterator<'a, 
Field<'a>> { + pub fn get_fields(&'a self) -> DataIterator<'a, Field> { DataIterator::new( - self.dex, self.class_data, self.static_fields_off as usize, self.num_static_fields as usize, @@ -265,9 +279,8 @@ impl<'a> ClassAccessor<'a> { } #[inline(always)] - pub fn get_static_fieds(&self) -> impl Iterator> { + pub fn get_static_fieds(&'a self) -> DataIterator<'a, Field> { DataIterator::new( - self.dex, self.class_data, self.static_fields_off as usize, self.num_static_fields as usize, @@ -276,13 +289,13 @@ impl<'a> ClassAccessor<'a> { } #[inline(always)] - pub fn get_instance_fields(&self) -> impl Iterator> { + pub fn get_instance_fields(&'a self) -> impl Iterator + 'a { self.get_fields().skip(self.num_static_fields as usize) } #[inline(always)] - pub fn get_methods(&self) -> Result>> { - let mut field = Field::init(self.dex); + pub fn get_methods(&self) -> Result + 'a> { + let mut field = Field::default(); let mut offset = self.static_fields_off as usize; self.visit_members( self.num_fields() as u32, @@ -292,7 +305,6 @@ impl<'a> ClassAccessor<'a> { )?; // switch to instance fields Ok(DataIterator::new( - self.dex, self.class_data, offset as usize, self.num_direct_methods as usize, @@ -301,12 +313,12 @@ impl<'a> ClassAccessor<'a> { } #[inline(always)] - pub fn get_direct_methods(&self) -> Result>> { + pub fn get_direct_methods(&self) -> Result + 'a> { Ok(self.get_methods()?.take(self.num_direct_methods as usize)) } #[inline(always)] - pub fn get_virtual_methods(&self) -> Result>> { + pub fn get_virtual_methods(&self) -> Result + 'a> { Ok(self.get_methods()?.skip(self.num_direct_methods as usize)) } @@ -319,18 +331,18 @@ impl<'a> ClassAccessor<'a> { iter: &mut T, ) -> Result<()> where - T: ClassItemBase<'a>, + T: ClassItemBase, F: Fn(&T) -> Result<()>, { for _ in 0..count { - iter.read(&self.class_data, offset); + iter.read(&self.class_data, offset)?; visitor(&iter)?; } Ok(()) } } -pub struct DataIterator<'a, T: ClassItemBase<'a>> { +pub struct DataIterator<'a, T: 
ClassItemBase> { class_data: &'a [u8], value: T, @@ -340,9 +352,8 @@ pub struct DataIterator<'a, T: ClassItemBase<'a>> { end_pos: usize, // const } -impl<'a, T: ClassItemBase<'a>> DataIterator<'a, T> { +impl<'a, T: ClassItemBase> DataIterator<'a, T> { pub fn new( - dex: &'a DexFile<'a>, class_data: &'a [u8], start_pos: usize, partition_pos: usize, @@ -350,7 +361,7 @@ impl<'a, T: ClassItemBase<'a>> DataIterator<'a, T> { ) -> Self { Self { class_data, - value: T::init(dex), + value: T::default(), pos: 0, partition_pos, off: start_pos, @@ -367,7 +378,7 @@ impl<'a, T: ClassItemBase<'a>> DataIterator<'a, T> { } } -impl<'a, T: ClassItemBase<'a>> Iterator for DataIterator<'a, T> { +impl<'a, T: ClassItemBase> Iterator for DataIterator<'a, T> { type Item = T; fn next(&mut self) -> Option { @@ -375,7 +386,14 @@ impl<'a, T: ClassItemBase<'a>> Iterator for DataIterator<'a, T> { if self.pos == self.partition_pos { self.value.next_section(); } - self.value.read(&self.class_data, &mut self.off); + match self.value.read(&self.class_data, &mut self.off) { + Ok(()) => {} + Err(e) => { + self.pos = self.end_pos; + // REVISIT: error propagation + return None; + } + } self.pos += 1; return Some(self.value); } diff --git a/src/file/mod.rs b/src/file/mod.rs index b8ca20c..52543aa 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -201,13 +201,22 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline] pub fn get_string_data(&self, string_id: &StringId) -> Result<(u32, &'a [u8])> { check_lt_result!(string_id.offset(), self.file_size(), "string-id"); - let (utf16_len, size) = decode_leb128(&self.mmap[string_id.offset()..]); + let (utf16_len, size) = match decode_leb128(&self.mmap[string_id.offset()..]) { + Ok((utf16_len, size)) => (utf16_len, size), + Err(DexError::VarIntError(e)) => { + return dex_err!(BadStringData { + offset: string_id.offset(), + kind: e + }); + } + _ => unreachable!(), + }; let start = string_id.offset() + size; check_lt_result!(start, self.file_size(), 
"string-data"); match &self.mmap[start..].iter().position(|x| *x == 0) { Some(pos) => Ok((utf16_len, &self.mmap[start..start + pos + 1])), - None => dex_err!(BadStringData, start), + None => dex_err!(BadStringDataMissingNullByte, start), } } @@ -374,7 +383,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // classdef related methods #[inline(always)] pub fn get_class_def(&self, idx: u32) -> Result<&'a ClassDef> { - check_lt_result!(idx, self.header.class_defs_size, ClassDef); + check_lt_result!(idx, self.class_defs.len(), ClassDef); Ok(&self.class_defs[idx as usize]) } diff --git a/src/file/structs.rs b/src/file/structs.rs index cf3e400..cf5ae2c 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -90,6 +90,7 @@ pub type TypeList<'a> = &'a [TypeItem]; #[repr(C)] #[derive(Debug)] pub struct MapItem { + // REVISIT: this may cause a panic on invalid input pub type_: MapItemType, unused_: u16, pub size: u32, @@ -188,4 +189,42 @@ pub struct TryItem { pub handler_off: u16, } -unsafe impl plain::Plain for TryItem {} \ No newline at end of file +unsafe impl plain::Plain for TryItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct AnnotationsDirectoryItem { + pub class_annotations_off: u32, + pub fields_size: u32, + pub methods_size: u32, + pub parameters_size: u32, +} + +unsafe impl plain::Plain for AnnotationsDirectoryItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct FieldAnnotationsItem { + pub field_idx: u32, + pub annotations_off: u32, +} + +unsafe impl plain::Plain for FieldAnnotationsItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct MethodAnnotationsItem { + pub method_idx: u32, + pub annotations_off: u32, +} + +unsafe impl plain::Plain for MethodAnnotationsItem {} + +#[repr(C)] +#[derive(Debug)] +pub struct ParameterAnnotationsItem { + pub method_idx: u32, + pub annotations_off: u32, +} + +unsafe impl plain::Plain for ParameterAnnotationsItem {} \ No newline at end of file diff --git a/src/leb128.rs b/src/leb128.rs index 64984a6..5690b65 100644 --- 
a/src/leb128.rs +++ b/src/leb128.rs @@ -1,27 +1,25 @@ use varint_simd; +use crate::Result; + #[inline(always)] -pub fn decode_leb128(data_in: &[u8]) -> (T, usize) { +pub fn decode_leb128(data_in: &[u8]) -> Result<(T, usize)> { // TODO: convert to result - match varint_simd::decode::(data_in) { - Ok((value, size)) => (value, size), - Err(err) => panic!( - "Error decoding LEB128: {:?}. Data: {:?}", - err, - data_in.as_ptr() - ), - } + Ok(varint_simd::decode::(data_in)?) } #[inline(always)] -pub fn decode_leb128p1(data_in: &[u8]) -> (i32, usize) { - let (result, size) = decode_leb128::(data_in); - ((result - 1) as i32, size) +pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { + let (result, size) = decode_leb128::(data_in)?; + Ok(((result - 1) as i32, size)) } #[inline(always)] -pub fn decode_leb128_off(data_in: &[u8], ptr_pos: &mut usize) -> T { - let (value, size) = decode_leb128(data_in); +pub fn decode_leb128_off( + data_in: &[u8], + ptr_pos: &mut usize, +) -> Result { + let (value, size) = decode_leb128(data_in)?; *ptr_pos += size; - value + Ok(value) } From c446ac28948a1dabdda1e56f1bc35e9d7bdff41e Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 17:37:19 +0100 Subject: [PATCH 18/46] Annotation accessor for class data --- benches/parse.rs | 19 ++++--- src/file/annotations.rs | 109 +++++++++++++++++++++++++++++++++++++ src/file/class_accessor.rs | 2 +- src/file/mod.rs | 61 +++++++++++++++++---- src/file/structs.rs | 5 +- src/file/verifier.rs | 1 + 6 files changed, 176 insertions(+), 21 deletions(-) create mode 100644 src/file/annotations.rs diff --git a/benches/parse.rs b/benches/parse.rs index a05d5a7..8d47f3c 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,16 +1,17 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use dexrs::file::{DexFile, DexLocation, InMemoryDexContainer}; -use std::hint::black_box; +use dexrs::file::{DexFile, DexLocation, Header, 
InMemoryDexContainer}; fn parse_and_verify_small_file(c: &mut Criterion) { let data = include_bytes!("../tests/prime/prime.dex"); c.bench_function("parse_and_verify_small_file", |b| { b.iter(|| { - let buf = black_box(data); - let container = InMemoryDexContainer::new(buf); + let container = InMemoryDexContainer::new(data); if let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { if DexFile::verify(&dex, true).is_ok() { - black_box(dex); + assert_eq!( + dex.expected_header_size(), + std::mem::size_of::
() as u32 + ); } } }) @@ -21,10 +22,12 @@ fn parse_small_file(c: &mut Criterion) { let data = include_bytes!("../tests/prime/prime.dex"); c.bench_function("parse_small_file", |b| { b.iter(|| { - let buf = black_box(data); - let container = InMemoryDexContainer::new(buf); + let container = InMemoryDexContainer::new(data); if let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { - black_box(dex); + assert_eq!( + dex.expected_header_size(), + std::mem::size_of::
() as u32 + ); } }) }); diff --git a/src/file/annotations.rs b/src/file/annotations.rs new file mode 100644 index 0000000..1957ab1 --- /dev/null +++ b/src/file/annotations.rs @@ -0,0 +1,109 @@ +use crate::Result; + +use super::{ + AnnotationSetItem, AnnotationsDirectoryItem, ClassDef, DexContainer, DexFile, + FieldAnnotationsItem, MethodAnnotationsItem, ParameterAnnotationsItem, +}; + +pub struct ClassAnnotationsAccessor<'a> { + class_def: &'a ClassDef, + + field_annotations: &'a [FieldAnnotationsItem], + method_annotations: &'a [MethodAnnotationsItem], + parameter_annotations: &'a [ParameterAnnotationsItem], + class_annotations: AnnotationSetItem<'a>, +} + +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + pub fn class_annotations( + &'a self, + class_def: &'a ClassDef, + ) -> Result> { + ClassAnnotationsAccessor::new(self, class_def) + } +} + +macro_rules! read_annotations { + ($dex:ident, $offset:ident, $size:expr, $ty:ty) => { + match $size { + 0 => &[], + s => $dex.non_null_array_data_ptr::<$ty>($offset as u32, s as usize)?, + } + }; +} + +impl<'a> ClassAnnotationsAccessor<'a> { + pub fn new(dex: &'a DexFile<'a, C>, class_def: &'a ClassDef) -> Result + where + C: DexContainer<'a>, + { + match dex.data_ptr::(class_def.annotations_off)? 
{ + None => Ok(ClassAnnotationsAccessor::new_empty(class_def)), + Some(item) => { + let mut start_offset = class_def.annotations_off as usize + + std::mem::size_of::(); + + let field_annotations = + read_annotations!(dex, start_offset, item.fields_size, FieldAnnotationsItem); + start_offset += + item.fields_size as usize * std::mem::size_of::(); + + let method_annotations = + read_annotations!(dex, start_offset, item.methods_size, MethodAnnotationsItem); + start_offset += + item.methods_size as usize * std::mem::size_of::(); + + let parameter_annotations = read_annotations!( + dex, + start_offset, + item.parameters_size, + ParameterAnnotationsItem + ); + + let class_annotations = dex.get_annotation_set(item.class_annotations_off)?; + Ok(Self { + class_def, + field_annotations, + method_annotations, + parameter_annotations, + class_annotations, + }) + } + } + } + + pub fn new_empty(class_def: &'a ClassDef) -> Self { + Self { + class_def, + field_annotations: &[], + method_annotations: &[], + parameter_annotations: &[], + class_annotations: &[], + } + } + + #[inline] + pub fn get_class_def(&self) -> &'a ClassDef { + self.class_def + } + + #[inline] + pub fn get_field_annotations(&self) -> &'a [FieldAnnotationsItem] { + self.field_annotations + } + + #[inline] + pub fn get_method_annotations(&self) -> &'a [MethodAnnotationsItem] { + self.method_annotations + } + + #[inline] + pub fn get_parameter_annotations(&self) -> &'a [ParameterAnnotationsItem] { + self.parameter_annotations + } + + #[inline] + pub fn get_class_annotations(&self) -> AnnotationSetItem<'a> { + self.class_annotations + } +} diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 3ba2e02..87b3386 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -388,7 +388,7 @@ impl<'a, T: ClassItemBase> Iterator for DataIterator<'a, T> { } match self.value.read(&self.class_data, &mut self.off) { Ok(()) => {} - Err(e) => { + Err(_) => { self.pos = self.end_pos; // REVISIT: 
error propagation return None; diff --git a/src/file/mod.rs b/src/file/mod.rs index 52543aa..91fef88 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -17,6 +17,8 @@ pub use code_item_accessors::*; pub mod container; pub mod dump; pub use container::*; +pub mod annotations; +pub use annotations::*; use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -184,7 +186,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // -- strings #[inline(always)] pub fn get_string_id(&self, idx: u32) -> Result<&'a StringId> { - check_lt_result!(idx, self.num_string_ids(), StringId); + check_lt_result!(idx, self.string_ids.len(), StringId); Ok(&self.string_ids[idx as usize]) } @@ -315,7 +317,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // -- fields #[inline] pub fn get_field_id(&self, idx: u32) -> Result<&'a FieldId> { - check_lt_result!(idx, self.header.field_ids_size, FieldId); + check_lt_result!(idx, self.field_ids.len(), FieldId); Ok(&self.field_ids[idx as usize]) } @@ -330,7 +332,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // Proto related methods pub fn get_proto_id(&self, idx: ProtoIndex) -> Result<&'a ProtoId> { - check_lt_result!(idx, self.header.proto_ids_size, ProtoId); + check_lt_result!(idx, self.proto_ids.len(), ProtoId); Ok(&self.proto_ids[idx as usize]) } @@ -366,7 +368,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { //------------------------------------------------------------------------------ #[inline(always)] pub fn get_method_id(&self, idx: u32) -> Result<&'a MethodId> { - check_lt_result!(idx, self.header.method_ids_size, MethodId); + check_lt_result!(idx, self.method_ids.len(), MethodId); Ok(&self.method_ids[idx as usize]) } @@ -443,6 +445,47 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.non_null_array_data_ptr(tries_off, tries_size as usize) } + //------------------------------------------------------------------------------ + // Annotations + 
//------------------------------------------------------------------------------ + // see implementation in annotations.rs for accessor + pub fn get_annotation_set(&'a self, off: u32) -> Result> { + // this will not panic if offset is zero + check_lt_result!(off, self.file_size(), AnnotationSetItem); + match self.data_ptr::(off)? { + None => Ok(&[]), + Some(size) => { + let off = off as usize + std::mem::size_of::(); + check_lt_result!(off, self.file_size(), AnnotationSetItem); + self.non_null_array_data_ptr(off as u32, *size as usize) + } + } + } + + #[inline(always)] + pub fn get_field_annotation_set( + &'a self, + anno_item: &FieldAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + + #[inline(always)] + pub fn get_method_annotation_set( + &'a self, + anno_item: &MethodAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + + #[inline(always)] + pub fn get_parameter_annotation_set( + &'a self, + anno_item: &ParameterAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + //------------------------------------------------------------------------------ // ClassDefs //------------------------------------------------------------------------------ @@ -487,15 +530,14 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // private methods #[inline] - fn data_ptr(&self, offset: u32) -> Result> { + pub fn data_ptr(&self, offset: u32) -> Result> { match offset { 0 => Ok(None), _ => Ok(Some(self.non_null_data_ptr(offset)?)), } } - #[inline] - fn non_null_data_ptr(&self, offset: u32) -> Result<&'a T> { + pub fn non_null_data_ptr(&self, offset: u32) -> Result<&'a T> { if offset == 0 { panic!( "Attempted to read a null pointer for data type {:?}.", @@ -516,15 +558,14 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline] - fn array_data_ptr(&self, offset: u32, len: usize) -> Result> { + pub fn array_data_ptr(&self, offset: u32, len: usize) -> Result> { match 
offset { 0 => Ok(None), _ => Ok(Some(self.non_null_array_data_ptr(offset, len)?)), } } - #[inline] - fn non_null_array_data_ptr(&self, offset: u32, len: usize) -> Result<&'a [T]> { + pub fn non_null_array_data_ptr(&self, offset: u32, len: usize) -> Result<&'a [T]> { if offset == 0 { panic!( "Attempted to read a null pointer for data type {:?}.", diff --git a/src/file/structs.rs b/src/file/structs.rs index cf5ae2c..4216c66 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -178,7 +178,6 @@ pub struct CodeItem { pub insns_size: u32, } - unsafe impl plain::Plain for CodeItem {} #[repr(C)] @@ -227,4 +226,6 @@ pub struct ParameterAnnotationsItem { pub annotations_off: u32, } -unsafe impl plain::Plain for ParameterAnnotationsItem {} \ No newline at end of file +unsafe impl plain::Plain for ParameterAnnotationsItem {} + +pub type AnnotationSetItem<'a> = &'a [u32]; diff --git a/src/file/verifier.rs b/src/file/verifier.rs index e13916b..1fd958c 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -16,6 +16,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { DEX_MAGIC_VERSIONS.contains(&version_raw) } + // TODO: can be changed into enum pub fn verify(dex: &DexFile<'a, C>, verify_checksum: bool) -> Result<()> { check_header(dex, verify_checksum)?; // REVISIT: maybe validate map list items From f92dc80d91bb5504585018d60e4bf1559b125067 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 18:28:23 +0100 Subject: [PATCH 19/46] fuzzung for instruction related operations --- fuzz/Cargo.toml | 10 +- fuzz/fuzz_targets/instructions.rs | 20 ++++ src/error.rs | 34 +++++++ src/file/code_item_accessors.rs | 16 ++++ src/file/dump.rs | 6 +- src/file/instruction.rs | 151 +++++++++++++++++++----------- src/main.rs | 92 ++++++++++++++++++ 7 files changed, 268 insertions(+), 61 deletions(-) create mode 100644 fuzz/fuzz_targets/instructions.rs create mode 100644 src/main.rs diff --git a/fuzz/Cargo.toml 
b/fuzz/Cargo.toml index 3989198..cf59b0d 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,6 +9,7 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" +plain = "0.2.3" [dependencies.dexrs] path = ".." @@ -25,4 +26,11 @@ name = "class_accessor" path = "fuzz_targets/class_accessor.rs" test = false doc = false -bench = false \ No newline at end of file +bench = false + +[[bin]] +name = "instructions" +path = "fuzz_targets/instructions.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/instructions.rs b/fuzz/fuzz_targets/instructions.rs new file mode 100644 index 0000000..4b6adf0 --- /dev/null +++ b/fuzz/fuzz_targets/instructions.rs @@ -0,0 +1,20 @@ +#![no_main] +#![allow(non_snake_case)] + +use dexrs::file::DexInstructionIterator; + +extern crate dexrs; +extern crate libfuzzer_sys; +extern crate plain; + +libfuzzer_sys::fuzz_target!(|data: &[u8]| { + if let Ok(bytes) = plain::slice_from_bytes::(data) { + // Two aspects + let iter = DexInstructionIterator::new(bytes); + for inst in iter { + if let Ok(inst_dump) = inst.to_string(None) { + assert!(inst_dump.len() > 0); + } + } + } +}); \ No newline at end of file diff --git a/src/error.rs b/src/error.rs index 155f5ec..f42d8c1 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,6 +2,8 @@ use std::fmt::Debug; use thiserror::Error; +use crate::file::Format; + #[derive(Error)] pub enum DexError { #[error("Empty or truncated file")] @@ -101,6 +103,38 @@ pub enum DexError { next_index: u32, item_ty: &'static str, }, + + #[error( + "{opcode}: Offset({offset}, relative code unit) should be within code stream size({size})" + )] + BadInstructionOffset { + opcode: &'static str, + offset: usize, + size: usize, + }, + + #[error("{opcode}: Could not fetch {target_type} at offset {offset} - code stream too small({size})")] + BadInstruction { + opcode: &'static str, + offset: usize, + size: usize, + target_type: &'static str, + }, + + #[error("{opcode}: Invalid argument count {count} for format 
{format:?}")] + InvalidArgCount { + opcode: &'static str, + format: &'static Format, + count: u8, + }, + + #[error("{opcode}: Invalid argument range {start}..{start}+{end} for format {format:?} - the range must at least cover one register")] + InvalidArgRange { + opcode: &'static str, + format: &'static Format, + start: u16, + end: u16, + }, } #[macro_export] diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index ac3f702..ee51829 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -77,6 +77,12 @@ impl<'a> CodeItemAccessor<'a> { debug_assert!(pc < self.insns_size_in_code_units()); Instruction::at(&self.insns[pc as usize..]) } + + pub fn get_inst_offset_in_code_units(&self, inst: &Instruction<'_>) -> usize { + let code_ptr = self.insns.as_ptr() as usize; + let inst_ptr = inst.raw().as_ptr() as usize; + inst_ptr - code_ptr + } } impl<'a> IntoIterator for CodeItemAccessor<'a> { @@ -89,6 +95,16 @@ impl<'a> IntoIterator for CodeItemAccessor<'a> { } } +impl<'a> IntoIterator for &'a CodeItemAccessor<'a> { + type Item = Instruction<'a>; + type IntoIter = DexInstructionIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + // iterator will be valid on empty input + DexInstructionIterator::new(self.insns) + } +} + pub struct DexInstructionIterator<'a> { instructions: &'a [u16], pc: usize, diff --git a/src/file/dump.rs b/src/file/dump.rs index d4209d1..cacb078 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -279,7 +279,7 @@ impl<'a> Instruction<'a> { } } Format::k35c => { - let var_args = vreg::var_args(self); + let var_args = vreg::var_args(self)?; let args_str = var_args .arg .iter() @@ -356,7 +356,7 @@ impl<'a> Instruction<'a> { } } Format::k45cc => { - let var_args = vreg::var_args(self); + let var_args = vreg::var_args(self)?; let args_str = var_args .arg .iter() @@ -407,7 +407,7 @@ impl<'a> Instruction<'a> { } } } - Format::k51l => format!("{opcode} v{}, #{:+}", vreg::A(self)?, 
vreg::wide_b(self)), + Format::k51l => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::wide_b(self)?), Format::kInvalidFormat => "".to_string(), }) } diff --git a/src/file/instruction.rs b/src/file/instruction.rs index a45bf50..d8b2036 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -1,3 +1,5 @@ +use crate::{dex_err, error::DexError, Result}; + pub struct Instruction<'a>(&'a [u16]); impl<'a> Instruction<'a> { @@ -6,22 +8,47 @@ impl<'a> Instruction<'a> { Instruction(code) } - #[inline(always)] - pub fn relative_at(&self, offset: usize) -> Instruction<'a> { - debug_assert!(offset < self.0.len()); - Instruction::at(&self.0[offset..]) + pub fn raw(&self) -> &'a [u16] { + self.0 + } + + #[inline] + pub fn relative_at(&self, offset: usize) -> Result> { + if offset + 2 >= self.0.len() { + return dex_err!(BadInstructionOffset { + opcode: self.name(), + offset: offset, + size: self.0.len() + }); + } else { + Ok(Instruction::at(&self.0[offset..])) + } } #[inline(always)] - pub fn fetch16(&self, offset: usize) -> u16 { - debug_assert!(offset < self.0.len()); - self.0[offset] + pub fn fetch16(&self, offset: usize) -> Result { + if offset >= self.0.len() { + return dex_err!(BadInstruction { + opcode: self.name(), + offset: offset, + size: self.0.len(), + target_type: "u16" + }); + } + Ok(self.0[offset]) } #[inline(always)] - pub fn fetch32(&self, offset: usize) -> u32 { - debug_assert!(offset + 1 < self.0.len()); - self.fetch16(offset) as u32 | ((self.fetch16(offset + 1) as u32) << 16) + pub fn fetch32(&self, offset: usize) -> Result { + if offset >= self.0.len() { + return dex_err!(BadInstruction { + opcode: self.name(), + offset: offset, + size: self.0.len(), + target_type: "u32" + }); + } + Ok(self.fetch16(offset)? as u32 | ((self.fetch16(offset + 1)? 
as u32) << 16)) } const fn format_desc_of(opcode: Code) -> &'static InstructionDescriptor { @@ -210,7 +237,7 @@ impl<'a> Instruction<'a> { &self.format_desc().name } - pub fn next(&self) -> Instruction<'a> { + pub fn next(&self) -> Result> { self.relative_at(self.size_in_code_units()) } @@ -218,26 +245,26 @@ impl<'a> Instruction<'a> { pub fn size_in_code_units(&self) -> usize { let size = Instruction::format_desc_of(self.opcode()).size_in_code_units; match size { - code_flags::Complex => self.size_in_code_units_complex(), + code_flags::Complex => self.size_in_code_units_complex().unwrap_or(1), code_flags::Custom => 1, /* TODO? */ _ => size as usize, } } - pub fn size_in_code_units_complex(&self) -> usize { - let inst_data = self.fetch16(0); + pub fn size_in_code_units_complex(&self) -> Result { + let inst_data = self.fetch16(0)?; debug_assert!(inst_data & 0xFF == 0); - match inst_data { - signatures::PackedSwitchSignature => 4 + self.fetch16(1) as usize * 2, - signatures::SparseSwitchSignature => 2 + self.fetch16(1) as usize * 4, + Ok(match inst_data { + signatures::PackedSwitchSignature => 4 + self.fetch16(1)? as usize * 2, + signatures::SparseSwitchSignature => 2 + self.fetch16(1)? as usize * 4, signatures::ArrayDataSignature => { - let element_size = self.fetch16(1) as usize; - let length = self.fetch32(2) as usize; + let element_size = self.fetch16(1)? as usize; + let length = self.fetch32(2)? as usize; // The plus 1 is to round up for odd size and width. 4 + (element_size * length + 1) / 2 } _ => 1, - } + }) } pub fn verify_flags(&self) -> u32 { @@ -268,18 +295,18 @@ pub mod vreg { use crate::{dex_err, error::DexError, Result}; // AA|op ... - fn inst_aa(inst: &Instruction<'_>) -> u8 { - (inst.fetch16(0) >> 8) as u8 + fn inst_aa(inst: &Instruction<'_>) -> Result { + Ok((inst.fetch16(0)? >> 8) as u8) } // B|A|op ... 
- fn inst_a(inst: &Instruction<'_>) -> u8 { - (inst.fetch16(0) >> 8) as u8 & 0x0F + fn inst_a(inst: &Instruction<'_>) -> Result { + Ok((inst.fetch16(0)? >> 8) as u8 & 0x0F) } // B|A|op ... - fn inst_b(inst: &Instruction<'_>) -> u8 { - (inst.fetch16(0) >> 12) as u8 + fn inst_b(inst: &Instruction<'_>) -> Result { + Ok((inst.fetch16(0)? >> 12) as u8) } //------------------------------------------------------------------------------ @@ -337,17 +364,17 @@ pub mod vreg { | Format::k31t | Format::k3rc | Format::k51l - | Format::k4rcc => inst_aa(inst) as i32, + | Format::k4rcc => inst_aa(inst)? as i32, // B|A|op Format::k11n | Format::k12x | Format::k22c | Format::k22s | Format::k22t => { - inst_a(inst) as i32 + inst_a(inst)? as i32 } // op AAAA - Format::k32x | Format::k20t => inst.fetch16(1) as i32, + Format::k32x | Format::k20t => inst.fetch16(1)? as i32, // op AAAAAAAA - Format::k30t => inst.fetch32(1) as i32, + Format::k30t => inst.fetch32(1)? as i32, // A|G|op - Format::k35c | Format::k45cc => inst_b(inst) as i32, + Format::k35c | Format::k45cc => inst_b(inst)? as i32, _ => { return dex_err!(OperandAccessError { insn_name: inst.name(), @@ -393,16 +420,16 @@ pub mod vreg { } #[inline] - pub fn wide_b(inst: &Instruction<'_>) -> u64 { + pub fn wide_b(inst: &Instruction<'_>) -> Result { debug_assert!(*inst.format() == Format::k51l); - inst.fetch32(1) as u64 | ((inst.fetch32(3) as u64) << 32) + Ok(inst.fetch32(1)? as u64 | ((inst.fetch32(3)? as u64) << 32)) } #[inline] pub fn B(inst: &Instruction<'_>) -> Result { Ok(match inst.format() { // B|A|op with #+B - Format::k11n => ((inst_b(inst) as i32) << 28) >> 28, + Format::k11n => ((inst_b(inst)? as i32) << 28) >> 28, // op BBBB Format::k21c | Format::k21t @@ -412,17 +439,17 @@ pub mod vreg { | Format::k35c | Format::k3rc | Format::k45cc - | Format::k4rcc => inst.fetch16(1) as i32, + | Format::k4rcc => inst.fetch16(1)? 
as i32, // B|A|op - Format::k12x | Format::k22c | Format::k22s | Format::k22t => inst_b(inst) as i32, + Format::k12x | Format::k22c | Format::k22s | Format::k22t => inst_b(inst)? as i32, // op CC|BB - Format::k22b | Format::k23x => (inst.fetch16(1) & 0xFF) as i32, + Format::k22b | Format::k23x => (inst.fetch16(1)? & 0xFF) as i32, // op BBBBBBBB - Format::k31c | Format::k31i | Format::k31t => inst.fetch32(1) as i32, + Format::k31c | Format::k31i | Format::k31t => inst.fetch32(1)? as i32, // op AAAA BBBB - Format::k32x => inst.fetch16(2) as i32, + Format::k32x => inst.fetch16(2)? as i32, // op BBBBBBBBBBBBBBBBB - Format::k51l => wide_b(inst) as i32, + Format::k51l => wide_b(inst)? as i32, _ => { return dex_err!(OperandAccessError { insn_name: inst.name(), @@ -455,13 +482,13 @@ pub mod vreg { pub fn C(inst: &Instruction<'_>) -> Result { Ok(match inst.format() { // op CCCC - Format::k22c | Format::k22s | Format::k22t => inst.fetch16(1) as i32, + Format::k22c | Format::k22s | Format::k22t => inst.fetch16(1)? as i32, // op CC|BB - Format::k22b | Format::k23x => ((inst.fetch16(1) >> 8) & 0xFF) as i32, + Format::k22b | Format::k23x => ((inst.fetch16(1)? >> 8) & 0xFF) as i32, // op BBBB CCCC - Format::k3rc | Format::k4rcc => inst.fetch16(2) as i32, + Format::k3rc | Format::k4rcc => inst.fetch16(2)? as i32, // op BBBB HH|CC - Format::k35c | Format::k45cc => (inst.fetch16(2) & 0x0F) as i32, + Format::k35c | Format::k45cc => (inst.fetch16(2)? & 0x0F) as i32, _ => { return dex_err!(OperandAccessError { insn_name: inst.name(), @@ -485,7 +512,7 @@ pub mod vreg { #[inline] pub fn H(inst: &Instruction<'_>) -> Result { Ok(match &inst.format_desc().format { - Format::k45cc | Format::k4rcc => inst.fetch16(3) as i32, + Format::k45cc | Format::k4rcc => inst.fetch16(3)? 
as i32, _ => { return dex_err!(OperandAccessError { insn_name: inst.name(), @@ -507,20 +534,22 @@ pub mod vreg { } #[inline] - pub fn var_args(inst: &Instruction<'_>) -> VarArgs { - let reg_list = inst.fetch16(2); - let count = inst_b(inst); + pub fn var_args(inst: &Instruction<'_>) -> Result { + let reg_list = inst.fetch16(2)?; + let count = inst_b(inst)?; let mut var_args = VarArgs::new(count); // NOTE only five as maximum - debug_assert!( - count <= 5, - "Invalid arg count in {:?} ({count})", - inst.format() - ); + if count > 5 { + return dex_err!(InvalidArgCount { + opcode: inst.name(), + format: inst.format(), + count + }); + } if count > 4 { - var_args.arg[4] = inst_a(inst); + var_args.arg[4] = inst_a(inst)?; } if count > 3 { var_args.arg[3] = ((reg_list >> 12) & 0x0F) as u8; @@ -534,7 +563,7 @@ pub mod vreg { if count > 0 { var_args.arg[0] = (reg_list & 0x0F) as u8; } - var_args + Ok(var_args) } //------------------------------------------------------------------------------ @@ -550,8 +579,16 @@ pub mod vreg { pub fn args_range(inst: &Instruction<'_>) -> Result> { let first_reg = vreg::C(inst)? as u16; - let last_reg = first_reg + (vreg::A(inst)? - 1) as u16; - Ok(first_reg..=last_reg) + let last_reg = vreg::A(inst)? 
as u16; + if last_reg == 0 || first_reg as usize + last_reg as usize > u16::MAX as usize { + return dex_err!(InvalidArgRange { + opcode: inst.name(), + format: inst.format(), + start: first_reg, + end: last_reg + }); + } + Ok(first_reg..=(first_reg + last_reg - 1)) } } diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..509353d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,92 @@ +use dexrs::file::dump::prettify; +use dexrs::file::{vreg, DexFile, DexFileContainer, Field, InMemoryDexContainer, Method}; +use dexrs::Result; +// use dexrs::art::dex::file::Header; + +// fn main() -> Result<(), dexrs::art::error::Error> { + +// let file = std::fs::File::open(".vscode/classes.dex").unwrap(); +// let mmap = unsafe { memmap2::Mmap::map(&file)? }; + +// let header = Header::from_bytes(&mmap).unwrap(); +// println!("Version: {}", header.get_version_or(0)); +// println!("{:?}", header); +// Ok(()) +// } + +fn main() -> Result<()> { + let path = ".vscode/classes.dex"; + let file = std::fs::File::open(&path).unwrap(); + let container = DexFileContainer::new(&file) + .verify(true) + .verify_checksum(true); + + let dex = container.open()?; + println!("{:?}", dex.get_string_id(0)?); + + // println!("=== Types ==="); + // for type_id in dex.get_type_ids() { + // let name = dex.get_type_desc_utf16_lossy(type_id); + // println!("{}", name); + // } + + // println!("=== Fields ==="); + // for field_id in dex.get_field_ids() { + // let cls_name = dex.get_type_desc_utf16_lossy_at(field_id.class_idx)?; + // let type_name = dex.get_type_desc_utf16_lossy_at(field_id.type_idx)?; + // let name = dex.get_utf16_str_lossy_at(field_id.name_idx)?; + + // println!(".field {}->{}:{}", cls_name, name, type_name); + // } + + let class_def = dex.get_class_def(122)?; + let name = dex.get_type_desc_utf16_lossy_at(class_def.class_idx)?; + println!("Class name: {}", name); + + if let Some(interfaces) = dex.get_type_list(class_def.interfaces_off)? 
{ + println!("Interfaces:"); + for interface in interfaces { + let name = dex.get_type_desc_utf16_lossy_at(interface.type_idx)?; + println!(".implements {}", name); + } + } + + let class_data = dex + .get_class_accessor(class_def) + .expect("msg") + .expect("msg"); + println!("Static Methods: {}", class_data.num_direct_methods); + let fields: Vec = class_data.get_fields().collect(); + + for field in fields { + println!( + ".field {}", + dex.pretty_field(field.index, prettify::Field::WithType) + ); + } + + // for method in fields { + // let ca = dex.get_code_item_accessor(method.code_offset)?; + // let insn = ca.insn_at(0); + // println!("Insn: {:?}", insn.to_string(Some(&dex))?); + // } + + let methods: Vec = class_data.get_methods()?.collect(); + for method in methods { + println!( + ".method {}", + dex.pretty_method_at(method.index, prettify::Method::WithSig) + ); + + let ca = dex.get_code_item_accessor(method.code_offset)?; + println!(" .registers {}\n", ca.registers_size()); + + for inst in &ca { + println!("|{:#08x}| {}", ca.get_inst_offset_in_code_units(&inst), inst.to_string(Some(&dex))?); + } + println!(".end method\n"); + break; + } + + Ok(()) +} From cf7d74c507c07da3e0fc0885a15e54aa9d513bea Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 8 Feb 2025 21:53:43 +0100 Subject: [PATCH 20/46] Updated fuzzing tests for class accessor --- + updated verification flags in DExVerfier --- fuzz/fuzz_targets/class_accessor.rs | 32 +++++++++++++++++++++-------- src/file/mod.rs | 11 +++++++++- src/file/verifier.rs | 31 ++++++++++++++++++---------- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/fuzz/fuzz_targets/class_accessor.rs b/fuzz/fuzz_targets/class_accessor.rs index 8539f54..ba644ec 100644 --- a/fuzz/fuzz_targets/class_accessor.rs +++ b/fuzz/fuzz_targets/class_accessor.rs @@ -1,21 +1,35 @@ #![no_main] #![allow(non_snake_case)] -use dexrs::file::{DexFile, DexLocation}; +use 
dexrs::file::{ClassAccessor, Field}; extern crate dexrs; extern crate libfuzzer_sys; +fn null_field_visitor(_field: &dexrs::file::Field) -> Result<(), dexrs::error::DexError> { + Ok(()) +} + +fn null_method_visitor(_method: &dexrs::file::Method) -> Result<(), dexrs::error::DexError> { + Ok(()) +} + libfuzzer_sys::fuzz_target!(|data: &[u8]| { // this must not panic - if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { - // we skip verification to test class_accessor here - if let Ok(class_def) = dex.get_class_def(0) { - if let Ok(Some(ca)) = dex.get_class_accessor(&class_def) { - // must not panic - let _fields = ca.get_fields(); - let _methods = ca.get_methods(); - } + if let Ok(ca) = ClassAccessor::from_raw(data) { + let _fields: Vec = ca.get_fields().collect(); + if let Ok(methods) = ca.get_methods() { + let _methods: Vec<_> = methods.collect(); } + + // visitors shouldn't panic too + let _res = ca + .visit_fields_and_methods( + null_field_visitor, + null_field_visitor, + null_method_visitor, + null_method_visitor, + ) + .is_ok(); } }); diff --git a/src/file/mod.rs b/src/file/mod.rs index 91fef88..5b7457d 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -19,6 +19,7 @@ pub mod dump; pub use container::*; pub mod annotations; pub use annotations::*; +use verifier::VerifyPreset; use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -156,7 +157,15 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { let dex = DexFile::from_raw_parts(container.data(), DexLocation::Path(loc.to_string()))?; dex.init()?; if container.verify { - DexFile::verify(&dex, container.verify_checksum)?; + DexFile::verify( + &dex, + if container.verify_checksum { + // currenlty supports only checksum + VerifyPreset::ChecksumOnly + } else { + VerifyPreset::None + }, + )?; } Ok(dex) } diff --git a/src/file/verifier.rs b/src/file/verifier.rs index 1fd958c..2a73867 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -6,6 +6,12 @@ use super::{ 
DexContainer, DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, }; +pub enum VerifyPreset { + None, + All, + ChecksumOnly, +} + impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn is_magic_valid(&self) -> bool { &self.header.get_magic()[..4] == DEX_MAGIC @@ -17,8 +23,8 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } // TODO: can be changed into enum - pub fn verify(dex: &DexFile<'a, C>, verify_checksum: bool) -> Result<()> { - check_header(dex, verify_checksum)?; + pub fn verify(dex: &DexFile<'a, C>, preset: VerifyPreset) -> Result<()> { + check_header(dex, preset)?; // REVISIT: maybe validate map list items Ok(()) } @@ -30,7 +36,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } } -fn check_header<'a, C>(dex: &DexFile<'a, C>, verify_checksum: bool) -> Result<()> +fn check_header<'a, C>(dex: &DexFile<'a, C>, preset: VerifyPreset) -> Result<()> where C: DexContainer<'a>, { @@ -84,15 +90,18 @@ where return dex_err!(UnexpectedEndianess, dex.header.endian_tag); } - if verify_checksum { - let checksum = dex.calculate_checksum(); - if checksum != dex.header.checksum { - return dex_err!(BadChecksum { - actual: checksum, - expected: dex.header.checksum - }); + match &preset { + VerifyPreset::All | VerifyPreset::ChecksumOnly => { + let checksum = dex.calculate_checksum(); + if checksum != dex.header.checksum { + return dex_err!(BadChecksum { + actual: checksum, + expected: dex.header.checksum + }); + } } - } + _ => {} + }; let header = dex.header; check_valid_offset_and_size(dex, header.link_off, header.link_size, "link")?; From ac0aa5b5bbc53f5a9d8cd2c4b24829419e823d28 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 9 Feb 2025 15:55:39 +0100 Subject: [PATCH 21/46] EncodedValue implementation and annotation accessor changes --- + EncodedValue implementation with fuzzing tests + DexFIle is now able to return an UTF8 String without validating the mUTF8 --- benches/parse.rs | 4 +- 
fuzz/Cargo.toml | 7 + fuzz/fuzz_targets/encoded_item.rs | 16 ++ fuzz/fuzz_targets/from_raw_parts.rs | 4 +- src/error.rs | 28 +++ src/file/annotations.rs | 340 +++++++++++++++++++++++++++- src/file/mod.rs | 19 ++ src/file/structs.rs | 43 ++++ 8 files changed, 449 insertions(+), 12 deletions(-) create mode 100644 fuzz/fuzz_targets/encoded_item.rs diff --git a/benches/parse.rs b/benches/parse.rs index 8d47f3c..c4140a9 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use dexrs::file::{DexFile, DexLocation, Header, InMemoryDexContainer}; +use dexrs::file::{verifier::VerifyPreset, DexFile, DexLocation, Header, InMemoryDexContainer}; fn parse_and_verify_small_file(c: &mut Criterion) { let data = include_bytes!("../tests/prime/prime.dex"); @@ -7,7 +7,7 @@ fn parse_and_verify_small_file(c: &mut Criterion) { b.iter(|| { let container = InMemoryDexContainer::new(data); if let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { - if DexFile::verify(&dex, true).is_ok() { + if DexFile::verify(&dex, VerifyPreset::All).is_ok() { assert_eq!( dex.expected_header_size(), std::mem::size_of::
() as u32 diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index cf59b0d..289f3b1 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -34,3 +34,10 @@ path = "fuzz_targets/instructions.rs" test = false doc = false bench = false + +[[bin]] +name = "encoded_item" +path = "fuzz_targets/encoded_item.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/encoded_item.rs b/fuzz/fuzz_targets/encoded_item.rs new file mode 100644 index 0000000..81754e2 --- /dev/null +++ b/fuzz/fuzz_targets/encoded_item.rs @@ -0,0 +1,16 @@ +#![no_main] +#![allow(non_snake_case)] + +use dexrs::file::EncodedValue; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // fuzzed code goes here + if let Ok(value) = EncodedValue::new(data) { + match value { + EncodedValue::Annotation(annotation) => for _ in annotation.elements() {}, + EncodedValue::Array(array) => for _ in array {}, + _ => {} + } + } +}); diff --git a/fuzz/fuzz_targets/from_raw_parts.rs b/fuzz/fuzz_targets/from_raw_parts.rs index 8242b02..64d13fb 100644 --- a/fuzz/fuzz_targets/from_raw_parts.rs +++ b/fuzz/fuzz_targets/from_raw_parts.rs @@ -1,7 +1,7 @@ #![no_main] #![allow(non_snake_case)] -use dexrs::file::{DexFile, DexLocation}; +use dexrs::file::{verifier::VerifyPreset, DexFile, DexLocation}; extern crate dexrs; extern crate libfuzzer_sys; @@ -9,7 +9,7 @@ extern crate libfuzzer_sys; libfuzzer_sys::fuzz_target!(|data: &[u8]| { // this must not panic if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { - if DexFile::verify(&dex, true).is_ok() { + if DexFile::verify(&dex, VerifyPreset::All).is_ok() { let _ = dex; } } diff --git a/src/error.rs b/src/error.rs index f42d8c1..77be00e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -135,6 +135,34 @@ pub enum DexError { start: u16, end: u16, }, + + #[error("Encountered an encoded value with no size which is not allowed")] + EmptyEncodedValue, + + #[error("Invalid encoded value({0:#x})")] + BadEncodedValueType(u8), + + #[error("Invalid 
encoded value({value_type:#x}) requested byte at offset({offset}) which is out of bounds (size: {size})")] + InvalidEncodedValue { + value_type: u8, + offset: usize, + size: usize, + }, + + #[error("Invalid encoded value({value_type:#x}) requested size({size}) which is too big for data type (size: {max})")] + BadEncodedValueSize { + value_type: u8, + size: usize, + max: usize, + }, + + #[error("Invalid encoded value({value_type:#x}) requested array length({size}) which does not fit into value buffer (size: {max})")] + BadEncodedArrayLength { + value_type: u8, + size: usize, + offset: usize, + max: usize, + }, } #[macro_export] diff --git a/src/file/annotations.rs b/src/file/annotations.rs index 1957ab1..3424e0b 100644 --- a/src/file/annotations.rs +++ b/src/file/annotations.rs @@ -1,10 +1,14 @@ -use crate::Result; +use crate::{dex_err, error::DexError, leb128::decode_leb128, Result}; use super::{ - AnnotationSetItem, AnnotationsDirectoryItem, ClassDef, DexContainer, DexFile, - FieldAnnotationsItem, MethodAnnotationsItem, ParameterAnnotationsItem, + AnnotationElement, AnnotationItem, AnnotationSetItem, AnnotationsDirectoryItem, ClassDef, + DexContainer, DexFile, EncodedAnnotation, EncodedArray, EncodedValue, FieldAnnotationsItem, + MethodAnnotationsItem, ParameterAnnotationsItem, }; +//------------------------------------------------------------------------------ +// ClassAnnotationsAccessor +//------------------------------------------------------------------------------ pub struct ClassAnnotationsAccessor<'a> { class_def: &'a ClassDef, @@ -15,7 +19,7 @@ pub struct ClassAnnotationsAccessor<'a> { } impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - pub fn class_annotations( + pub fn get_class_ann_accessor( &'a self, class_def: &'a ClassDef, ) -> Result> { @@ -88,22 +92,342 @@ impl<'a> ClassAnnotationsAccessor<'a> { } #[inline] - pub fn get_field_annotations(&self) -> &'a [FieldAnnotationsItem] { + pub fn get_field_ann(&self) -> &'a [FieldAnnotationsItem] { 
self.field_annotations } #[inline] - pub fn get_method_annotations(&self) -> &'a [MethodAnnotationsItem] { + pub fn get_method_ann(&self) -> &'a [MethodAnnotationsItem] { self.method_annotations } #[inline] - pub fn get_parameter_annotations(&self) -> &'a [ParameterAnnotationsItem] { + pub fn get_parameter_ann(&self) -> &'a [ParameterAnnotationsItem] { self.parameter_annotations } #[inline] - pub fn get_class_annotations(&self) -> AnnotationSetItem<'a> { + pub fn get_class_ann(&self) -> AnnotationSetItem<'a> { self.class_annotations } } + +// Encoded values require special handling and they can't be parsed using +// zero-copy. +#[repr(u8)] +#[derive(Debug, PartialEq, Eq)] +#[rustfmt::skip] +pub enum EncodedValueType { + Byte = 0x00, + Short = 0x02, + Char = 0x03, + Int = 0x04, + Long = 0x06, + Float = 0x10, + Double = 0x11, + MethodType = 0x15, + MethodHandle = 0x16, + String = 0x17, + Type = 0x18, + Field = 0x19, + Method = 0x1a, + Enum = 0x1b, + Array = 0x1c, + Annotation = 0x1d, + Null = 0x1e, + Boolean = 0x1f, +} + +#[derive(Debug, PartialEq, Eq)] +enum FillStrategy { + Left, + Right, +} + +impl AnnotationItem { + pub fn from_raw_parts(value: &[u8]) -> Result { + Ok(Self { + visibility: value[0], + annotation: EncodedAnnotation::new(&value[1..])?, + }) + } +} + +//------------------------------------------------------------------------------ +// AnnotationElement +//------------------------------------------------------------------------------ +impl AnnotationElement { + pub fn value(&self) -> &EncodedValue { + &self.value + } + + fn from_raw_parts(value: &[u8], offset: &mut usize) -> Result { + let (name_idx, size) = decode_leb128::(&value[*offset..])?; + *offset += size; + let value = EncodedValue::from_raw_parts(value, offset)?; + Ok(AnnotationElement { name_idx, value }) + } +} + +//------------------------------------------------------------------------------ +// EncodedAnnotation 
+//------------------------------------------------------------------------------ +impl EncodedAnnotation { + pub fn elements(&self) -> &[AnnotationElement] { + &self.elements + } + + pub fn new(value: &'_ [u8]) -> Result { + let mut offset = 0; + EncodedAnnotation::from_raw_parts(value, &mut offset) + } + + fn from_raw_parts(value: &[u8], offset: &mut usize) -> Result { + let (type_idx, size) = decode_leb128::(&value[*offset..])?; + *offset += size; + let (length, size) = decode_leb128::(&value[*offset..])?; + *offset += size; + // the value must not overflow assuming each item occupies at least two bytes + if *offset + (length as usize * 2) >= value.len() { + return dex_err!(BadEncodedArrayLength { + value_type: EncodedValueType::Annotation as u8, + size: value.len(), + offset: *offset, + max: value.len() + }); + } + + let mut elements = Vec::with_capacity(length as usize); + for _ in 0..length { + elements.push(AnnotationElement::from_raw_parts(value, offset)?); + } + Ok(EncodedAnnotation { type_idx, elements }) + } +} + +//------------------------------------------------------------------------------ +// wrapper +//------------------------------------------------------------------------------ +fn check_size( + value_arg: u8, + value_type: u8, + width: usize, + offset: usize, + value: &[u8], +) -> Result<()> { + let size = std::mem::size_of::(); + if value_arg as usize + 1 >= size { + return dex_err!(BadEncodedValueSize { + value_type: value_type, + size: value_arg as usize, + max: size + }); + } + + if offset + width >= value.len() { + return dex_err!(InvalidEncodedValue { + value_type: value_type, + offset: offset + width, + size: value.len() + }); + } + Ok(()) +} + +macro_rules! 
as_int { + // signed + ($target:ty, $value:ident, $value_arg:ident, $value_type:ident, $offset:ident, $target_unsigned:ty) => {{ + let width = $value_arg as usize + 1; + let bytes = std::mem::size_of::<$target>() as u8; + let bits = bytes * 8; + check_size::<$target>($value_arg, $value_type, width, *$offset, $value)?; + let mut val: $target = 0; + for i in (0..width).rev() { + val = ((val as $target_unsigned) >> 8) as $target + | (($value[i + *$offset] as $target) << (bits - 8)); + } + val >>= ((bytes - 1) - $value_arg) * 8; + *$offset += width; + val + }}; + + // unsigned + ($target:ty, $value:ident, $value_arg:ident, $value_type:ident, $offset:ident, strategy: $strategy:ident) => {{ + let width = $value_arg as usize + 1; + let bytes = std::mem::size_of::<$target>() as u8; + let bits = bytes * 8; + check_size::<$target>($value_arg, $value_type, width, *$offset, $value)?; + let mut val: $target = 0; + for i in (0..width).rev() { + val = ((val as $target) >> 8) as $target + | (($value[i + *$offset] as $target) << (bits - 8)); + } + val >>= ((bytes - 1) - $value_arg) * 8; + *$offset += width; + match $strategy { + FillStrategy::Left => val, + FillStrategy::Right => val >> ((bits - 1) - $value_arg * 8), + } + }}; +} + +#[inline] +fn as_signed_int(value: &[u8], value_arg: u8, value_type: u8, offset: &mut usize) -> Result { + Ok(as_int!(i32, value, value_arg, value_type, offset, u32)) +} + +#[inline] +fn as_signed_long(value: &[u8], value_arg: u8, value_type: u8, offset: &mut usize) -> Result { + Ok(as_int!(i64, value, value_arg, value_type, offset, u64)) +} + +#[inline] +fn as_unsigned_int( + value: &[u8], + value_arg: u8, + value_type: u8, + offset: &mut usize, + fill_strategy: FillStrategy, +) -> Result { + Ok(as_int!(u32, value, value_arg, value_type, offset, strategy: fill_strategy)) +} + +#[inline] +fn as_unsigned_long( + value: &[u8], + value_arg: u8, + value_type: u8, + offset: &mut usize, + fill_strategy: FillStrategy, +) -> Result { + Ok(as_int!(u64, value, 
value_arg, value_type, offset, strategy: fill_strategy)) +} + +//------------------------------------------------------------------------------ +// EncodedValue +//------------------------------------------------------------------------------ +impl EncodedValue { + pub fn new(value: &'_ [u8]) -> Result { + let mut offset = 0; + EncodedValue::from_raw_parts(value, &mut offset) + } + + #[rustfmt::skip] + fn from_raw_parts(value: &'_ [u8], offset: &mut usize) -> Result { + if *offset >= value.len() { + return dex_err!(EmptyEncodedValue); + } + + let header_byte = value[*offset]; + let value_type = header_byte & 0x1F_u8 as u8; + let value_arg = ((header_byte & 0xE0) >> 5) as u8; + if !EncodedValueType::is_valid(value_type) { + return dex_err!(BadEncodedValueType, value_type); + } + + *offset += 1; + Ok(match EncodedValueType::from(value_type) { + EncodedValueType::Byte => EncodedValue::Byte(as_signed_int(value, value_arg, value_type, offset)? as i8), + EncodedValueType::Short => EncodedValue::Short(as_signed_int(value, value_arg, value_type, offset)? as i16), + EncodedValueType::Char => EncodedValue::Char(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)? as u16), + EncodedValueType::Int => EncodedValue::Int(as_signed_int(value, value_arg, value_type, offset)?), + EncodedValueType::Long => EncodedValue::Long(as_signed_long(value, value_arg, value_type, offset)?), + EncodedValueType::Float => EncodedValue::Float(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Right)? as f32), + EncodedValueType::Double => EncodedValue::Double(as_unsigned_long(value, value_arg, value_type, offset, FillStrategy::Right)? 
as f64), + EncodedValueType::MethodType => EncodedValue::MethodType(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::MethodHandle => EncodedValue::MethodHandle(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::String => EncodedValue::String(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::Type => EncodedValue::Type(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::Field => EncodedValue::Field(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::Method => EncodedValue::Method(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::Enum => EncodedValue::Enum(as_unsigned_int(value, value_arg, value_type, offset, FillStrategy::Left)?), + EncodedValueType::Array => EncodedValue::Array(EncodedValue::from_encoded_array(value, offset)?), + EncodedValueType::Annotation => EncodedValue::Annotation(EncodedValue::from_encoded_annotation(value, offset)?), + EncodedValueType::Null => EncodedValue::Null, + EncodedValueType::Boolean => { + if value_arg == 0 { + EncodedValue::False + } else { + EncodedValue::True + } + } + }) + } + + fn from_encoded_array(value: &'_ [u8], offset: &mut usize) -> Result { + let (length, size) = decode_leb128::(&value[*offset..])?; + *offset += size; + // make sure we don't parse bogus data + if *offset >= value.len() { + return dex_err!(InvalidEncodedValue { + value_type: EncodedValueType::Array as u8, + offset: *offset, + size: value.len() + }); + } + + // the value must not overflow assuming each item occupies at least two bytes + if *offset + (length as usize * 2) >= value.len() { + return dex_err!(BadEncodedArrayLength { + value_type: EncodedValueType::Array as u8, + size: value.len(), + offset: *offset, + max: value.len() + }); + } + + let mut values = 
Vec::with_capacity(length as usize); + for _ in 0..length { + values.push(EncodedValue::from_raw_parts(value, offset)?); + } + Ok(values) + } + + fn from_encoded_annotation(value: &'_ [u8], offset: &mut usize) -> Result { + EncodedAnnotation::from_raw_parts(value, offset) + } +} + +impl EncodedValueType { + #[inline] + pub fn is_valid(value_type: u8) -> bool { + match value_type { + 0x00 | 0x02..=0x04 | 0x06 | 0x10 | 0x11 | 0x15..=0x1F => true, + _ => false, + } + } + + #[inline] + pub fn is_primitive(value_type: u8) -> bool { + match value_type { + 0x00 | 0x02..=0x06 | 0x10 | 0x11 => true, + _ => false, + } + } +} + +impl From for EncodedValueType { + fn from(value: u8) -> Self { + match value { + 0x00 => EncodedValueType::Byte, + 0x02 => EncodedValueType::Short, + 0x03 => EncodedValueType::Char, + 0x04 => EncodedValueType::Int, + 0x06 => EncodedValueType::Long, + 0x10 => EncodedValueType::Float, + 0x11 => EncodedValueType::Double, + 0x15 => EncodedValueType::MethodType, + 0x16 => EncodedValueType::MethodHandle, + 0x17 => EncodedValueType::String, + 0x18 => EncodedValueType::Type, + 0x19 => EncodedValueType::Field, + 0x1a => EncodedValueType::Method, + 0x1b => EncodedValueType::Enum, + 0x1c => EncodedValueType::Array, + 0x1d => EncodedValueType::Annotation, + 0x1e => EncodedValueType::Null, + 0x1f => EncodedValueType::Boolean, + _ => unreachable!(), + } + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 5b7457d..4a3f0a5 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -231,6 +231,12 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } } + #[inline] + pub unsafe fn fast_get_utf8_str(&self, string_id: &StringId) -> Result { + let (_, data) = self.get_string_data(string_id)?; + Ok(String::from_utf8_unchecked(data.to_vec())) + } + #[inline(always)] pub fn get_utf16_str_lossy(&self, string_id: &StringId) -> Result { let (_, data) = self.get_string_data(string_id)?; @@ -371,6 +377,19 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { 
self.get_utf16_str_lossy_at(proto_id.shorty_idx) } + //------------------------------------------------------------------------------ + // EncodedValue + //------------------------------------------------------------------------------ + pub fn get_encoded_value(&self, off: u32) -> Result { + check_lt_result!(off, self.file_size(), EncodedValue); + EncodedValue::new(&self.mmap[off as usize..]) + } + + pub fn get_annotation(&self, off: u32) -> Result { + check_lt_result!(off, self.file_size(), Annotation); + AnnotationItem::from_raw_parts(&self.mmap[off as usize..]) + } + // method ids related methods //------------------------------------------------------------------------------ // Method Ids diff --git a/src/file/structs.rs b/src/file/structs.rs index 4216c66..1ed445b 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -229,3 +229,46 @@ pub struct ParameterAnnotationsItem { unsafe impl plain::Plain for ParameterAnnotationsItem {} pub type AnnotationSetItem<'a> = &'a [u32]; + +pub type EncodedArray = Vec; + +#[derive(Debug)] +pub enum EncodedValue { + Byte(i8), + Short(i16), + Char(u16), + Int(i32), + Long(i64), + Float(f32), + Double(f64), + MethodType(u32), + MethodHandle(u32), + String(u32), + Type(u32), + Field(u32), + Method(u32), + Enum(u32), + Array(EncodedArray), + Annotation(EncodedAnnotation), + Null, + True, + False, +} + +#[derive(Debug)] +pub struct AnnotationElement { + pub name_idx: u32, + pub(crate) value: EncodedValue, +} + +#[derive(Debug)] +pub struct EncodedAnnotation { + pub type_idx: u32, + pub(crate) elements: Vec, +} + +#[derive(Debug)] +pub struct AnnotationItem { + pub visibility: u8, + pub annotation: EncodedAnnotation, +} \ No newline at end of file From b7b9ac4f90365414a0752e68c6c09d96e95b3d6a Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:28:40 +0100 Subject: [PATCH 22/46] Added some examples along with MUTF8 fuzzing --- + added support to get index from 
item in dex file --- examples/dex_basic_ops.rs | 89 ++++++++++++++++++++++++++ examples/dex_strings.rs | 53 ++++++++++++++++ examples/parse_dex_file.rs | 62 ++++++++++++++++++ fuzz/Cargo.toml | 7 +++ fuzz/fuzz_targets/from_raw_parts.rs | 2 +- fuzz/fuzz_targets/mutf8.rs | 14 +++++ src/error.rs | 9 +++ src/file/container.rs | 6 +- src/file/dump.rs | 27 +++++--- src/file/mod.rs | 97 ++++++++++++++++++++++------- src/file/verifier.rs | 1 + src/utf.rs | 28 +++++---- 12 files changed, 349 insertions(+), 46 deletions(-) create mode 100644 examples/dex_basic_ops.rs create mode 100644 examples/dex_strings.rs create mode 100644 examples/parse_dex_file.rs create mode 100644 fuzz/fuzz_targets/mutf8.rs diff --git a/examples/dex_basic_ops.rs b/examples/dex_basic_ops.rs new file mode 100644 index 0000000..41fa6eb --- /dev/null +++ b/examples/dex_basic_ops.rs @@ -0,0 +1,89 @@ +#![allow(unused)] + +use dexrs::file::dump::prettify; +use dexrs::file::DexFile; +use dexrs::Result; +use openssl::string; + +fn dex_get_method(dex: &DexFile<'_>) -> Result<()> { + // the DexFile struct does not provide an interface to query all + // method related information all at once, so we have to fetch + // all method information one by one. + let method_id = dex.get_method_id(0)?; + + // index can be retrieved from the MethodIdItem + assert!(dex.method_id_idx(method_id)? == 0); + + // name is a string. To resolve everything manually, you would need + // to fetch the string id first + let name = dex.get_utf16_str_at(method_id.name_idx)?; + let proto_id = dex.get_proto_id(method_id.proto_idx)?; + + // the declaring class name is just a TypeId, which points to a + // string id + let class_name = dex.get_type_desc_utf16_at(method_id.class_idx)?; + + // you can either print the method by yourself or get a prettified + // version. 
You can specify whether to print the signature or not + let pretty_method = dex.pretty_method_opt(method_id, prettify::Method::WithSig)?; + + // the references to all methods are exposed, so you can iterate + // over all of them + for method_id in dex.get_method_ids() { + // ... + } + + // NOTE: the result of num_method_ids is the number given in the + // file header, NOT the length of get_method_ids. + assert_eq!(dex.num_method_ids() as usize, dex.get_method_ids().len()); + Ok(()) +} + +fn dex_get_field(dex: &DexFile<'_>) -> Result<()> { + // the interface for fields is pretty much the same as for methods + let field_id = dex.get_field_id(0)?; + assert!(dex.field_id_idx(field_id)? == 0); + + // same as for methods + let name = dex.get_utf16_str_at(field_id.name_idx)?; + let type_name = dex.get_type_desc_utf16_at(field_id.type_idx)?; + let class_name = dex.get_type_desc_utf16_at(field_id.class_idx)?; + + // prettified version is also available + let pretty_field = dex.pretty_field_opt(field_id, prettify::Field::WithType)?; + // the '_opt' method will return an error on invalid input or if the string + // can't be created. However, if you want to get a string regardless of the + // input, just use: + let pretty_field = dex.pretty_field(field_id, prettify::Field::WithType); + + // all fields are exposed, so you can iterate over them + for field_id in dex.get_field_ids() { + // ... + } + Ok(()) +} + +fn dex_types(dex: &DexFile<'_>) -> Result<()> { + // types are somewhat different from methods and fields as they are just references + // to their names in the string ids section. + let type_id = dex.get_type_id(0)?; + assert!(dex.type_id_idx(type_id)? 
== 0); + + // name can be retrieved in various ways + let name = dex.get_type_desc_utf16(type_id)?; + // see dex_strings.rs for more examples on the strings used in dex files + let name = dex.get_type_desc_utf16_lossy(type_id)?; + + // you can even skip all verification and get the string as + // fast as possible + let string_id = dex.get_string_id(type_id.descriptor_idx)?; + let name = unsafe { dex.fast_get_utf8_str(string_id)? }; + + // same as above, all types are exposed + for _ in dex.get_type_ids() {} + Ok(()) +} + +fn main() { + // ... +} diff --git a/examples/dex_strings.rs b/examples/dex_strings.rs new file mode 100644 index 0000000..6bc3dee --- /dev/null +++ b/examples/dex_strings.rs @@ -0,0 +1,53 @@ +#![allow(unused)] + +use dexrs::{file::DexFile, utf, Result}; + +fn dex_strings(dex: &DexFile<'_>) -> Result<()> { + // strings can be retrieved in various ways + let string_id = dex.get_string_id(0)?; + assert!(dex.string_id_idx(string_id)? == 0); + + // name can be retrieved in various ways: + // + // 1. modified utf8 -> utf16 with checks + let name = dex.get_utf16_str(string_id)?; + // + // 2. modified utf8 -> utf16 lossy + let name = dex.get_utf16_str_lossy(string_id)?; + // + // 3. modified utf8 -> utf8 unsafe (but fast) + let name = unsafe { dex.fast_get_utf8_str(string_id)? }; + + // there's also a function to query the raw string + // data without conversion + let (utf16_len, data) = dex.get_string_data(string_id)?; + + // all of the operations above can be done with the + // index directly + let name = dex.get_utf16_str_lossy_at(0)?; + + Ok(()) +} + +pub fn mutf8_strings() -> Result<()> { + // the conversion of MUTF8 strings is not stable yet (fails fuzzing) + + // The only contraint on input data is that it must be null-terminated + let data = b"Hello, World!\0"; + // conversion from MUTF8 to UTF16 is provided in two ways: + // + // 1. modified utf8 -> utf16 with checks + let name = utf::mutf8_to_str(data)?; + // + // 2. 
modified utf8 -> utf16 lossy + let name = utf::mutf8_to_str_lossy(data); + + // conversion back is also supported + let mutf8_data = utf::str_to_mutf8(&name); + assert_eq!(data.to_vec(), mutf8_data); + Ok(()) +} + +fn main() { + // ... +} diff --git a/examples/parse_dex_file.rs b/examples/parse_dex_file.rs new file mode 100644 index 0000000..2dca8a3 --- /dev/null +++ b/examples/parse_dex_file.rs @@ -0,0 +1,62 @@ +#![allow(unused)] + +use dexrs::file::{verifier::VerifyPreset, DexFile, DexFileContainer, DexLocation}; + +fn parse_dex_file(path: &str) -> Result<(), Box> { + // external files should be opened through a DexFileContainer + let file = std::fs::File::open(path)?; + // you can configure whether to verify the dex file + let container = DexFileContainer::new(&file) + .verify(true) + .verify_checksum(true); + + let dex = container.open()?; + // ... + Ok(()) +} + +fn parse_in_memory_file(data: &[u8]) -> Result<(), Box> { + // everything that implements a DexContainer can be used + let file = DexFile::open(&data, DexLocation::InMemory, VerifyPreset::None)?; + + Ok(()) +} + +fn open_mutable_memory(data: &mut [u8]) -> Result<(), Box> { + // mutable files are still WIP, but their interface will + // be the same. However, this file will return a valid DexFile + // only if the given data already contains a valid header definition. + let mut file = DexFile::open(&data, DexLocation::InMemory, VerifyPreset::None)?; + + Ok(()) +} + +fn open_mutable_file(path: &str) -> Result<(), Box> { + // The interface for opening files that should be manipulated should + // be the same as parse_dex_file but with open_mut at the end + let file = std::fs::File::open(path)?; + let mmap = unsafe { memmap2::MmapMut::map_mut(&file)? 
}; + // will be updated + let mut file = DexFile::open( + &mmap, + DexLocation::Path(path.to_string()), + VerifyPreset::None, + )?; + + Ok(()) +} + +fn parse_dex_file_unchecked(data: &[u8]) -> Result<(), Box> { + // files can be created without further initialization and checks. NOTE: + // This operation will still try to iterate over the MapList and collect + // additional items. + let file = DexFile::from_raw_parts(&data, DexLocation::InMemory)?; + + // verification can be done now + DexFile::verify(&file, VerifyPreset::All)?; + Ok(()) +} + +fn main() { + // ... +} diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 289f3b1..ef07ad9 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -41,3 +41,10 @@ path = "fuzz_targets/encoded_item.rs" test = false doc = false bench = false + +[[bin]] +name = "mutf8" +path = "fuzz_targets/mutf8.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/from_raw_parts.rs b/fuzz/fuzz_targets/from_raw_parts.rs index 64d13fb..8873568 100644 --- a/fuzz/fuzz_targets/from_raw_parts.rs +++ b/fuzz/fuzz_targets/from_raw_parts.rs @@ -10,7 +10,7 @@ libfuzzer_sys::fuzz_target!(|data: &[u8]| { // this must not panic if let Ok(dex) = DexFile::from_raw_parts(&data, DexLocation::InMemory) { if DexFile::verify(&dex, VerifyPreset::All).is_ok() { - let _ = dex; + if let Ok(_) = dex.get_class_def(0) {} } } }); diff --git a/fuzz/fuzz_targets/mutf8.rs b/fuzz/fuzz_targets/mutf8.rs new file mode 100644 index 0000000..3269101 --- /dev/null +++ b/fuzz/fuzz_targets/mutf8.rs @@ -0,0 +1,14 @@ +#![no_main] +#![allow(non_snake_case)] + +use dexrs::utf; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // end must be a zero + if let Some(end) = data.iter().position(|&x| x == 0) { + if let Ok(s) = utf::mutf8_to_str(&data[0..end]) { + let _ = s.len(); + } + } +}); diff --git a/src/error.rs b/src/error.rs index 77be00e..481858e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -163,6 +163,15 @@ pub enum DexError { offset: usize, 
max: usize, }, + + #[error( + "Got invalid object reference({offset}) which is out of bounds (start: {start}, end: {end})" + )] + UnknownObjectRef { + offset: usize, + start: usize, + end: usize, + }, } #[macro_export] diff --git a/src/file/container.rs b/src/file/container.rs index 7919f71..1017ee8 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -1,7 +1,7 @@ -use std::ops::{Deref, DerefMut}; use memmap2::{MmapAsRawDesc, MmapMut}; +use std::ops::{Deref, DerefMut}; -use crate::Result; +use crate::{file::MmapMutDexFile, Result}; use super::MmapDexFile; @@ -84,7 +84,7 @@ impl DexFileContainer { } pub fn open<'a>(&'a self) -> Result> { - MmapDexFile::open(self) + MmapDexFile::open_file(self) } pub fn get_location(&self) -> &str { diff --git a/src/file/dump.rs b/src/file/dump.rs index cacb078..8bd86fd 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -4,7 +4,9 @@ use crate::{ Result, }; -use super::{vreg, Code, DexContainer, DexFile, Format, Instruction, MethodId, StringId, TypeId}; +use super::{ + vreg, Code, DexContainer, DexFile, FieldId, Format, Instruction, MethodId, StringId, TypeId, +}; pub mod prettify { @@ -22,15 +24,26 @@ pub mod prettify { } impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - pub fn pretty_field(&self, field_idx: u32, opts: prettify::Field) -> String { - match self.pretty_field_opt(field_idx, opts) { + pub fn pretty_field_at(&self, field_idx: u32, opts: prettify::Field) -> String { + match self.pretty_field_opt_at(field_idx, opts) { Ok(s) => s, Err(_) => format!("<>"), } } - pub fn pretty_field_opt(&self, field_idx: u32, opts: prettify::Field) -> Result { + pub fn pretty_field(&self, field_id: &FieldId, opts: prettify::Field) -> String { + match self.pretty_field_opt(field_id, opts) { + Ok(s) => s, + Err(_) => format!("<>"), + } + } + + pub fn pretty_field_opt_at(&self, field_idx: u32, opts: prettify::Field) -> Result { let field_id = self.get_field_id(field_idx)?; + self.pretty_field_opt(field_id, opts) + } + + pub fn 
pretty_field_opt(&self, field_id: &FieldId, opts: prettify::Field) -> Result { let mut result = String::new(); if opts == prettify::Field::WithType { result.push_str(&self.pretty_type_opt_at(field_id.type_idx)?); @@ -56,7 +69,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } pub fn pretty_type_opt(&self, type_id: &TypeId) -> Result { - Ok(pretty_desc(&self.get_type_desc(type_id)?)) + Ok(pretty_desc(&self.get_type_desc_utf16_lossy(type_id)?)) } pub fn pretty_utf16(&self, string_id: &StringId) -> String { @@ -199,7 +212,7 @@ impl<'a> Instruction<'a> { format!( "{opcode} v{}, {} // field@{}", vreg::A(self)?, - dex.pretty_field(field_idx, prettify::Field::WithType), + dex.pretty_field_at(field_idx, prettify::Field::WithType), field_idx ) } @@ -238,7 +251,7 @@ impl<'a> Instruction<'a> { "{opcode} v{}, v{}, {} // field@{}", vreg::A(self)?, vreg::B(self)?, - dex.pretty_field(index, prettify::Field::WithType), + dex.pretty_field_at(index, prettify::Field::WithType), index ) } diff --git a/src/file/mod.rs b/src/file/mod.rs index 4a3f0a5..40d7a19 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -1,4 +1,4 @@ -use memmap2::Mmap; +use memmap2::{Mmap, MmapMut}; use plain::Plain; pub mod structs; @@ -57,6 +57,7 @@ impl ToString for DexLocation { pub type InMemoryDexFile<'a> = DexFile<'a, InMemoryDexContainer<'a>>; pub type MmapDexFile<'a> = DexFile<'a, Mmap>; +pub type MmapMutDexFile<'a> = DexFile<'a, MmapMut>; pub struct DexFile<'a, T: DexContainer<'a> = Mmap> { mmap: &'a T, @@ -147,25 +148,34 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { Ok(dex) } - pub fn open(container: &DexFileContainer) -> Result> { + pub fn open_file(container: &'a DexFileContainer) -> Result> { let loc = container.get_location(); let size = container.data().len(); if size < std::mem::size_of::
() { return dex_err!(DexFileError, "Invalid or truncated file {:?}", loc); } - let dex = DexFile::from_raw_parts(container.data(), DexLocation::Path(loc.to_string()))?; + DexFile::open( + container.data(), + DexLocation::Path(loc.to_string()), + if container.verify_checksum { + // currenlty supports only checksum + VerifyPreset::ChecksumOnly + } else { + VerifyPreset::None + }, + ) + } + + pub fn open( + container: &'a C, + location: DexLocation, + verify_preset: VerifyPreset, + ) -> Result> { + let dex = DexFile::from_raw_parts(container, location)?; dex.init()?; - if container.verify { - DexFile::verify( - &dex, - if container.verify_checksum { - // currenlty supports only checksum - VerifyPreset::ChecksumOnly - } else { - VerifyPreset::None - }, - )?; + if verify_preset != VerifyPreset::None { + DexFile::verify(&dex, verify_preset)?; } Ok(dex) } @@ -192,13 +202,20 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.mmap.len() } - // -- strings + // ------------------------------------------------------------------------------ + // strings + // ------------------------------------------------------------------------------ #[inline(always)] pub fn get_string_id(&self, idx: u32) -> Result<&'a StringId> { check_lt_result!(idx, self.string_ids.len(), StringId); Ok(&self.string_ids[idx as usize]) } + #[inline(always)] + pub fn string_id_idx(&self, item: &'a StringId) -> Result { + self.offset_of(self.string_ids, item) + } + #[inline(always)] pub fn string_ids(&self) -> &'a [StringId] { self.string_ids @@ -237,6 +254,12 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { Ok(String::from_utf8_unchecked(data.to_vec())) } + #[inline] + pub unsafe fn fast_get_utf8_str_at(&self, idx: u32) -> Result { + let string_id = self.get_string_id(idx)?; + self.fast_get_utf8_str(string_id) + } + #[inline(always)] pub fn get_utf16_str_lossy(&self, string_id: &StringId) -> Result { let (_, data) = self.get_string_data(string_id)?; @@ -267,6 +290,11 @@ impl<'a, C: DexContainer<'a>> 
DexFile<'a, C> { Ok(&self.type_ids[idx as usize]) } + #[inline(always)] + pub fn type_id_idx(&self, item: &'a TypeId) -> Result { + self.offset_of(self.type_ids, item) + } + #[inline(always)] pub fn num_type_ids(&self) -> u32 { self.header.type_ids_size @@ -278,28 +306,22 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline(always)] - pub fn get_type_desc(&self, type_id: &TypeId) -> Result { - self.get_utf16_str_lossy_at(type_id.descriptor_idx) - } - - #[inline(always)] - pub fn get_type_desc_at(&self, idx: TypeIndex) -> Result { - self.get_type_desc(self.get_type_id(idx)?) - } - pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { let type_id = self.get_type_id(idx)?; self.get_utf16_str_lossy_at(type_id.descriptor_idx) } + #[inline(always)] pub fn get_type_desc_utf16_lossy(&self, type_id: &TypeId) -> Result { self.get_utf16_str_lossy_at(type_id.descriptor_idx) } + #[inline(always)] pub fn get_type_desc_utf16(&self, type_id: &TypeId) -> Result { self.get_utf16_str_at(type_id.descriptor_idx) } + #[inline(always)] pub fn get_type_desc_utf16_at(&self, idx: TypeIndex) -> Result { let type_id = self.get_type_id(idx)?; self.get_utf16_str_at(type_id.descriptor_idx) @@ -336,6 +358,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { Ok(&self.field_ids[idx as usize]) } + #[inline(always)] + pub fn field_id_idx(&self, item: &'a FieldId) -> Result { + self.offset_of(self.field_ids, item) + } + #[inline(always)] pub fn get_field_ids(&self) -> &'a [FieldId] { self.field_ids @@ -400,6 +427,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { Ok(&self.method_ids[idx as usize]) } + #[inline(always)] + pub fn method_id_idx(&self, item: &'a MethodId) -> Result { + self.offset_of(self.method_ids, item) + } + #[inline(always)] pub fn num_method_ids(&self) -> u32 { self.header.method_ids_size @@ -464,6 +496,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + ca.insns_size_in_code_units() as usize; // must be 4-byte aligned let offset = (offset + 3) & !3; + 
check_lt_result!(offset, self.file_size(), TryItem); self.get_try_items_raw(offset as u32, ca.tries_size() as u16) } @@ -542,7 +575,23 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.get_type_list(class_def.interfaces_off) } - // type list related methods + #[inline] + fn offset_of(&self, buf: &[U], o: &T) -> Result { + let start = buf.as_ptr() as usize; + let target = o as *const _ as usize; + let end = buf.as_ptr() as usize + self.file_size(); + + if target < start || target > end { + return dex_err!(UnknownObjectRef { + offset: target, + start, + end + }); + } + + Ok(((target - start) / std::mem::size_of::()) as u32) + } + #[inline(always)] pub fn get_type_list(&self, offset: u32) -> Result>> { if offset == 0 { diff --git a/src/file/verifier.rs b/src/file/verifier.rs index 2a73867..23ca4ba 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -6,6 +6,7 @@ use super::{ DexContainer, DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, }; +#[derive(Debug, PartialEq, Eq)] pub enum VerifyPreset { None, All, diff --git a/src/utf.rs b/src/utf.rs index 3688aae..23e39ed 100644 --- a/src/utf.rs +++ b/src/utf.rs @@ -1,4 +1,6 @@ +// TODO: these functions are highly unsafe and does not stand any chance against fuzzing + pub fn mutf8_to_str(utf8_data_in: &[u8]) -> crate::Result { let utf16_data = mutf8_to_utf16(utf8_data_in); Ok(String::from_utf16(&utf16_data)?) 
@@ -21,7 +23,7 @@ pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { } #[inline] -pub fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { +fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { let one = utf8_data_in[*offset]; *offset += 1; if one & 0x80 == 0 { @@ -54,37 +56,37 @@ pub fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { } #[inline(always)] -pub fn trailing_utf16_char(maybe_pair: u32) -> u16 { +fn trailing_utf16_char(maybe_pair: u32) -> u16 { (maybe_pair >> 16) as u16 } #[inline(always)] -pub fn leading_utf16_char(maybe_pair: u32) -> u16 { +fn leading_utf16_char(maybe_pair: u32) -> u16 { (maybe_pair & 0x0000FFFFF) as u16 } #[inline(always)] -pub fn is_lead(ch: u16) -> bool { +fn is_lead(ch: u16) -> bool { ch & 0xFC00 == 0xd800 } #[inline(always)] -pub fn is_trail(ch: u16) -> bool { +fn is_trail(ch: u16) -> bool { ch & 0xFC00 == 0xDC00 } #[inline(always)] -pub fn is_surrogate(ch: u16) -> bool { +fn is_surrogate(ch: u16) -> bool { ch & 0xF800 == 0xD800 } #[inline(always)] -pub fn is_surrogate_lead(ch: u16) -> bool { +fn is_surrogate_lead(ch: u16) -> bool { ch & 0x0400 == 0x00 } #[inline(always)] -pub fn get_supplementary(lead: u16, trail: u16) -> u32 { +fn get_supplementary(lead: u16, trail: u16) -> u32 { const OFFSET: u32 = (0xd800 << 10) + 0xdc00 - 0x10000; ((lead as u32) << 10) + (trail as u32) - OFFSET } @@ -119,13 +121,17 @@ pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> usize { len } -pub fn mutf8_to_utf16(utf8_data_in: &[u8]) -> Vec { +fn mutf8_to_utf16(utf8_data_in: &[u8]) -> Vec { + if utf8_data_in.is_empty() { + return Vec::new(); + } + let utf8_in_len = utf8_data_in.len() - 1; let out_chars = mutf8_len(utf8_data_in, utf8_in_len); convert_mutf8_to_utf16(utf8_data_in, utf8_in_len, out_chars) } -pub fn convert_mutf8_to_utf16( +fn convert_mutf8_to_utf16( utf8_data_in: &[u8], utf8_in_len: usize, out_chars: usize, @@ -150,7 +156,7 @@ pub fn convert_mutf8_to_utf16( utf16_data_out } 
-pub fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { +fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { let mut mutf8_len = 0; convert_utf16_to_mutf8(utf16_in, options, |_| mutf8_len += 1); From dad0ccb83679d53f3c87b3afdad7b49c54ef3bc6 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 10 Feb 2025 21:43:32 +0100 Subject: [PATCH 23/46] Added a check to make parsing mutf8 sequences fuzzing resistant (at least for now) --- + string_ids renamed to get_string_ids() + removed unnecessary import in container.rs + mutf8 withstands at least #33554432 fuzzing runs --- benches/parse.rs | 28 ++++++++++++++++++++++++++-- examples/dex_strings.rs | 2 +- src/error.rs | 5 +++++ src/file/container.rs | 2 +- src/file/mod.rs | 4 ++-- src/utf.rs | 32 +++++++++++++++++++++----------- 6 files changed, 56 insertions(+), 17 deletions(-) diff --git a/benches/parse.rs b/benches/parse.rs index c4140a9..419f2ec 100644 --- a/benches/parse.rs +++ b/benches/parse.rs @@ -1,4 +1,4 @@ -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion, black_box}; use dexrs::file::{verifier::VerifyPreset, DexFile, DexLocation, Header, InMemoryDexContainer}; fn parse_and_verify_small_file(c: &mut Criterion) { @@ -33,5 +33,29 @@ fn parse_small_file(c: &mut Criterion) { }); } -criterion_group!(benches, parse_and_verify_small_file, parse_small_file); +// REVISIT: this is not a real benchmark +macro_rules! 
parse_strings { + ($name:ident, $lossy:ident) => { + fn $name(c: &mut Criterion) { + let data = include_bytes!("../tests/prime/prime.dex"); + c.bench_function("parse_strings", |b| { + b.iter(|| { + let container = InMemoryDexContainer::new(data); + if let Ok(dex) = DexFile::from_raw_parts(&container, DexLocation::InMemory) { + for string_id in dex.get_string_ids() { + if let Ok(_) = dex.$lossy(string_id) { + black_box(string_id); + } + } + } + }) + }); + } + }; +} + +parse_strings!(parse_strings_lossy, get_utf16_str_lossy); +parse_strings!(parse_strings, get_utf16_str); + +criterion_group!(benches, parse_and_verify_small_file, parse_small_file, parse_strings_lossy, parse_strings); criterion_main!(benches); diff --git a/examples/dex_strings.rs b/examples/dex_strings.rs index 6bc3dee..fe1f65a 100644 --- a/examples/dex_strings.rs +++ b/examples/dex_strings.rs @@ -40,7 +40,7 @@ pub fn mutf8_strings() -> Result<()> { let name = utf::mutf8_to_str(data)?; // // 2. modified utf8 -> utf16 lossy - let name = utf::mutf8_to_str_lossy(data); + let name = utf::mutf8_to_str_lossy(data)?; // conversion back is also supported let mutf8_data = utf::str_to_mutf8(&name); diff --git a/src/error.rs b/src/error.rs index 481858e..b228f47 100644 --- a/src/error.rs +++ b/src/error.rs @@ -172,6 +172,11 @@ pub enum DexError { start: usize, end: usize, }, + + #[error( + "Got invalid mUTF8 encoded string that encodes up to {idx} characters with only {len} bytes" + )] + MalformedMUTF8Sequence { idx: usize, len: usize }, } #[macro_export] diff --git a/src/file/container.rs b/src/file/container.rs index 1017ee8..30d59ce 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -1,7 +1,7 @@ use memmap2::{MmapAsRawDesc, MmapMut}; use std::ops::{Deref, DerefMut}; -use crate::{file::MmapMutDexFile, Result}; +use crate::Result; use super::MmapDexFile; diff --git a/src/file/mod.rs b/src/file/mod.rs index 40d7a19..2e2eef2 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -217,7 +217,7 @@ 
impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline(always)] - pub fn string_ids(&self) -> &'a [StringId] { + pub fn get_string_ids(&self) -> &'a [StringId] { self.string_ids } @@ -263,7 +263,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline(always)] pub fn get_utf16_str_lossy(&self, string_id: &StringId) -> Result { let (_, data) = self.get_string_data(string_id)?; - Ok(utf::mutf8_to_str_lossy(data)) + utf::mutf8_to_str_lossy(data) } #[inline(always)] diff --git a/src/utf.rs b/src/utf.rs index 23e39ed..39bd8c3 100644 --- a/src/utf.rs +++ b/src/utf.rs @@ -1,14 +1,16 @@ // TODO: these functions are highly unsafe and does not stand any chance against fuzzing +use crate::{dex_err, Result, error::DexError}; + pub fn mutf8_to_str(utf8_data_in: &[u8]) -> crate::Result { - let utf16_data = mutf8_to_utf16(utf8_data_in); + let utf16_data = mutf8_to_utf16(utf8_data_in)?; Ok(String::from_utf16(&utf16_data)?) } -pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> String { - let utf16_data = mutf8_to_utf16(utf8_data_in); - String::from_utf16_lossy(&utf16_data) +pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> Result { + let utf16_data = mutf8_to_utf16(utf8_data_in)?; + Ok(String::from_utf16_lossy(&utf16_data)) } pub fn str_to_mutf8(str_data_in: &str) -> Vec { @@ -91,7 +93,7 @@ fn get_supplementary(lead: u16, trail: u16) -> u32 { ((lead as u32) << 10) + (trail as u32) - OFFSET } -pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> usize { +pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> Result { let mut len = 0; let mut in_idx = 0; while in_idx < utf8_in_len { @@ -118,17 +120,25 @@ pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> usize { in_idx += 1; len += 1; } - len + + if in_idx > utf8_in_len { + // This case happens when there are any invalid character sequences + return dex_err!(MalformedMUTF8Sequence { + idx: in_idx, + len: utf8_in_len + }) + } + Ok(len) } -fn mutf8_to_utf16(utf8_data_in: &[u8]) -> Vec { +fn 
mutf8_to_utf16(utf8_data_in: &[u8]) -> Result> { if utf8_data_in.is_empty() { - return Vec::new(); + return Ok(Vec::new()); } let utf8_in_len = utf8_data_in.len() - 1; - let out_chars = mutf8_len(utf8_data_in, utf8_in_len); - convert_mutf8_to_utf16(utf8_data_in, utf8_in_len, out_chars) + let out_chars = mutf8_len(utf8_data_in, utf8_in_len)?; + Ok(convert_mutf8_to_utf16(utf8_data_in, utf8_in_len, out_chars)) } fn convert_mutf8_to_utf16( @@ -251,6 +261,6 @@ mod tests { #[test] fn test_mutf8_to_str() { let data = &[102, 111, 111, 98, 97, 114, 0]; - assert_eq!(mutf8_to_str_lossy(data), "foobar".to_string()); + assert_eq!(mutf8_to_str_lossy(data).unwrap(), "foobar".to_string()); } } From 02b35f91ef4ba13a4a59e59b4022712517b6478d Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:14:57 +0100 Subject: [PATCH 24/46] Added (rudimentary) support for Python extension --- Changes: + decode_leb128p1 will always return i32 from now on + Made file::header::Header cloneable Additions: + Python Extension using pyo3 (feature: "python") + Support is limited to parsing one file and fetching the header for now --- Cargo.toml | 11 ++- pyproject.toml | 21 +++++ python/dexrs/__init__.py | 1 + src/file/header.rs | 2 +- src/file/mod.rs | 60 ++++++++++---- src/leb128.rs | 2 +- src/lib.rs | 20 +++++ src/py/container.rs | 170 +++++++++++++++++++++++++++++++++++++++ src/py/error.rs | 31 +++++++ src/py/file.rs | 121 ++++++++++++++++++++++++++++ src/py/mod.rs | 16 ++++ src/py/structs.rs | 82 +++++++++++++++++++ 12 files changed, 519 insertions(+), 18 deletions(-) create mode 100644 pyproject.toml create mode 100644 python/dexrs/__init__.py create mode 100644 src/py/container.rs create mode 100644 src/py/error.rs create mode 100644 src/py/file.rs create mode 100644 src/py/mod.rs create mode 100644 src/py/structs.rs diff --git a/Cargo.toml b/Cargo.toml index 8daa2be..54a27de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,21 @@ 
adler32 = "1.2.0" memmap2 = "0.9.5" openssl = "0.10.64" plain = "0.2.3" +pyo3 = { version = "0.23.4", optional = true, features = ["extension-module"] } thiserror = "2.0.11" varint-simd = "0.4.1" +[features] +default = [] +python = ["pyo3"] + +[lib] +name = "dexrs" +crate-type = ["cdylib", "rlib"] + [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } [[bench]] name = "parse" -harness = false \ No newline at end of file +harness = false diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..eed46a7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["maturin>=1.8,<2.0"] +build-backend = "maturin" + +[project] +name = "dexrs" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version"] + + +[tool.maturin] +# allows us to use `cargo install --features python` directly +features = ["pyo3/extension-module", "dexrs/python"] +python-packages = ["dexrs"] +python-source = "python" +module-name = "dexrs._internal" \ No newline at end of file diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py new file mode 100644 index 0000000..395a825 --- /dev/null +++ b/python/dexrs/__init__.py @@ -0,0 +1 @@ +from dexrs._internal import file, error, container \ No newline at end of file diff --git a/src/file/header.rs b/src/file/header.rs index eeb95a0..2b5ee46 100644 --- a/src/file/header.rs +++ b/src/file/header.rs @@ -1,5 +1,5 @@ #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Header { /// magic value magic: [u8; 8], diff --git a/src/file/mod.rs b/src/file/mod.rs index 2e2eef2..a92b1ff 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -19,7 +19,8 @@ pub mod dump; pub use container::*; pub mod annotations; pub use annotations::*; -use verifier::VerifyPreset; +pub mod debug; +pub use debug::*; use 
crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; @@ -33,6 +34,7 @@ pub const DEX_MAGIC_VERSIONS: &[&[u8]] = &[ ]; pub const DEX_ENDIAN_CONSTANT: u32 = 0x12345678; +pub const DEX_NO_INDEX: u32 = 0xffffffff; #[derive(Debug)] pub enum DexLocation { @@ -94,6 +96,34 @@ impl<'a, C: DexContainerMut<'a>> DexFile<'a, C> { //TODO } +macro_rules! fn_id { + ($name:ident, $attr:ident, $ret_ty:ty, $idx_ty:ty, $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&self, idx: $idx_ty) -> Result<&'a $ret_ty> { + check_lt_result!(idx, self.$attr.len(), $ret_ty); + Ok(&self.$attr[idx as usize]) + } + }; + ($name:ident, $attr:ident, Option: $ret_ty:ty, $fallback:ident, $idx_ty:ty, $(#[$meta:meta])*) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self, idx: $idx_ty) -> Result> { + match idx { + DEX_NO_INDEX => Ok(None), + _=> Ok(Some(self.$fallback(idx)?)), + } + } + }; + ($name:ident, $attr:ident, $ret_ty:ty[], $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self) -> &'a [$ret_ty] { + &self.$attr + } + } +} + impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline] fn header_available(base: &'a C) -> bool { @@ -160,9 +190,9 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { DexLocation::Path(loc.to_string()), if container.verify_checksum { // currenlty supports only checksum - VerifyPreset::ChecksumOnly + verifier::VerifyPreset::ChecksumOnly } else { - VerifyPreset::None + verifier::VerifyPreset::None }, ) } @@ -170,11 +200,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn open( container: &'a C, location: DexLocation, - verify_preset: VerifyPreset, + verify_preset: verifier::VerifyPreset, ) -> Result> { let dex = DexFile::from_raw_parts(container, location)?; dex.init()?; - if verify_preset != VerifyPreset::None { + if verify_preset != verifier::VerifyPreset::None { DexFile::verify(&dex, verify_preset)?; } Ok(dex) @@ -202,25 +232,25 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { 
self.mmap.len() } + #[inline(always)] + pub fn get_header(&self) -> &'a Header { + &self.header + } + // ------------------------------------------------------------------------------ // strings // ------------------------------------------------------------------------------ - #[inline(always)] - pub fn get_string_id(&self, idx: u32) -> Result<&'a StringId> { - check_lt_result!(idx, self.string_ids.len(), StringId); - Ok(&self.string_ids[idx as usize]) - } + + // TODO: add docs + fn_id!(get_string_id, string_ids, StringId, u32,); + fn_id! {get_string_id_opt, string_ids, Option: StringId, get_string_id, u32,} + fn_id!(get_string_ids, string_ids, StringId[],); #[inline(always)] pub fn string_id_idx(&self, item: &'a StringId) -> Result { self.offset_of(self.string_ids, item) } - #[inline(always)] - pub fn get_string_ids(&self) -> &'a [StringId] { - self.string_ids - } - #[inline(always)] pub fn num_string_ids(&self) -> u32 { self.header.string_ids_size diff --git a/src/leb128.rs b/src/leb128.rs index 5690b65..d31842f 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -9,7 +9,7 @@ pub fn decode_leb128(data_in: &[u8]) -> Result<(T, } #[inline(always)] -pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { +pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { let (result, size) = decode_leb128::(data_in)?; Ok(((result - 1) as i32, size)) } diff --git a/src/lib.rs b/src/lib.rs index a06f313..bc33043 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,3 +8,23 @@ pub mod utf; pub mod desc_names; pub type Result = result::Result; + +#[cfg(feature = "python")] +pub(crate) mod py; + +#[cfg(feature = "python")] +#[pyo3::pymodule] +mod _internal { + + #[pymodule_export] + use crate::py::container::container_mod; + + #[pymodule_export] + use crate::py::file::file_mod; + + #[pymodule_export] + use crate::py::error::error; + + #[pymodule_export] + use crate::py::structs::structs; +} \ No newline at end of file diff --git a/src/py/container.rs b/src/py/container.rs 
new file mode 100644 index 0000000..7890aef --- /dev/null +++ b/src/py/container.rs @@ -0,0 +1,170 @@ +use std::{ops::Deref, sync::Arc}; + +use pyo3::{exceptions::PyNotImplementedError, types::PyBytes, Py, PyRef, PyResult, Python}; + +use crate::file::DexContainer; + +use super::error::GenericError; + +#[pyo3::pyclass(name = "DexContainer", module = "dexrs._internal.container", subclass)] +pub struct PyDexContainer {} + +#[pyo3::pymethods] +impl PyDexContainer { + #[new] + pub fn new() -> Self { + PyDexContainer {} + } + + pub fn data(&self) -> PyResult<&[u8]> { + Err(PyNotImplementedError::new_err("foobar")) + } + + pub fn file_size(&self) -> PyResult { + Err(PyNotImplementedError::new_err("foobar")) + } +} + +// custom implementation of DexFileContainer to support python values + +#[pyo3::pyclass( + name = "InMemoryDexContainer", + module = "dexrs._internal.container", + extends = PyDexContainer, + subclass +)] +pub struct PyInMemoryDexContainer { + data: Py, + length: usize, +} + +impl AsRef<[u8]> for PyInMemoryDexContainer { + #[inline] + fn as_ref(&self) -> &[u8] { + Python::with_gil(|py| self.data.as_bytes(py)) + } +} + +impl Deref for PyInMemoryDexContainer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl DexContainer<'_> for PyInMemoryDexContainer {} + +impl PyInMemoryDexContainer { + pub fn open<'py>(py: Python, data: Py) -> Self { + Self { + data: data.clone_ref(py), + length: data.as_bytes(py).len(), + } + } +} + +#[pyo3::pymethods] +impl PyInMemoryDexContainer { + #[new] + pub fn new<'py>( + py: Python<'py>, + data: Py, + ) -> PyResult<(PyInMemoryDexContainer, PyDexContainer)> { + Ok(( + PyInMemoryDexContainer::open(py, data), + PyDexContainer::new(), + )) + } + + // TODO: measure performance overhead if data is huge + pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { + Ok(self.data.clone_ref(py)) + } + + #[getter] + pub fn file_size(&self) -> PyResult { + Ok(self.length) + } + + pub fn __len__(py_self: 
PyRef<'_, Self>) -> usize { + py_self.length + } +} + +#[pyo3::pyclass( + name = "FileDexContainer", + module = "dexrs._internal.container", + extends = PyDexContainer, + subclass +)] +pub struct PyFileDexContainer { + path: String, + _fp: std::fs::File, + data: Arc, +} + +impl AsRef<[u8]> for PyFileDexContainer { + #[inline] + fn as_ref(&self) -> &[u8] { + self.data.as_ref() + } +} + +impl Deref for PyFileDexContainer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl DexContainer<'_> for PyFileDexContainer {} + +impl PyFileDexContainer { + pub fn open(path: String) -> Result { + let fp = std::fs::File::open(path.clone())?; + let mmap = unsafe { memmap2::Mmap::map(&fp)? }; + Ok(PyFileDexContainer { + path, + _fp: fp, + data: Arc::new(mmap), + }) + } +} + +#[pyo3::pymethods] +impl PyFileDexContainer { + #[new] + pub fn new(path: String) -> PyResult<(Self, PyDexContainer)> { + Ok(( + PyFileDexContainer::open(path)?, + PyDexContainer::new(), + )) + } + + pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { + Ok(PyBytes::new(py, self.data.as_ref()).into()) + } + + #[getter] + pub fn file_size(&self) -> PyResult { + Ok(self.data.len()) + } + + #[getter] + pub fn location(&self) -> PyResult { + Ok(self.path.clone()) + } + + pub fn __len__(&self) -> usize { + self.data.len() + } +} + +#[pyo3::pymodule(name = "container")] +pub(crate) mod container_mod { + + #[pymodule_export] + use super::{PyDexContainer, PyFileDexContainer, PyInMemoryDexContainer}; +} diff --git a/src/py/error.rs b/src/py/error.rs new file mode 100644 index 0000000..3474f36 --- /dev/null +++ b/src/py/error.rs @@ -0,0 +1,31 @@ +use pyo3::{create_exception, exceptions::{PyIOError, PyRuntimeError}, PyErr}; + +use crate::error::DexError; + +create_exception!(dexrs._internal.error, PyDexError, PyRuntimeError); + +impl From for PyErr { + fn from(err: DexError) -> PyErr { + PyDexError::new_err(err.to_string()) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum 
GenericError { + #[error(transparent)] + IOError(#[from] std::io::Error), +} + +impl From for PyErr { + fn from(err: GenericError) -> PyErr { + PyIOError::new_err(err.to_string()) + } +} + +#[pyo3::pymodule] +pub(crate) mod error { + + #[pymodule_export] + use super::PyDexError; + +} \ No newline at end of file diff --git a/src/py/file.rs b/src/py/file.rs new file mode 100644 index 0000000..8a50bea --- /dev/null +++ b/src/py/file.rs @@ -0,0 +1,121 @@ +use std::{ + borrow::Borrow, + sync::{Arc, Mutex}, +}; + +use pyo3::{types::PyBytes, Py, PyResult, Python}; + +use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, Header}; + +use super::{ + arc_mutex_get, + container::{PyFileDexContainer, PyInMemoryDexContainer}, + structs::PyHeader, +}; + +#[pyo3::pyclass(name = "VerifyPreset", module = "dexrs._internal.file", eq, eq_int)] +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum PyVerifyPreset { + ALL = 1, + CHECKSUM_ONLY = 2, + NONE = 3, +} + +impl Into for PyVerifyPreset { + fn into(self) -> VerifyPreset { + match self { + PyVerifyPreset::ALL => VerifyPreset::All, + PyVerifyPreset::CHECKSUM_ONLY => VerifyPreset::ChecksumOnly, + PyVerifyPreset::NONE => VerifyPreset::None, + } + } +} + +// lifetime annotation can't be removed for now +pub type PyInMemoryDexFile<'a> = DexFile<'a, PyInMemoryDexContainer>; +pub type PyDexFile<'a> = DexFile<'a, PyFileDexContainer>; + +// REVISIT: there's currently no other way to store the dex file +pub enum RsDexFile { + InMemory { + dex: PyInMemoryDexFile<'static>, + container: PyInMemoryDexContainer, + }, + File { + dex: PyDexFile<'static>, + container: PyFileDexContainer, + }, +} + +// Python wrapper class that enables mutli-threading operations +#[pyo3::pyclass(name = "DexFile", module = "dexrs._internal.file")] +pub struct PyDexFileImpl { + inner: Arc, +} + +macro_rules! 
bind_dex { + ($dex_file:ident, $dex_type:ident, $c:ident) => {{ + let static_dex = unsafe { std::mem::transmute($dex_file) }; + let inner = RsDexFile::$dex_type { + container: $c, + dex: static_dex, + }; + PyDexFileImpl { + inner: Arc::new(inner), + } + }}; +} + +impl PyDexFileImpl {} + +macro_rules! dex_action_impl { + ($this:ident, $method:ident) => {{ + match $this.inner.as_ref() { + RsDexFile::InMemory { dex, .. } => dex.$method(), + RsDexFile::File { dex, .. } => dex.$method(), + } + }}; +} + +#[pyo3::pymethods] +impl PyDexFileImpl { + #[staticmethod] + #[pyo3(signature = ( + data, + preset=PyVerifyPreset::ALL + ))] + pub fn from_bytes<'py>( + py: Python<'py>, + data: Py, + preset: PyVerifyPreset, + ) -> PyResult { + let preset = preset.into(); + let container = PyInMemoryDexContainer::open(py, data); + let dex = PyInMemoryDexFile::open(&container, DexLocation::InMemory, preset)?; + Ok(bind_dex!(dex, InMemory, container)) + } + + #[staticmethod] + #[pyo3(signature = ( + path, + preset=PyVerifyPreset::ALL + ))] + pub fn from_file(path: String, preset: PyVerifyPreset) -> PyResult { + let preset = preset.into(); + let container = PyFileDexContainer::open(path.clone())?; + let dex = PyDexFile::open(&container, DexLocation::Path(path), preset)?; + Ok(bind_dex!(dex, File, container)) + } + + pub fn get_header(&self) -> PyResult { + Ok(dex_action_impl!(self, get_header).into()) + } +} + +// final module +#[pyo3::pymodule(name = "file")] +pub(crate) mod file_mod { + + #[pymodule_export] + use super::{PyDexFileImpl, PyVerifyPreset}; +} diff --git a/src/py/mod.rs b/src/py/mod.rs new file mode 100644 index 0000000..f8f8524 --- /dev/null +++ b/src/py/mod.rs @@ -0,0 +1,16 @@ +pub(crate) mod container; +pub(crate) mod file; +pub(crate) mod error; +pub(crate) mod structs; + + +pub(crate) type ArcMutex = std::sync::Arc>; + + +macro_rules! 
arc_mutex_get { + ($value:expr) => { + $value.lock().unwrap() + }; +} + +pub(crate) use arc_mutex_get; \ No newline at end of file diff --git a/src/py/structs.rs b/src/py/structs.rs new file mode 100644 index 0000000..8a9affb --- /dev/null +++ b/src/py/structs.rs @@ -0,0 +1,82 @@ +use std::sync::{Arc, Mutex}; + +use crate::file::Header; + +macro_rules! py_struct_wrapper { + ($name:literal, $py_type:ident, $rust_type:ident) => { + #[pyo3::pyclass(name = $name, module = "dexrs._internal.structs")] + pub struct $py_type(pub Arc<$rust_type>); + + impl<'a> From<&'a $rust_type> for $py_type { + fn from(value: &'a $rust_type) -> Self { + $py_type(Arc::new(value.clone())) + } + } + }; +} + +macro_rules! py_struct_fields { + ($py_type:ident, { $(($name:ident, $rtype:ty),)+ }, $($extra:tt)*) => { + #[pyo3::pymethods] + impl $py_type { + $( + #[getter] + pub fn $name(&self) -> $rtype { + self.0.$name + } + )+ + + $( + $extra + )* + } + }; +} + +py_struct_wrapper!("Header", PyHeader, Header); + +py_struct_fields!(PyHeader, { + (checksum, u32), + (file_size, u32), + (header_size, u32), + (endian_tag, u32), + (link_size, u32), + (link_off, u32), + (string_ids_size, u32), + (string_ids_off, u32), + (type_ids_size, u32), + (type_ids_off, u32), + (proto_ids_size, u32), + (proto_ids_off, u32), + (field_ids_size, u32), + (field_ids_off, u32), + (method_ids_size, u32), + (method_ids_off, u32), + (class_defs_size, u32), + (class_defs_off, u32), + (data_size, u32), + (data_off, u32), +}, + +#[getter] +pub fn signature(&self) -> Vec { + self.0.get_signature().to_vec() +} + +#[getter] +pub fn version_int(&self) -> u32 { + self.0.get_version() +} + +#[getter] +pub fn get_magic(&self) -> Vec { + self.0.get_magic().to_vec() +} +); + +#[pyo3::pymodule] +pub(crate) mod structs { + + #[pymodule_export] + use super::PyHeader; +} From f7fe1e05514e5b0ab0fcc333a7e05c71d65dd3b7 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 12 Feb 2025 
22:56:58 +0100 Subject: [PATCH 25/46] Removed obsolte main --- src/main.rs | 92 ----------------------------------------------------- 1 file changed, 92 deletions(-) delete mode 100644 src/main.rs diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 509353d..0000000 --- a/src/main.rs +++ /dev/null @@ -1,92 +0,0 @@ -use dexrs::file::dump::prettify; -use dexrs::file::{vreg, DexFile, DexFileContainer, Field, InMemoryDexContainer, Method}; -use dexrs::Result; -// use dexrs::art::dex::file::Header; - -// fn main() -> Result<(), dexrs::art::error::Error> { - -// let file = std::fs::File::open(".vscode/classes.dex").unwrap(); -// let mmap = unsafe { memmap2::Mmap::map(&file)? }; - -// let header = Header::from_bytes(&mmap).unwrap(); -// println!("Version: {}", header.get_version_or(0)); -// println!("{:?}", header); -// Ok(()) -// } - -fn main() -> Result<()> { - let path = ".vscode/classes.dex"; - let file = std::fs::File::open(&path).unwrap(); - let container = DexFileContainer::new(&file) - .verify(true) - .verify_checksum(true); - - let dex = container.open()?; - println!("{:?}", dex.get_string_id(0)?); - - // println!("=== Types ==="); - // for type_id in dex.get_type_ids() { - // let name = dex.get_type_desc_utf16_lossy(type_id); - // println!("{}", name); - // } - - // println!("=== Fields ==="); - // for field_id in dex.get_field_ids() { - // let cls_name = dex.get_type_desc_utf16_lossy_at(field_id.class_idx)?; - // let type_name = dex.get_type_desc_utf16_lossy_at(field_id.type_idx)?; - // let name = dex.get_utf16_str_lossy_at(field_id.name_idx)?; - - // println!(".field {}->{}:{}", cls_name, name, type_name); - // } - - let class_def = dex.get_class_def(122)?; - let name = dex.get_type_desc_utf16_lossy_at(class_def.class_idx)?; - println!("Class name: {}", name); - - if let Some(interfaces) = dex.get_type_list(class_def.interfaces_off)? 
{ - println!("Interfaces:"); - for interface in interfaces { - let name = dex.get_type_desc_utf16_lossy_at(interface.type_idx)?; - println!(".implements {}", name); - } - } - - let class_data = dex - .get_class_accessor(class_def) - .expect("msg") - .expect("msg"); - println!("Static Methods: {}", class_data.num_direct_methods); - let fields: Vec = class_data.get_fields().collect(); - - for field in fields { - println!( - ".field {}", - dex.pretty_field(field.index, prettify::Field::WithType) - ); - } - - // for method in fields { - // let ca = dex.get_code_item_accessor(method.code_offset)?; - // let insn = ca.insn_at(0); - // println!("Insn: {:?}", insn.to_string(Some(&dex))?); - // } - - let methods: Vec = class_data.get_methods()?.collect(); - for method in methods { - println!( - ".method {}", - dex.pretty_method_at(method.index, prettify::Method::WithSig) - ); - - let ca = dex.get_code_item_accessor(method.code_offset)?; - println!(" .registers {}\n", ca.registers_size()); - - for inst in &ca { - println!("|{:#08x}| {}", ca.get_inst_offset_in_code_units(&inst), inst.to_string(Some(&dex))?); - } - println!(".end method\n"); - break; - } - - Ok(()) -} From 6a4f1e4f85347c0582e2ded68d4875af8841c789 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 13 Feb 2025 17:46:57 +0100 Subject: [PATCH 26/46] Updated Python API to always utilize container objects --- Changed: + Splitted decode_leb128_off into decode_leb128_adv and decode_leb128_off with slightly different behaviour + DexFile::get_opt applied boundry check using ::MAX instead of u32::MAX + Most structs in dexrs::file::structs are now cloneable + PyDexFileImpl can be created bypassing dex container instances only Additions: + DebugInfo parser (parameter names and positions) --- python/dexrs/py.typed | 0 src/file/class_accessor.rs | 19 +++-- src/file/debug.rs | 138 +++++++++++++++++++++++++++++++++++++ src/file/mod.rs | 71 +++++++++---------- 
src/file/structs.rs | 16 +++-- src/leb128.rs | 19 ++++- src/py/container.rs | 45 +++++------- src/py/file.rs | 132 +++++++++++++++++++++++++++-------- src/py/mod.rs | 11 --- src/py/structs.rs | 89 ++++++++++++++++++++++-- 10 files changed, 414 insertions(+), 126 deletions(-) create mode 100644 python/dexrs/py.typed create mode 100644 src/file/debug.rs diff --git a/python/dexrs/py.typed b/python/dexrs/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 87b3386..5a6a75e 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -47,7 +47,7 @@ impl<'a> Method { impl<'a> ClassItemBase for Method { fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { let target = self.index as usize; - let value = decode_leb128_off::(&data[*pos..], pos)?; + let value = decode_leb128_off::(&data, pos)?; if target + value as usize > u32::MAX as usize { return dex_err!(BadEncodedIndex { index: self.index, @@ -56,8 +56,8 @@ impl<'a> ClassItemBase for Method { }); } self.index += value; - self.access_flags = decode_leb128_off::(&data[*pos..], pos)?; - self.code_offset = decode_leb128_off::(&data[*pos..], pos)?; + self.access_flags = decode_leb128_off::(&data, pos)?; + self.code_offset = decode_leb128_off::(&data, pos)?; Ok(()) } @@ -93,7 +93,7 @@ impl<'a> Field { impl<'a> ClassItemBase for Field { fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { let target = self.index as usize; - let value = decode_leb128_off::(&data[*pos..], pos)?; + let value = decode_leb128_off::(&data, pos)?; if target + value as usize > u32::MAX as usize { return dex_err!(BadEncodedIndex { index: self.index, @@ -102,7 +102,7 @@ impl<'a> ClassItemBase for Field { }); } self.index += value; - self.access_flags = decode_leb128_off::(&data[*pos..], pos)?; + self.access_flags = decode_leb128_off::(&data, pos)?; Ok(()) } @@ -175,12 +175,9 @@ impl<'a> ClassAccessor<'a> { static_fields_off: 0, }; 
accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; - accessor.num_instance_fields = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; - accessor.num_direct_methods = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; - accessor.num_virtual_methods = - decode_leb128_off(&class_data[accessor.ptr_pos..], &mut accessor.ptr_pos)?; + accessor.num_instance_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; + accessor.num_direct_methods = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; + accessor.num_virtual_methods = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; accessor.static_fields_off = accessor.ptr_pos as u32; Ok(accessor) } diff --git a/src/file/debug.rs b/src/file/debug.rs new file mode 100644 index 0000000..86b7149 --- /dev/null +++ b/src/file/debug.rs @@ -0,0 +1,138 @@ +use crate::{ + leb128::{decode_leb128_off, decode_leb128p1_off}, + Result, +}; + +pub enum SourceFile { + This, + Other(u32), // index to file +} + +#[rustfmt::skip] +pub mod code { + pub const DBG_END_SEQUENCE: u8 = 0x00; + pub const DBG_ADVANCE_PC: u8 = 0x01; + pub const DBG_ADVANCE_LINE: u8 = 0x02; + pub const DBG_START_LOCAL: u8 = 0x03; + pub const DBG_START_LOCAL_EXTENDED: u8 = 0x04; + pub const DBG_END_LOCAL: u8 = 0x05; + pub const DBG_RESTART_LOCAL: u8 = 0x06; + pub const DBG_SET_PROLOGUE_END: u8 = 0x07; + pub const DBG_SET_EPILOGUE_BEGIN: u8 = 0x08; + pub const DBG_SET_FILE: u8 = 0x09; + + pub const DBG_FIRST_SPECIAL: u8 = 0x0a; + pub const DBG_LINE_BASE: u8 = (-4 as i8) as u8; + pub const DBG_LINE_RANGE: u8 = 15; +} + +pub struct PositionInfo { + pub address: u32, + pub line: u32, + pub file: SourceFile, + prologue_end: bool, + epilogue_begin: bool, +} + +impl PositionInfo { + pub fn new() -> Self { + Self { + address: 0, + line: 0, + file: SourceFile::This, + prologue_end: false, + epilogue_begin: false, + } + } +} + +pub struct CodeItemDebugInfoAccessor<'a> { + ptr: 
&'a [u8], +} + +impl<'a> CodeItemDebugInfoAccessor<'a> { + pub fn new(ptr: &'a [u8]) -> Self { + Self { ptr } + } + + pub fn visit_parameter_names(&self, visitor: F) -> Result<()> + where + F: Fn(u32), + { + let mut offset = 0; + self.decode_parameter_names(visitor, &mut offset)?; + Ok(()) + } + + fn decode_parameter_names(&self, visitor: F, offset: &mut usize) -> Result + where + F: Fn(u32), + { + let line = decode_leb128_off(&self.ptr, offset)?; + let size = decode_leb128_off::(&self.ptr, offset)?; + + for _ in 0..size { + let index = decode_leb128p1_off(&self.ptr, offset)?; + visitor(index as u32); + } + Ok(line) + } + + pub fn decode_position_info(&self, pos_visitor: F) -> Result<()> + where + F: Fn(&PositionInfo), + { + let mut entry = PositionInfo::new(); + let mut offset = 0; + entry.line = self.decode_parameter_names(|_| {}, &mut offset)?; + + loop { + let opcode = self.ptr[offset]; + offset += 1; + + match opcode { + code::DBG_END_SEQUENCE => break, + // This will cause overflow + code::DBG_ADVANCE_PC => { + entry.address += decode_leb128_off::(&self.ptr, &mut offset)? + } + code::DBG_ADVANCE_LINE => { + entry.line += decode_leb128_off::(&self.ptr, &mut offset)? 
+ } + code::DBG_START_LOCAL => { + decode_leb128_off::(&self.ptr, &mut offset)?; // reg + decode_leb128p1_off(&self.ptr, &mut offset)?; // name + decode_leb128p1_off(&self.ptr, &mut offset)?; // descriptor + } + code::DBG_START_LOCAL_EXTENDED => { + decode_leb128_off::(&self.ptr, &mut offset)?; // reg + decode_leb128p1_off(&self.ptr, &mut offset)?; // name + decode_leb128p1_off(&self.ptr, &mut offset)?; // descriptor + decode_leb128p1_off(&self.ptr, &mut offset)?; // signature + } + code::DBG_END_LOCAL | code::DBG_RESTART_LOCAL => { + decode_leb128_off::(&self.ptr, &mut offset)?; // reg + } + code::DBG_SET_PROLOGUE_END => entry.prologue_end = true, + code::DBG_SET_EPILOGUE_BEGIN => entry.epilogue_begin = true, + code::DBG_SET_FILE => { + let file = decode_leb128p1_off(&self.ptr, &mut offset)?; // file + entry.file = SourceFile::Other(file as u32); + } + _ => { + let adjusted_opcode = opcode - code::DBG_FIRST_SPECIAL; + entry.address += (adjusted_opcode / code::DBG_LINE_RANGE) as u32; + entry.line += + (code::DBG_LINE_BASE + (adjusted_opcode % code::DBG_LINE_RANGE)) as u32; + pos_visitor(&entry); + entry.epilogue_begin = false; + entry.prologue_end = false; + } + } + } + Ok(()) + } + + // TODO + // pub fn decode_local_info(&self, visitor: F) +} diff --git a/src/file/mod.rs b/src/file/mod.rs index a92b1ff..a62c656 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -105,12 +105,12 @@ macro_rules! fn_id { Ok(&self.$attr[idx as usize]) } }; - ($name:ident, $attr:ident, Option: $ret_ty:ty, $fallback:ident, $idx_ty:ty, $(#[$meta:meta])*) => { + ($name:ident, $attr:ident, Option: $ret_ty:ty, $fallback:ident, $idx_ty:ident, $(#[$meta:meta])*) => { $(#[$meta])* #[inline(always)] pub fn $name(&'a self, idx: $idx_ty) -> Result> { match idx { - DEX_NO_INDEX => Ok(None), + $idx_ty::MAX => Ok(None), _=> Ok(Some(self.$fallback(idx)?)), } } @@ -121,6 +121,13 @@ macro_rules! 
fn_id { pub fn $name(&'a self) -> &'a [$ret_ty] { &self.$attr } + }; + ($name:ident, $attr:ident, Idx: $ref_ty:ty, $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self, item: &'a $ref_ty) -> Result { + self.offset_of(self.$attr, item) + } } } @@ -243,13 +250,9 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // TODO: add docs fn_id!(get_string_id, string_ids, StringId, u32,); - fn_id! {get_string_id_opt, string_ids, Option: StringId, get_string_id, u32,} fn_id!(get_string_ids, string_ids, StringId[],); - - #[inline(always)] - pub fn string_id_idx(&self, item: &'a StringId) -> Result { - self.offset_of(self.string_ids, item) - } + fn_id! {get_string_id_opt, string_ids, Option: StringId, get_string_id, u32,} + fn_id! {string_id_idx, string_ids, Idx: StringId, } #[inline(always)] pub fn num_string_ids(&self) -> u32 { @@ -309,32 +312,24 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline(always)] - pub fn get_utf16_str_at(&self, idx: u32) -> Result { + pub fn get_utf16_str_at(&self, idx: StringIndex) -> Result { let string_id = self.get_string_id(idx)?; self.get_utf16_str(string_id) } - #[inline(always)] - pub fn get_type_id(&self, idx: TypeIndex) -> Result<&'a TypeId> { - check_lt_result!(idx as u32, self.num_type_ids(), TypeId); - Ok(&self.type_ids[idx as usize]) - } - - #[inline(always)] - pub fn type_id_idx(&self, item: &'a TypeId) -> Result { - self.offset_of(self.type_ids, item) - } + // ------------------------------------------------------------------------------ + // types + // ------------------------------------------------------------------------------ + fn_id!(get_type_id, type_ids, TypeId, TypeIndex,); + fn_id!(get_type_ids, type_ids, TypeId[],); + fn_id! {type_id_idx, type_ids, Idx: TypeId, } + fn_id! 
{get_type_id_opt, type_ids, Option: TypeId, get_type_id, TypeIndex,} #[inline(always)] pub fn num_type_ids(&self) -> u32 { self.header.type_ids_size } - #[inline(always)] - pub fn get_type_ids(&self) -> &'a [TypeId] { - self.type_ids - } - #[inline(always)] pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { let type_id = self.get_type_id(idx)?; @@ -381,27 +376,33 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.non_null_array_data_ptr(code_off, size_in_code_units as usize) } - // -- fields - #[inline] - pub fn get_field_id(&self, idx: u32) -> Result<&'a FieldId> { - check_lt_result!(idx, self.field_ids.len(), FieldId); - Ok(&self.field_ids[idx as usize]) - } + // ------------------------------------------------------------------------------ + // field ids + // ------------------------------------------------------------------------------ + fn_id!(get_field_id, field_ids, FieldId, FieldIndex,); + fn_id!(get_field_ids, field_ids, FieldId[],); + fn_id! {field_id_idx, field_ids, Idx: FieldId, } + fn_id! 
{get_field_id_opt, field_ids, Option: FieldId, get_field_id, FieldIndex,} #[inline(always)] - pub fn field_id_idx(&self, item: &'a FieldId) -> Result { - self.offset_of(self.field_ids, item) + pub fn num_field_ids(&self) -> u32 { + self.header.field_ids_size } #[inline(always)] - pub fn get_field_ids(&self) -> &'a [FieldId] { - self.field_ids + pub fn get_field_name(&self, field_id: &FieldId) -> Result { + self.get_utf16_str_lossy_at(field_id.name_idx) } - pub fn get_field_name(&self, field_id: &FieldId) -> Result { + #[inline(always)] + pub fn get_field_name_at(&self, idx: FieldIndex) -> Result { + let field_id = self.get_field_id(idx)?; self.get_utf16_str_lossy_at(field_id.name_idx) } + // ------------------------------------------------------------------------------ + // field ids + // ------------------------------------------------------------------------------ // Proto related methods pub fn get_proto_id(&self, idx: ProtoIndex) -> Result<&'a ProtoId> { check_lt_result!(idx, self.proto_ids.len(), ProtoId); diff --git a/src/file/structs.rs b/src/file/structs.rs index 1ed445b..b9fafd9 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -3,7 +3,7 @@ use plain::Plain; pub type StringIndex = u32; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct StringId { pub string_data_off: u32, } @@ -20,15 +20,17 @@ impl StringId { pub type TypeIndex = u16; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TypeId { pub descriptor_idx: StringIndex, } unsafe impl plain::Plain for TypeId {} +pub type FieldIndex = u32; + #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FieldId { pub class_idx: TypeIndex, // index into type_ids_ array for defining class pub type_idx: TypeIndex, // index into type_ids_ array for field type @@ -40,7 +42,7 @@ unsafe impl plain::Plain for FieldId {} pub type ProtoIndex = u16; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ProtoId { pub shorty_idx: StringIndex, // index into 
string_ids array for shorty descriptor pub return_type_idx: TypeIndex, // index into type_ids array for return type @@ -51,7 +53,7 @@ pub struct ProtoId { unsafe impl plain::Plain for ProtoId {} #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MethodId { pub class_idx: TypeIndex, // index into type_ids_ array for defining class pub proto_idx: ProtoIndex, // index into proto_ids_ array for method signature @@ -61,7 +63,7 @@ pub struct MethodId { unsafe impl plain::Plain for MethodId {} #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ClassDef { pub class_idx: TypeIndex, // index into type_ids_ array for this class pad1_: u16, // padding = 0 @@ -78,7 +80,7 @@ pub struct ClassDef { unsafe impl plain::Plain for ClassDef {} #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TypeItem { pub type_idx: TypeIndex, // index into type_ids section } diff --git a/src/leb128.rs b/src/leb128.rs index d31842f..24008aa 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -15,7 +15,7 @@ pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { } #[inline(always)] -pub fn decode_leb128_off( +pub fn decode_leb128_adv( data_in: &[u8], ptr_pos: &mut usize, ) -> Result { @@ -23,3 +23,20 @@ pub fn decode_leb128_off( *ptr_pos += size; Ok(value) } + +#[inline(always)] +pub fn decode_leb128_off( + data_in: &[u8], + ptr_pos: &mut usize, +) -> Result { + let (value, size) = decode_leb128(&data_in[*ptr_pos..])?; + *ptr_pos += size; + Ok(value) +} + +#[inline(always)] +pub fn decode_leb128p1_off(data_in: &[u8], ptr_pos: &mut usize) -> Result { + let (value, size) = decode_leb128p1(&data_in[*ptr_pos..])?; + *ptr_pos += size; + Ok(value) +} diff --git a/src/py/container.rs b/src/py/container.rs index 7890aef..d276aa9 100644 --- a/src/py/container.rs +++ b/src/py/container.rs @@ -30,18 +30,17 @@ impl PyDexContainer { #[pyo3::pyclass( name = "InMemoryDexContainer", module = "dexrs._internal.container", - extends = PyDexContainer, - subclass + frozen 
)] pub struct PyInMemoryDexContainer { - data: Py, + pub(crate) data: Py, length: usize, } impl AsRef<[u8]> for PyInMemoryDexContainer { #[inline] fn as_ref(&self) -> &[u8] { - Python::with_gil(|py| self.data.as_bytes(py)) + self.deref() } } @@ -49,7 +48,9 @@ impl Deref for PyInMemoryDexContainer { type Target = [u8]; fn deref(&self) -> &Self::Target { - self.as_ref() + Python::with_gil(|py| { + self.data.as_bytes(py) + }) } } @@ -67,24 +68,18 @@ impl PyInMemoryDexContainer { #[pyo3::pymethods] impl PyInMemoryDexContainer { #[new] - pub fn new<'py>( - py: Python<'py>, - data: Py, - ) -> PyResult<(PyInMemoryDexContainer, PyDexContainer)> { - Ok(( - PyInMemoryDexContainer::open(py, data), - PyDexContainer::new(), - )) + pub fn new<'py>(py: Python<'py>, data: Py) -> PyResult { + Ok(PyInMemoryDexContainer::open(py, data)) } // TODO: measure performance overhead if data is huge - pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { - Ok(self.data.clone_ref(py)) + pub fn data<'py>(py_self: PyRef<'_, Self>, py: Python<'py>) -> PyResult> { + Ok(py_self.data.clone_ref(py)) } #[getter] - pub fn file_size(&self) -> PyResult { - Ok(self.length) + pub fn file_size(py_self: PyRef<'_, Self>) -> PyResult { + Ok(py_self.length) } pub fn __len__(py_self: PyRef<'_, Self>) -> usize { @@ -95,11 +90,10 @@ impl PyInMemoryDexContainer { #[pyo3::pyclass( name = "FileDexContainer", module = "dexrs._internal.container", - extends = PyDexContainer, - subclass + frozen )] pub struct PyFileDexContainer { - path: String, + pub(crate) path: String, _fp: std::fs::File, data: Arc, } @@ -107,7 +101,7 @@ pub struct PyFileDexContainer { impl AsRef<[u8]> for PyFileDexContainer { #[inline] fn as_ref(&self) -> &[u8] { - self.data.as_ref() + &self.data.as_ref() } } @@ -115,7 +109,7 @@ impl Deref for PyFileDexContainer { type Target = [u8]; fn deref(&self) -> &Self::Target { - self.as_ref() + &self.data.deref() } } @@ -136,11 +130,8 @@ impl PyFileDexContainer { #[pyo3::pymethods] impl PyFileDexContainer 
{ #[new] - pub fn new(path: String) -> PyResult<(Self, PyDexContainer)> { - Ok(( - PyFileDexContainer::open(path)?, - PyDexContainer::new(), - )) + pub fn new(path: String) -> PyResult { + Ok(PyFileDexContainer::open(path)?) } pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { diff --git a/src/py/file.rs b/src/py/file.rs index 8a50bea..4525eab 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -1,20 +1,18 @@ -use std::{ - borrow::Borrow, - sync::{Arc, Mutex}, -}; +use std::sync::Arc; -use pyo3::{types::PyBytes, Py, PyResult, Python}; +use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; -use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, Header}; +use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, StringIndex}; use super::{ - arc_mutex_get, container::{PyFileDexContainer, PyInMemoryDexContainer}, - structs::PyHeader, + structs::{PyDexHeader, PyDexStringId}, }; -#[pyo3::pyclass(name = "VerifyPreset", module = "dexrs._internal.file", eq, eq_int)] + +#[allow(non_camel_case_types)] #[derive(Clone, Copy, PartialEq, Eq)] +#[pyo3::pyclass(name = "VerifyPreset", module = "dexrs._internal.file", eq, eq_int)] pub enum PyVerifyPreset { ALL = 1, CHECKSUM_ONLY = 2, @@ -32,6 +30,7 @@ impl Into for PyVerifyPreset { } // lifetime annotation can't be removed for now + pub type PyInMemoryDexFile<'a> = DexFile<'a, PyInMemoryDexContainer>; pub type PyDexFile<'a> = DexFile<'a, PyFileDexContainer>; @@ -39,11 +38,11 @@ pub type PyDexFile<'a> = DexFile<'a, PyFileDexContainer>; pub enum RsDexFile { InMemory { dex: PyInMemoryDexFile<'static>, - container: PyInMemoryDexContainer, + container: Py, }, File { dex: PyDexFile<'static>, - container: PyFileDexContainer, + container: Py, }, } @@ -54,10 +53,10 @@ pub struct PyDexFileImpl { } macro_rules! 
bind_dex { - ($dex_file:ident, $dex_type:ident, $c:ident) => {{ + ($dex_file:ident, $dex_type:ident, $c:ident, $py:ident) => {{ let static_dex = unsafe { std::mem::transmute($dex_file) }; let inner = RsDexFile::$dex_type { - container: $c, + container: $c.clone_ref($py), dex: static_dex, }; PyDexFileImpl { @@ -68,11 +67,41 @@ macro_rules! bind_dex { impl PyDexFileImpl {} +macro_rules! dex_container_check { + ($container:ident, $py:ident, $method:expr) => { + if $container.get_refcnt($py) == 0 { + return Err(PyValueError::new_err(concat!( + "Tried to execute DexFile::", + stringify!($method), + " on a dex container that was deleted by Python!" + ))); + } + }; +} + macro_rules! dex_action_impl { - ($this:ident, $method:ident) => {{ - match $this.inner.as_ref() { - RsDexFile::InMemory { dex, .. } => dex.$method(), - RsDexFile::File { dex, .. } => dex.$method(), + ($this:ident, $method:ident, $py:ident) => {{ + match &$this.inner.as_ref() { + RsDexFile::InMemory { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method() + } + RsDexFile::File { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method() + } + } + }}; + ($this:ident, $method:ident, $arg:expr, $py:ident) => {{ + match &$this.inner.as_ref() { + RsDexFile::InMemory { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method($arg)? + } + RsDexFile::File { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method($arg)? 
+ } } }}; } @@ -86,29 +115,76 @@ impl PyDexFileImpl { ))] pub fn from_bytes<'py>( py: Python<'py>, - data: Py, + data: Py, preset: PyVerifyPreset, ) -> PyResult { let preset = preset.into(); - let container = PyInMemoryDexContainer::open(py, data); - let dex = PyInMemoryDexFile::open(&container, DexLocation::InMemory, preset)?; - Ok(bind_dex!(dex, InMemory, container)) + let dex = PyInMemoryDexFile::open(data.get(), DexLocation::InMemory, preset)?; + Ok(bind_dex!(dex, InMemory, data, py)) } #[staticmethod] #[pyo3(signature = ( - path, + data, preset=PyVerifyPreset::ALL ))] - pub fn from_file(path: String, preset: PyVerifyPreset) -> PyResult { + pub fn from_file<'py>( + py: Python<'py>, + data: Py, + preset: PyVerifyPreset, + ) -> PyResult { let preset = preset.into(); - let container = PyFileDexContainer::open(path.clone())?; - let dex = PyDexFile::open(&container, DexLocation::Path(path), preset)?; - Ok(bind_dex!(dex, File, container)) + let container = data.get(); + let dex = PyDexFile::open( + data.get(), + DexLocation::Path(container.path.clone()), + preset, + )?; + Ok(bind_dex!(dex, File, data, py)) } - pub fn get_header(&self) -> PyResult { - Ok(dex_action_impl!(self, get_header).into()) + pub fn get_header<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, get_header, py).into()) + } + + // ---------------------------------------------------------------------------- + // String Ids + // ---------------------------------------------------------------------------- + pub fn get_string_id<'py>( + &self, + py: Python<'py>, + index: StringIndex, + ) -> PyResult { + Ok(dex_action_impl!(self, get_string_id, index, py).into()) + } + + pub fn get_string_id_opt<'py>( + &self, + py: Python<'py>, + index: StringIndex, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_string_id_opt, index, py).map(Into::into)) + } + + pub fn num_string_ids<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_string_ids, py)) + } + + // 
---------------------------------------------------------------------------- + // string data + // ---------------------------------------------------------------------------- + + pub fn get_utf16_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { + Ok(dex_action_impl!(self, get_utf16_str_at, index, py)) + } + + pub fn get_utf16<'py>( + &self, + py: Python<'py>, + py_string_id: Py, + ) -> PyResult { + let string_id = &py_string_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_utf16_str, &string_id, py)) } } diff --git a/src/py/mod.rs b/src/py/mod.rs index f8f8524..b96c126 100644 --- a/src/py/mod.rs +++ b/src/py/mod.rs @@ -3,14 +3,3 @@ pub(crate) mod file; pub(crate) mod error; pub(crate) mod structs; - -pub(crate) type ArcMutex = std::sync::Arc>; - - -macro_rules! arc_mutex_get { - ($value:expr) => { - $value.lock().unwrap() - }; -} - -pub(crate) use arc_mutex_get; \ No newline at end of file diff --git a/src/py/structs.rs b/src/py/structs.rs index 8a9affb..66207ac 100644 --- a/src/py/structs.rs +++ b/src/py/structs.rs @@ -1,6 +1,9 @@ -use std::sync::{Arc, Mutex}; +use std::sync::Arc; -use crate::file::Header; +use crate::file::{ + ClassDef, FieldId, Header, MethodId, ProtoId, ProtoIndex, StringId, StringIndex, TypeId, + TypeIndex, TypeItem, +}; macro_rules! py_struct_wrapper { ($name:literal, $py_type:ident, $rust_type:ident) => { @@ -33,9 +36,11 @@ macro_rules! 
py_struct_fields { }; } -py_struct_wrapper!("Header", PyHeader, Header); - -py_struct_fields!(PyHeader, { +// -------------------------------------------------------------------- +// Header +// -------------------------------------------------------------------- +py_struct_wrapper!("Header", PyDexHeader, Header); +py_struct_fields!(PyDexHeader, { (checksum, u32), (file_size, u32), (header_size, u32), @@ -74,9 +79,81 @@ pub fn get_magic(&self) -> Vec { } ); +// -------------------------------------------------------------------- +// StringId +// -------------------------------------------------------------------- +py_struct_wrapper!("StringId", PyDexStringId, StringId); +py_struct_fields!(PyDexStringId, { + (string_data_off, StringIndex), +},); + +// -------------------------------------------------------------------- +// TypeId +// -------------------------------------------------------------------- +py_struct_wrapper!("TypeId", PyDexTypeId, TypeId); +py_struct_fields!(PyDexTypeId, { + (descriptor_idx, StringIndex), +},); + +// -------------------------------------------------------------------- +// FieldId +// -------------------------------------------------------------------- +py_struct_wrapper!("FieldId", PyDexFieldId, FieldId); +py_struct_fields!(PyDexFieldId, { + (class_idx, TypeIndex), + (type_idx, TypeIndex), + (name_idx, StringIndex), +},); + +// -------------------------------------------------------------------- +// ProtoId +// -------------------------------------------------------------------- +py_struct_wrapper!("ProtoId", PyDexProtoId, ProtoId); +py_struct_fields!(PyDexProtoId, { + (shorty_idx, StringIndex), + (return_type_idx, TypeIndex), + (parameters_off, u32), +},); + +// -------------------------------------------------------------------- +// MethodId +// -------------------------------------------------------------------- +py_struct_wrapper!("MethodId", PyDexMethodId, MethodId); +py_struct_fields!(PyDexMethodId, { + (class_idx, TypeIndex), + 
(proto_idx, ProtoIndex), + (name_idx, StringIndex), +},); + +// -------------------------------------------------------------------- +// ClassDef +// -------------------------------------------------------------------- +py_struct_wrapper!("ClassDef", PyDexClassDef, ClassDef); +py_struct_fields!(PyDexClassDef, { + (class_idx, TypeIndex), + (access_flags, u32), + (superclass_idx, TypeIndex), + (interfaces_off, u32), + (source_file_idx, StringIndex), + (annotations_off, u32), + (class_data_off, u32), + (static_values_off, u32), +},); + +// -------------------------------------------------------------------- +// TypeItem +// -------------------------------------------------------------------- +py_struct_wrapper!("TypeItem", PyDexTypeItem, TypeItem); +py_struct_fields!(PyDexTypeItem, { + (type_idx, TypeIndex), +},); + #[pyo3::pymodule] pub(crate) mod structs { #[pymodule_export] - use super::PyHeader; + use super::{ + PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, + PyDexTypeId, PyDexTypeItem, + }; } From aa2237139c6a43028973049040f07cc794271daa Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 13 Feb 2025 17:58:44 +0100 Subject: [PATCH 27/46] Added tests for Python API --- + updated initialization of python package to automatically include all types --- python/dexrs/__init__.py | 12 +++++++++++- python/tests/__init__.py | 0 python/tests/_util.py | 7 +++++++ python/tests/test_dex_parse.py | 18 ++++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 python/tests/__init__.py create mode 100644 python/tests/_util.py create mode 100644 python/tests/test_dex_parse.py diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py index 395a825..353f51a 100644 --- a/python/dexrs/__init__.py +++ b/python/dexrs/__init__.py @@ -1 +1,11 @@ -from dexrs._internal import file, error, container \ No newline at end of file +from dexrs._internal import ( + file 
as rust_file, + container as rust_container, + error as rust_error, +) + +DexFile = rust_file.DexFile +FileDexContainer = rust_container.FileDexContainer +InMemoryDexContainer = rust_container.InMemoryDexContainer +VerifyPreset = rust_file.VerifyPreset +PyDexError = rust_error.PyDexError diff --git a/python/tests/__init__.py b/python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/_util.py b/python/tests/_util.py new file mode 100644 index 0000000..950fc5a --- /dev/null +++ b/python/tests/_util.py @@ -0,0 +1,7 @@ +import pathlib + +assets_base_dir = pathlib.Path(__file__).parent.parent.parent / "tests" + + +def get_asset(asset_name: str) -> pathlib.Path: + return assets_base_dir / asset_name \ No newline at end of file diff --git a/python/tests/test_dex_parse.py b/python/tests/test_dex_parse.py new file mode 100644 index 0000000..f23dcd0 --- /dev/null +++ b/python/tests/test_dex_parse.py @@ -0,0 +1,18 @@ +import dexrs +import pytest + +from . import _util + + +def test_parse_invalid_dex() -> None: + with pytest.raises(dexrs.PyDexError): + data = dexrs.InMemoryDexContainer(b"...") + dexrs.DexFile.from_bytes(data) + + +def test_parse_valid_dex() -> None: + path = _util.get_asset("prime/prime.dex") + data = dexrs.FileDexContainer(str(path)) + dex = dexrs.DexFile.from_file(data) + + assert dex.get_header().version_int == 35 From 630b964e00ff75cebb5dcb0ea2a962c7cf59ee68 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 13 Feb 2025 20:31:54 +0100 Subject: [PATCH 28/46] Updated debug info parser (currently unstable) --- Changes: + Added iterator implementation over parameter names in debug info + Added TypeId interface for Python API --- python/dexrs/__init__.py | 1 + python/tests/_util.py | 8 ++++++- python/tests/test_dex_items.py | 30 ++++++++++++++++++++++++ src/file/debug.rs | 43 ++++++++++++++++++++++++++++++++++ src/file/mod.rs | 31 ++++++++++++++++++++++++ src/py/file.rs | 
24 ++++++++++++++++--- 6 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 python/tests/test_dex_items.py diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py index 353f51a..c69bff5 100644 --- a/python/dexrs/__init__.py +++ b/python/dexrs/__init__.py @@ -4,6 +4,7 @@ error as rust_error, ) +# REVISIT: create individual submodules DexFile = rust_file.DexFile FileDexContainer = rust_container.FileDexContainer InMemoryDexContainer = rust_container.InMemoryDexContainer diff --git a/python/tests/_util.py b/python/tests/_util.py index 950fc5a..169f12e 100644 --- a/python/tests/_util.py +++ b/python/tests/_util.py @@ -1,7 +1,13 @@ import pathlib +import dexrs assets_base_dir = pathlib.Path(__file__).parent.parent.parent / "tests" def get_asset(asset_name: str) -> pathlib.Path: - return assets_base_dir / asset_name \ No newline at end of file + return assets_base_dir / asset_name + + +PRIME_DEX = dexrs.DexFile.from_file( + dexrs.FileDexContainer(str(get_asset("prime/prime.dex"))) +) \ No newline at end of file diff --git a/python/tests/test_dex_items.py b/python/tests/test_dex_items.py new file mode 100644 index 0000000..4af002c --- /dev/null +++ b/python/tests/test_dex_items.py @@ -0,0 +1,30 @@ +import dexrs +import pytest + +from . 
import _util + + +def test_get_type_id() -> None: + dex = _util.PRIME_DEX + type_id = dex.get_type_id(0) + # pre-computed values + assert type_id.descriptor_idx == 3 + + +def test_get_type_desc() -> None: + dex = _util.PRIME_DEX + type_id = dex.get_type_id(0) + # query type descriptor for type id + descriptor = dex.get_utf16_at(type_id.descriptor_idx) + assert descriptor == "I" + +def test_invalid_type_id_idx() -> None: + dex = _util.PRIME_DEX + index = 0xFFFF # max of u16 + + # no error if optional + assert dex.get_type_id_opt(index) is None + + # error otherwise + with pytest.raises(dexrs.PyDexError): + dex.get_type_id(index) diff --git a/src/file/debug.rs b/src/file/debug.rs index 86b7149..cdb7468 100644 --- a/src/file/debug.rs +++ b/src/file/debug.rs @@ -55,6 +55,10 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { Self { ptr } } + pub fn parameter_names(&self) -> Result> { + DebugInfoParameterNamesIterator::new(self.ptr, 0) + } + pub fn visit_parameter_names(&self, visitor: F) -> Result<()> where F: Fn(u32), @@ -136,3 +140,42 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { // TODO // pub fn decode_local_info(&self, visitor: F) } + +pub struct DebugInfoParameterNamesIterator<'dex> { + ptr: &'dex [u8], + offset: usize, + idx: usize, + size: usize, +} + +impl<'dex> DebugInfoParameterNamesIterator<'dex> { + pub fn new(ptr: &'dex [u8], offset: usize) -> Result { + let mut pos = offset; + // skipping line number + let line = decode_leb128_off::(&ptr, &mut pos)?; + let size = decode_leb128_off::(&ptr, &mut pos)? 
as usize; + Ok(Self { + ptr, + offset, + size, + idx: 0, + }) + } +} + +impl<'a> Iterator for DebugInfoParameterNamesIterator<'a> { + type Item = u32; + + fn next(&mut self) -> Option { + if self.idx >= self.size { + return None; + } + self.idx += 1; + match decode_leb128p1_off(&self.ptr, &mut self.offset) { + Ok(v) => { + Some(v as u32) + } + Err(_) => None, + } + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index a62c656..428b81e 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -317,6 +317,14 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.get_utf16_str(string_id) } + #[inline(always)] + pub fn get_utf16_str_opt_at(&self, idx: StringIndex) -> Result> { + match idx { + StringIndex::MAX => Ok(None), + _ => Ok(Some(self.get_utf16_str_at(idx)?)), + } + } + // ------------------------------------------------------------------------------ // types // ------------------------------------------------------------------------------ @@ -376,6 +384,29 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.non_null_array_data_ptr(code_off, size_in_code_units as usize) } + // ------------------------------------------------------------------------------ + // Debug Info + // ------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_debug_info_accessor(&'a self, offset: u32) -> Result> { + check_lt_result!(offset, self.file_size(), "debug info offset"); + Ok(CodeItemDebugInfoAccessor::new( + &self.mmap[offset as usize..], + )) + } + + #[inline(always)] + pub fn get_debug_info_accessor_opt( + &'a self, + offset: u32, + ) -> Result>> { + match offset { + // WHY?: It seems that some applications incorrectly set the debug info offset to 0 + 0 | u32::MAX => Ok(None), + _ => Ok(Some(self.get_debug_info_accessor(offset)?)), + } + } + // ------------------------------------------------------------------------------ // field ids // ------------------------------------------------------------------------------ diff 
--git a/src/py/file.rs b/src/py/file.rs index 4525eab..9ebca39 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -2,14 +2,13 @@ use std::sync::Arc; use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; -use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, StringIndex}; +use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, StringIndex, TypeIndex}; use super::{ container::{PyFileDexContainer, PyInMemoryDexContainer}, - structs::{PyDexHeader, PyDexStringId}, + structs::{PyDexHeader, PyDexStringId, PyDexTypeId}, }; - #[allow(non_camel_case_types)] #[derive(Clone, Copy, PartialEq, Eq)] #[pyo3::pyclass(name = "VerifyPreset", module = "dexrs._internal.file", eq, eq_int)] @@ -170,6 +169,25 @@ impl PyDexFileImpl { Ok(dex_action_impl!(self, num_string_ids, py)) } + // ---------------------------------------------------------------------------- + // Type Ids + // ---------------------------------------------------------------------------- + pub fn get_type_id<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { + Ok(dex_action_impl!(self, get_type_id, index, py).into()) + } + + pub fn get_type_id_opt<'py>( + &self, + py: Python<'py>, + index: TypeIndex, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_type_id_opt, index, py).map(Into::into)) + } + + pub fn num_type_ids<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_type_ids, py)) + } + // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- From 067380eb915b19dba9ecac4f2e42615fe1f0339d Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 13 Feb 2025 21:20:20 +0100 Subject: [PATCH 29/46] Reordered Python extension to include type stubs --- Changes: + Added python stubs for each module currently available + Renamed all rust python bindings to begin with py_ Additions: + Added Python API to 
encode and decode MUTF8 data (currently limited to null-terminated strings) --- python/dexrs/__init__.py | 16 +++--------- python/dexrs/_internal/__init__.pyi | 0 python/dexrs/_internal/container.pyi | 24 ++++++++++++++++++ python/dexrs/_internal/error.pyi | 2 ++ python/dexrs/_internal/file.pyi | 18 ++++++++++++++ python/dexrs/_internal/mutf8.pyi | 4 +++ python/dexrs/_internal/structs.pyi | 28 +++++++++++++++++++++ python/dexrs/container.py | 5 ++++ python/dexrs/error.py | 5 ++++ python/dexrs/file.py | 7 ++++++ python/dexrs/mutf8.py | 7 ++++++ python/tests/_util.py | 9 ++++--- python/tests/test_dex_items.py | 5 ++-- python/tests/test_dex_parse.py | 9 ++++--- python/tests/test_mutf8.py | 29 ++++++++++++++++++++++ src/leb128.rs | 2 +- src/lib.rs | 11 ++++++--- src/py/container.rs | 2 +- src/py/error.rs | 13 ++++++---- src/py/file.rs | 2 +- src/py/mod.rs | 2 +- src/py/mutf8.rs | 37 ++++++++++++++++++++++++++++ src/py/structs.rs | 4 +-- src/utf.rs | 3 ++- 24 files changed, 207 insertions(+), 37 deletions(-) create mode 100644 python/dexrs/_internal/__init__.pyi create mode 100644 python/dexrs/_internal/container.pyi create mode 100644 python/dexrs/_internal/error.pyi create mode 100644 python/dexrs/_internal/file.pyi create mode 100644 python/dexrs/_internal/mutf8.pyi create mode 100644 python/dexrs/_internal/structs.pyi create mode 100644 python/dexrs/container.py create mode 100644 python/dexrs/error.py create mode 100644 python/dexrs/file.py create mode 100644 python/dexrs/mutf8.py create mode 100644 python/tests/test_mutf8.py create mode 100644 src/py/mutf8.rs diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py index c69bff5..9d53bd8 100644 --- a/python/dexrs/__init__.py +++ b/python/dexrs/__init__.py @@ -1,12 +1,4 @@ -from dexrs._internal import ( - file as rust_file, - container as rust_container, - error as rust_error, -) - -# REVISIT: create individual submodules -DexFile = rust_file.DexFile -FileDexContainer = rust_container.FileDexContainer 
-InMemoryDexContainer = rust_container.InMemoryDexContainer -VerifyPreset = rust_file.VerifyPreset -PyDexError = rust_error.PyDexError +# some shortcuts +from .file import DexFile, VerifyPreset +from .container import InMemoryDexContainer, FileDexContainer +from .error import PyDexError \ No newline at end of file diff --git a/python/dexrs/_internal/__init__.pyi b/python/dexrs/_internal/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/python/dexrs/_internal/container.pyi b/python/dexrs/_internal/container.pyi new file mode 100644 index 0000000..0370c92 --- /dev/null +++ b/python/dexrs/_internal/container.pyi @@ -0,0 +1,24 @@ +import abc + +# deprecated +class DexContainer(abc.ABC): + def data(self) -> bytes: ... + @property + @abc.abstractmethod + def file_size(self) -> int: ... + +class InMemoryDexContainer: + def __init__(self, data: bytes) -> None: ... + def data(self) -> bytes: ... + @property + def file_size(self) -> int: ... + def __len__(self) -> int: ... + +class FileDexContainer: + def __init__(self, path: str) -> None: ... + def data(self) -> bytes: ... + @property + def file_size(self) -> int: ... + @property + def location(self) -> str: ... + def __len__(self) -> int: ... diff --git a/python/dexrs/_internal/error.pyi b/python/dexrs/_internal/error.pyi new file mode 100644 index 0000000..757ab86 --- /dev/null +++ b/python/dexrs/_internal/error.pyi @@ -0,0 +1,2 @@ +class PyDexError(Exception): + def __init__(self, message: str) -> None: ... diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi new file mode 100644 index 0000000..d9be611 --- /dev/null +++ b/python/dexrs/_internal/file.pyi @@ -0,0 +1,18 @@ +from .container import InMemoryDexContainer, FileDexContainer +from .structs import Header + +class VerifyPreset: + ALL: VerifyPreset + NONE: VerifyPreset + CHECKSUM_ONLY: VerifyPreset + +class DexFile: + @staticmethod + def from_file(data: FileDexContainer, preset: VerifyPreset = ...) -> DexFile: ... 
+ @staticmethod + def from_bytes( + data: InMemoryDexContainer, preset: VerifyPreset = ... + ) -> DexFile: ... + + # instance methods + def get_header(self) -> Header: ... diff --git a/python/dexrs/_internal/mutf8.pyi b/python/dexrs/_internal/mutf8.pyi new file mode 100644 index 0000000..34d3122 --- /dev/null +++ b/python/dexrs/_internal/mutf8.pyi @@ -0,0 +1,4 @@ +def mutf8_to_str(utf8_data_in: bytes) -> str: ... +def str_to_mutf8(str_data_in: str) -> bytes: ... +def mutf8_to_str_lossy(utf8_data_in: bytes) -> str: ... +def str_to_mutf8_lossy(str_data_in: str) -> bytes: ... diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi new file mode 100644 index 0000000..9057058 --- /dev/null +++ b/python/dexrs/_internal/structs.pyi @@ -0,0 +1,28 @@ +class Header: + checksum: int + file_size: int + header_size: int + endian_tag: int + link_size: int + link_off: int + string_ids_size: int + string_ids_off: int + type_ids_size: int + type_ids_off: int + proto_ids_size: int + proto_ids_off: int + field_ids_size: int + field_ids_off: int + method_ids_size: int + method_ids_off: int + class_defs_size: int + class_defs_off: int + data_size: int + data_off: int + + @property + def version_int(self) -> int: ... + @property + def signature(self) -> bytes: ... + @property + def magic(self) -> bytes: ... 
diff --git a/python/dexrs/container.py b/python/dexrs/container.py new file mode 100644 index 0000000..17f8348 --- /dev/null +++ b/python/dexrs/container.py @@ -0,0 +1,5 @@ +from dexrs._internal import container as rust_container + + +InMemoryDexContainer = rust_container.InMemoryDexContainer +FileDexContainer = rust_container.FileDexContainer diff --git a/python/dexrs/error.py b/python/dexrs/error.py new file mode 100644 index 0000000..228e266 --- /dev/null +++ b/python/dexrs/error.py @@ -0,0 +1,5 @@ +from dexrs._internal import error as rust_error + +PyDexError = rust_error.PyDexError + +__all__ = ["PyDexError"] diff --git a/python/dexrs/file.py b/python/dexrs/file.py new file mode 100644 index 0000000..2fa9d68 --- /dev/null +++ b/python/dexrs/file.py @@ -0,0 +1,7 @@ +from dexrs._internal import file as rust_file + + +DexFile = rust_file.DexFile +VerifyPreset = rust_file.VerifyPreset + +__all__ = ["DexFile", "VerifyPreset"] diff --git a/python/dexrs/mutf8.py b/python/dexrs/mutf8.py new file mode 100644 index 0000000..290f244 --- /dev/null +++ b/python/dexrs/mutf8.py @@ -0,0 +1,7 @@ +from dexrs._internal import mutf8 as rust_mutf8 + + +mutf8_to_str = rust_mutf8.mutf8_to_str +mutf8_to_str_lossy = rust_mutf8.mutf8_to_str_lossy +str_to_mutf8 = rust_mutf8.str_to_mutf8 +str_to_mutf8_lossy = rust_mutf8.str_to_mutf8_lossy \ No newline at end of file diff --git a/python/tests/_util.py b/python/tests/_util.py index 169f12e..130d336 100644 --- a/python/tests/_util.py +++ b/python/tests/_util.py @@ -1,5 +1,6 @@ import pathlib -import dexrs + +from dexrs import DexFile, container assets_base_dir = pathlib.Path(__file__).parent.parent.parent / "tests" @@ -8,6 +9,6 @@ def get_asset(asset_name: str) -> pathlib.Path: return assets_base_dir / asset_name -PRIME_DEX = dexrs.DexFile.from_file( - dexrs.FileDexContainer(str(get_asset("prime/prime.dex"))) -) \ No newline at end of file +PRIME_DEX = DexFile.from_file( + container.FileDexContainer(str(get_asset("prime/prime.dex"))) +) 
diff --git a/python/tests/test_dex_items.py b/python/tests/test_dex_items.py index 4af002c..3e7325c 100644 --- a/python/tests/test_dex_items.py +++ b/python/tests/test_dex_items.py @@ -1,6 +1,7 @@ -import dexrs import pytest +from dexrs.error import PyDexError + from . import _util @@ -26,5 +27,5 @@ def test_invalid_type_id_idx() -> None: assert dex.get_type_id_opt(index) is None # error otherwise - with pytest.raises(dexrs.PyDexError): + with pytest.raises(PyDexError): dex.get_type_id(index) diff --git a/python/tests/test_dex_parse.py b/python/tests/test_dex_parse.py index f23dcd0..5d03769 100644 --- a/python/tests/test_dex_parse.py +++ b/python/tests/test_dex_parse.py @@ -1,18 +1,21 @@ import dexrs import pytest +from dexrs.error import PyDexError + from . import _util + def test_parse_invalid_dex() -> None: - with pytest.raises(dexrs.PyDexError): - data = dexrs.InMemoryDexContainer(b"...") + with pytest.raises(PyDexError): + data = dexrs.container.InMemoryDexContainer(b"...") dexrs.DexFile.from_bytes(data) def test_parse_valid_dex() -> None: path = _util.get_asset("prime/prime.dex") - data = dexrs.FileDexContainer(str(path)) + data = dexrs.container.FileDexContainer(str(path)) dex = dexrs.DexFile.from_file(data) assert dex.get_header().version_int == 35 diff --git a/python/tests/test_mutf8.py b/python/tests/test_mutf8.py new file mode 100644 index 0000000..ce44487 --- /dev/null +++ b/python/tests/test_mutf8.py @@ -0,0 +1,29 @@ +import pytest + +from dexrs.mutf8 import ( + mutf8_to_str, + mutf8_to_str_lossy, + str_to_mutf8, + str_to_mutf8_lossy, +) +from dexrs.error import PyDexError + + +def test_parse_valid_mutf8() -> None: + data = b"foobar\0" # trailing null byte is mandatory + + assert mutf8_to_str(data) == "foobar" + assert str_to_mutf8("foobar") == b"foobar\0" + + +# REVISIT: add surrogate examples +def test_parse_valid_mutf8_lossy() -> None: + data = b"foobar\0" # trailing null byte is mandatory + + assert mutf8_to_str_lossy(data) == "foobar" + assert 
str_to_mutf8_lossy("foobar") == b"foobar\0" + + +def test_parse_invalid_mutf8() -> None: + with pytest.raises(PyDexError): + mutf8_to_str(b"0x00") # missing null byte diff --git a/src/leb128.rs b/src/leb128.rs index 24008aa..b7d3069 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -11,7 +11,7 @@ pub fn decode_leb128(data_in: &[u8]) -> Result<(T, #[inline(always)] pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { let (result, size) = decode_leb128::(data_in)?; - Ok(((result - 1) as i32, size)) + Ok((result as i32 - 1, size)) } #[inline(always)] diff --git a/src/lib.rs b/src/lib.rs index bc33043..06108d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,14 +17,17 @@ pub(crate) mod py; mod _internal { #[pymodule_export] - use crate::py::container::container_mod; + use crate::py::container::py_container; #[pymodule_export] - use crate::py::file::file_mod; + use crate::py::file::py_file; #[pymodule_export] - use crate::py::error::error; + use crate::py::error::py_error; #[pymodule_export] - use crate::py::structs::structs; + use crate::py::structs::py_structs; + + #[pymodule_export] + use crate::py::mutf8::py_mutf8; } \ No newline at end of file diff --git a/src/py/container.rs b/src/py/container.rs index d276aa9..22685fc 100644 --- a/src/py/container.rs +++ b/src/py/container.rs @@ -154,7 +154,7 @@ impl PyFileDexContainer { } #[pyo3::pymodule(name = "container")] -pub(crate) mod container_mod { +pub(crate) mod py_container { #[pymodule_export] use super::{PyDexContainer, PyFileDexContainer, PyInMemoryDexContainer}; diff --git a/src/py/error.rs b/src/py/error.rs index 3474f36..7a66080 100644 --- a/src/py/error.rs +++ b/src/py/error.rs @@ -1,4 +1,8 @@ -use pyo3::{create_exception, exceptions::{PyIOError, PyRuntimeError}, PyErr}; +use pyo3::{ + create_exception, + exceptions::{PyIOError, PyRuntimeError}, + PyErr, +}; use crate::error::DexError; @@ -22,10 +26,9 @@ impl From for PyErr { } } -#[pyo3::pymodule] -pub(crate) mod error { +#[pyo3::pymodule(name = 
"error")] +pub(crate) mod py_error { #[pymodule_export] use super::PyDexError; - -} \ No newline at end of file +} diff --git a/src/py/file.rs b/src/py/file.rs index 9ebca39..48380db 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -208,7 +208,7 @@ impl PyDexFileImpl { // final module #[pyo3::pymodule(name = "file")] -pub(crate) mod file_mod { +pub(crate) mod py_file { #[pymodule_export] use super::{PyDexFileImpl, PyVerifyPreset}; diff --git a/src/py/mod.rs b/src/py/mod.rs index b96c126..185f813 100644 --- a/src/py/mod.rs +++ b/src/py/mod.rs @@ -2,4 +2,4 @@ pub(crate) mod container; pub(crate) mod file; pub(crate) mod error; pub(crate) mod structs; - +pub(crate) mod mutf8; \ No newline at end of file diff --git a/src/py/mutf8.rs b/src/py/mutf8.rs new file mode 100644 index 0000000..fd2a05d --- /dev/null +++ b/src/py/mutf8.rs @@ -0,0 +1,37 @@ +use pyo3::PyResult; + +use crate::{error::DexError, utf}; + +#[pyo3::pyfunction] +pub fn mutf8_to_str(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(utf::mutf8_to_str(&utf8_data_in[0..=end])?) + } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } +} + +#[pyo3::pyfunction] +pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(utf::mutf8_to_str_lossy(&utf8_data_in[0..=end])?) 
+ } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } +} + +#[pyo3::pyfunction] +pub fn str_to_mutf8(str_data_in: &str) -> Vec { + utf::str_to_mutf8(str_data_in) +} + +#[pyo3::pyfunction] +pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { + utf::str_to_mutf8_lossy(str_data_in) +} + +#[pyo3::pymodule(name = "mutf8")] +pub(crate) mod py_mutf8 { + #[pymodule_export] + use super::{mutf8_to_str, mutf8_to_str_lossy, str_to_mutf8, str_to_mutf8_lossy}; +} diff --git a/src/py/structs.rs b/src/py/structs.rs index 66207ac..366ea43 100644 --- a/src/py/structs.rs +++ b/src/py/structs.rs @@ -148,8 +148,8 @@ py_struct_fields!(PyDexTypeItem, { (type_idx, TypeIndex), },); -#[pyo3::pymodule] -pub(crate) mod structs { +#[pyo3::pymodule(name = "structs")] +pub(crate) mod py_structs { #[pymodule_export] use super::{ diff --git a/src/utf.rs b/src/utf.rs index 39bd8c3..e1c022a 100644 --- a/src/utf.rs +++ b/src/utf.rs @@ -1,5 +1,6 @@ // TODO: these functions are highly unsafe and does not stand any chance against fuzzing +// -> resolved for now with Result<> as return type and additional checks use crate::{dex_err, Result, error::DexError}; @@ -167,7 +168,7 @@ fn convert_mutf8_to_utf16( } fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { - let mut mutf8_len = 0; + let mut mutf8_len = 0; // trailing null byte convert_utf16_to_mutf8(utf16_in, options, |_| mutf8_len += 1); let mut mutf8_out; From 33df152221df118fe9360b7d44b5b15f4acd7c22 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 15 Feb 2025 08:45:09 +0100 Subject: [PATCH 30/46] Added python interface for common structs --- + Removed Field::is_static method from class_accessor + Added DexFile::pretty_type to always return a non-empty string without the need of a result + Instruction::relative_at is now bound to a one-byte extra boundary + ClassAccessor Python API + README updated to reflect API changes --- README.md | 
45 ++-- fuzz/fuzz_targets/instructions.rs | 3 + python/dexrs/_internal/class_accessor.pyi | 29 +++ python/dexrs/_internal/file.pyi | 67 +++++- python/dexrs/_internal/structs.pyi | 35 +++ src/file/class_accessor.rs | 13 +- src/file/debug.rs | 2 +- src/file/dump.rs | 7 + src/file/instruction.rs | 2 +- src/file/mod.rs | 91 +++----- src/lib.rs | 3 + src/py/class_accessor.rs | 159 +++++++++++++ src/py/file.rs | 263 +++++++++++++++++++++- src/py/mod.rs | 3 +- src/py/structs.rs | 6 +- 15 files changed, 624 insertions(+), 104 deletions(-) create mode 100644 python/dexrs/_internal/class_accessor.pyi create mode 100644 src/py/class_accessor.rs diff --git a/README.md b/README.md index 8a2e88d..a0da2bf 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,17 @@ # DEXrs -**DEXrs** is an exploratory project in Rust aimed at developing a decompiler for Android executable files (DEX files). +**DEXrs** is an exploratory project in Rust aimed at developing a decompiler for Android executable files (DEX files). It currently covers a low level DEX file parser and disassembler with a Python API. 
#### What this project already covers: -- [x] A (*blazingly fast*) DEX file parser using lazy parsing +- [x] A (*blazingly fast* 🔥) DEX file parser that utilizes + - [x] *zero-copy* wherever applicable + - [x] *lazy-parsing* all the time + - [x] respect fuzzing tests to make sure there's no panic +- [x] Python extension using pyo3 for Pythonists - [x] A simple disassembler for Dalvik byte code -- [x] A simplistic Smali decompiler +- [ ] Benchmarks are WIP, but present +- [ ] A simplistic Smali disassembler #### Roadmap @@ -22,36 +27,32 @@ Install DEXrs using Cargo: cargo install --git https://github.com/MatrixEditor/dexrs dexrs ``` +Or directly using pip: +```bash +pip install -ve dexrs@git+https://github.com/MatrixEditor/dexrs.git +``` + ## Usage ### Disassembling DEX files -Here’s a quick example of how to disassemble a DEX file: +Here’s a quick example of how to parse a DEX file: ```rust let mut f = File::open("classes.dex").expect("file not found"); // parse DEX input and verify its contents -let mut dex = Dex::read(&mut f, true)?; - -let class = dex.get_class_def(0)?; -if let Some(method) = class.get_direct_method(0) { - for insn in method.disasm(&mut dex)? 
{ - println!(" {:#06x}: {:?}", insn.range.start, insn); - } -} -``` +let container = DexFileContainer::new(&file) + .verify(true) + .verify_checksum(true); -## Decompilation to Smali +// please use the examples/ directory for more usage information +let dex = container.open()?; +``` +In-memory parsing is also allowed: ```rust -use dexrs::smali::SmaliWrite; - -let mut f = File::open("classes.dex").expect("file not found"); -let mut dex = Dex::read(&mut f, true)?; - -let class = dex.get_class_def(0)?; -let mut stdout = std::io::stdout(); -stdout.write_class(&class, &mut dex)?; +let data: [u8] = ...; +let dex = DexFile::open(&data, DexLocation::InMemory, VerifyPreset::All)?; ``` ## License diff --git a/fuzz/fuzz_targets/instructions.rs b/fuzz/fuzz_targets/instructions.rs index 4b6adf0..340e0e2 100644 --- a/fuzz/fuzz_targets/instructions.rs +++ b/fuzz/fuzz_targets/instructions.rs @@ -1,3 +1,6 @@ +// #81835918 REDUCE cov: 439 ft: 2487 corp: 407/57Kb lim: 4096 exec/s: 26796 rss: 631Mb L: 284/3689 MS: 4 ChangeByte-PersAutoDict-ChangeBit-EraseBytes- DE: "\322)\000\000"- +// #81899060 REDUCE cov: 439 ft: 2487 corp: 407/57Kb lim: 4096 exec/s: 26799 rss: 631Mb L: 282/3689 MS: 1 EraseBytes- +// #81909187 REDUCE cov: 439 ft: 2487 corp: 407/57Kb lim: 4096 exec/s: 26802 rss: 631Mb L: 784/3689 MS: 1 EraseBytes- #![no_main] #![allow(non_snake_case)] diff --git a/python/dexrs/_internal/class_accessor.pyi b/python/dexrs/_internal/class_accessor.pyi new file mode 100644 index 0000000..e1caaa4 --- /dev/null +++ b/python/dexrs/_internal/class_accessor.pyi @@ -0,0 +1,29 @@ +from typing import List + +class Method: + index: int + access_flags: int + code_offset: int + + def is_static_or_direct(self) -> bool: ... + +class Field: + index: int + access_flags: int + + def is_static(self) -> bool: ... 
+ +class ClassAccessor: + num_fields: int + num_methods: int + num_static_fields: int + num_instance_fields: int + num_direct_methods: int + num_virtual_methods: int + + def get_fields(self) -> List[Field]: ... + def get_methods(self) -> List[Method]: ... + def get_static_fields(self) -> List[Field]: ... + def get_instance_fields(self) -> List[Field]: ... + def get_direct_methods(self) -> List[Method]: ... + def get_virtual_methods(self) -> List[Method]: ... diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index d9be611..5f9d487 100644 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -1,5 +1,17 @@ +from typing import Optional, Tuple, List + from .container import InMemoryDexContainer, FileDexContainer -from .structs import Header +from .structs import ( + Header, + StringId, + TypeId, + FieldId, + ProtoId, + MethodId, + ClassDef, + TypeItem, +) +from .class_accessor import ClassAccessor class VerifyPreset: ALL: VerifyPreset @@ -16,3 +28,56 @@ class DexFile: # instance methods def get_header(self) -> Header: ... + + # string ids + def get_string_id(self, index: int) -> StringId: ... + def get_string_id_opt(self, index: int) -> Optional[StringId]: ... + def num_string_ids(self) -> int: ... + + # string operations + def get_utf16_at(self, index: int) -> str: ... + def get_utf16(self, string_id: StringId) -> str: ... + def get_utf16_opt_at(self, string_id: StringId) -> Optional[str]: ... + def get_utf16_lossy(self, string_id: StringId) -> str: ... + def get_utf16_lossy_at(self, index: int) -> str: ... + def get_string_data(self, string_id: StringId) -> Tuple[int, bytes]: ... + def fast_get_utf8(self, string_id: StringId) -> str: ... + def fast_get_utf8_at(self, index: int) -> str: ... + + # type ids + def get_type_id(self, index: int) -> TypeId: ... + def get_type_id_opt(self, index: int) -> Optional[TypeId]: ... + def num_type_ids(self) -> int: ... + def get_type_desc(self, type_id: TypeId) -> str: ... 
+ def get_type_desc_at(self, index: int) -> str: ... + def pretty_type_at(self, index: int) -> str: ... + def pretty_type(self, type_id: TypeId) -> str: ... + + # field ids + def get_field_id(self, index: int) -> FieldId: ... + def get_field_id_opt(self, index: int) -> Optional[FieldId]: ... + def num_field_ids(self) -> int: ... + def get_field_name(self, field_id: FieldId) -> str: ... + def get_field_name_at(self, index: int) -> str: ... + + # proto ids + def get_proto_id(self, index: int) -> ProtoId: ... + def get_proto_id_opt(self, index: int) -> Optional[ProtoId]: ... + def num_proto_ids(self) -> int: ... + def get_proto_shorty(self, proto_id: ProtoId) -> str: ... + def get_proto_shorty_at(self, index: int) -> str: ... + + # method ids + def get_method_id(self, index: int) -> MethodId: ... + def get_method_id_opt(self, index: int) -> Optional[MethodId]: ... + def num_method_ids(self) -> int: ... + + # class defs + def get_class_def(self, index: int) -> ClassDef: ... + def get_class_def_opt(self, index: int) -> Optional[ClassDef]: ... + def num_class_defs(self) -> int: ... + def get_class_desc(self, class_def: ClassDef) -> str: ... + def get_interfaces_list(self, class_def: ClassDef) -> Optional[List[TypeItem]]: ... + + # class data + def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: ... \ No newline at end of file diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi index 9057058..a4c7667 100644 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -26,3 +26,38 @@ class Header: def signature(self) -> bytes: ... @property def magic(self) -> bytes: ... 
+ +class StringId: + string_data_off: int + +class TypeId: + descriptor_idx: int + +class FieldId: + class_idx: int + type_idx: int + name_idx: int + +class ProtoId: + shorty_idx: int + return_type_idx: int + parameters_off: int + +class MethodId: + class_idx: int + proto_idx: int + name_idx: int + +class ClassDef: + class_idx: int + access_flags: int + superclass_idx: int + interfaces_off: int + source_file_idx: int + annotations_off: int + class_data_off: int + static_values_off: int + + +class TypeItem: + type_idx: int \ No newline at end of file diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 5a6a75e..6c9b435 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -83,13 +83,6 @@ pub struct Field { pub is_static: bool, } -impl<'a> Field { - #[inline(always)] - pub fn is_static(&self) -> bool { - self.is_static - } -} - impl<'a> ClassItemBase for Field { fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { let target = self.index as usize; @@ -121,9 +114,9 @@ impl Default for Field { } } -pub struct ClassAccessor<'a> { +pub struct ClassAccessor<'dex> { ptr_pos: usize, - class_data: &'a [u8], + class_data: &'dex [u8], pub num_static_fields: u32, pub num_instance_fields: u32, @@ -135,7 +128,7 @@ pub struct ClassAccessor<'a> { } impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - pub fn get_class_accessor(&self, class_def: &ClassDef) -> Result>> { + pub fn get_class_accessor(&self, class_def: &ClassDef) -> Result>> { match class_def.class_data_off { 0 => Ok(None), off => { diff --git a/src/file/debug.rs b/src/file/debug.rs index cdb7468..adc1070 100644 --- a/src/file/debug.rs +++ b/src/file/debug.rs @@ -152,7 +152,7 @@ impl<'dex> DebugInfoParameterNamesIterator<'dex> { pub fn new(ptr: &'dex [u8], offset: usize) -> Result { let mut pos = offset; // skipping line number - let line = decode_leb128_off::(&ptr, &mut pos)?; + decode_leb128_off::(&ptr, &mut pos)?; let size = decode_leb128_off::(&ptr, &mut pos)? 
as usize; Ok(Self { ptr, diff --git a/src/file/dump.rs b/src/file/dump.rs index 8bd86fd..40d2608 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -64,6 +64,13 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } } + pub fn pretty_type(&self, type_id: &TypeId) -> String { + match self.pretty_type_opt(type_id) { + Ok(s) => s, + Err(_) => format!("<>", type_id.descriptor_idx), + } + } + pub fn pretty_type_opt_at(&self, type_idx: TypeIndex) -> Result { self.pretty_type_opt(self.get_type_id(type_idx)?) } diff --git a/src/file/instruction.rs b/src/file/instruction.rs index d8b2036..4eeabc9 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -14,7 +14,7 @@ impl<'a> Instruction<'a> { #[inline] pub fn relative_at(&self, offset: usize) -> Result> { - if offset + 2 >= self.0.len() { + if offset + 1 >= self.0.len() { return dex_err!(BadInstructionOffset { opcode: self.name(), offset: offset, diff --git a/src/file/mod.rs b/src/file/mod.rs index 428b81e..ddc1718 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -283,8 +283,8 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline] pub unsafe fn fast_get_utf8_str(&self, string_id: &StringId) -> Result { - let (_, data) = self.get_string_data(string_id)?; - Ok(String::from_utf8_unchecked(data.to_vec())) + let (size, data) = self.get_string_data(string_id)?; + Ok(String::from_utf8_unchecked(data[0..size as usize].to_vec())) } #[inline] @@ -432,22 +432,17 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } // ------------------------------------------------------------------------------ - // field ids + // proto ids // ------------------------------------------------------------------------------ - // Proto related methods - pub fn get_proto_id(&self, idx: ProtoIndex) -> Result<&'a ProtoId> { - check_lt_result!(idx, self.proto_ids.len(), ProtoId); - Ok(&self.proto_ids[idx as usize]) - } + fn_id!(get_proto_id, proto_ids, ProtoId, ProtoIndex,); + fn_id!(get_proto_ids, proto_ids, ProtoId[],); + fn_id! 
{proto_id_idx, proto_ids, Idx: ProtoId, } + fn_id! {get_proto_id_opt, proto_ids, Option: ProtoId, get_proto_id, ProtoIndex,} pub fn num_proto_ids(&self) -> u32 { self.header.proto_ids_size } - pub fn get_proto_ids(&self) -> &'a [ProtoId] { - self.proto_ids - } - pub fn get_shorty_at(&self, idx: ProtoIndex) -> Result { let proto_id = self.get_proto_id(idx)?; self.get_shorty(proto_id) @@ -479,36 +474,46 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { AnnotationItem::from_raw_parts(&self.mmap[off as usize..]) } - // method ids related methods //------------------------------------------------------------------------------ // Method Ids //------------------------------------------------------------------------------ + fn_id!(get_method_id, method_ids, MethodId, u32,); + fn_id!(get_method_ids, method_ids, MethodId[],); + fn_id! {method_id_idx, method_ids, Idx: MethodId, } + fn_id! {get_method_id_opt, method_ids, Option: MethodId, get_method_id, u32,} + #[inline(always)] - pub fn get_method_id(&self, idx: u32) -> Result<&'a MethodId> { - check_lt_result!(idx, self.method_ids.len(), MethodId); - Ok(&self.method_ids[idx as usize]) + pub fn num_method_ids(&self) -> u32 { + self.header.method_ids_size } + // classdef related methods + //------------------------------------------------------------------------------ + // ClassDefs + //------------------------------------------------------------------------------ + fn_id!(get_class_def, class_defs, ClassDef, u32,); + fn_id!(get_class_defs, class_defs, ClassDef[],); + fn_id! {class_def_idx, class_defs, Idx: ClassDef, } + fn_id! 
{get_class_def_opt, class_defs, Option: ClassDef, get_class_def, u32,} + #[inline(always)] - pub fn method_id_idx(&self, item: &'a MethodId) -> Result { - self.offset_of(self.method_ids, item) + pub fn num_class_defs(&self) -> u32 { + self.header.class_defs_size } - #[inline(always)] - pub fn num_method_ids(&self) -> u32 { - self.header.method_ids_size + #[inline] + pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_lossy_at(class_def.class_idx) } - #[inline(always)] - pub fn get_method_ids(&self) -> &'a [MethodId] { - self.method_ids + #[inline] + pub fn get_class_desc_utf16(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_at(class_def.class_idx) } - // classdef related methods - #[inline(always)] - pub fn get_class_def(&self, idx: u32) -> Result<&'a ClassDef> { - check_lt_result!(idx, self.class_defs.len(), ClassDef); - Ok(&self.class_defs[idx as usize]) + #[inline] + pub fn get_interfaces_list(&self, class_def: &ClassDef) -> Result>> { + self.get_type_list(class_def.interfaces_off) } //------------------------------------------------------------------------------ @@ -609,34 +614,6 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.get_annotation_set(anno_item.annotations_off) } - //------------------------------------------------------------------------------ - // ClassDefs - //------------------------------------------------------------------------------ - #[inline(always)] - pub fn num_class_defs(&self) -> u32 { - self.header.class_defs_size - } - - #[inline(always)] - pub fn get_class_defs(&self) -> &'a [ClassDef] { - self.class_defs - } - - #[inline] - pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { - self.get_type_desc_utf16_lossy_at(class_def.class_idx) - } - - #[inline] - pub fn get_class_desc_utf16(&self, class_def: &ClassDef) -> Result { - self.get_type_desc_utf16_at(class_def.class_idx) - } - - #[inline] - pub fn get_interfaces_list(&self, class_def: 
&ClassDef) -> Result>> { - self.get_type_list(class_def.interfaces_off) - } - #[inline] fn offset_of(&self, buf: &[U], o: &T) -> Result { let start = buf.as_ptr() as usize; diff --git a/src/lib.rs b/src/lib.rs index 06108d3..0acc23f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,4 +30,7 @@ mod _internal { #[pymodule_export] use crate::py::mutf8::py_mutf8; + + #[pymodule_export] + use crate::py::class_accessor::py_class_accessor; } \ No newline at end of file diff --git a/src/py/class_accessor.rs b/src/py/class_accessor.rs new file mode 100644 index 0000000..99eb656 --- /dev/null +++ b/src/py/class_accessor.rs @@ -0,0 +1,159 @@ +use std::sync::Arc; + +use pyo3::PyResult; + +use crate::file::{ClassAccessor, Field, Method}; + +// Rust-side of the class accessor +// +// However, this may seem invalid, there's actually no need for us to +// use mem::transmute here, because only Rust can create instances of +// this type. +pub struct RsClassAccessor(ClassAccessor<'static>); + +#[pyo3::pyclass(name = "ClassAccessor", module = "dexrs._internal.class_accessor")] +pub struct PyClassAccessor { + inner: Arc, +} + +impl PyClassAccessor { + // Rust interface + pub fn from_instance(class_accessor: ClassAccessor<'static>) -> PyClassAccessor { + PyClassAccessor { + inner: Arc::new(RsClassAccessor(class_accessor)), + } + } +} + +impl From> for PyClassAccessor { + fn from(class_accessor: ClassAccessor<'static>) -> Self { + PyClassAccessor::from_instance(class_accessor) + } +} + + +#[pyo3::pymethods] +impl PyClassAccessor { + // no constructor + #[getter] + pub fn num_fields(&self) -> usize { + self.inner.0.num_fields() + } + + #[getter] + pub fn num_methods(&self) -> usize { + self.inner.0.num_methods() + } + + #[getter] + pub fn num_static_fields(&self) -> u32 { + self.inner.0.num_static_fields + } + + #[getter] + pub fn num_instance_fields(&self) -> u32 { + self.inner.0.num_instance_fields + } + + #[getter] + pub fn num_direct_methods(&self) -> u32 { + 
self.inner.0.num_direct_methods + } + + #[getter] + pub fn num_virtual_methods(&self) -> u32 { + self.inner.0.num_virtual_methods + } + + pub fn get_fields(&self) -> PyResult> { + Ok(self.inner.0.get_fields().map(Into::into).collect()) + } + + pub fn get_static_fieds(&self) -> PyResult> { + Ok(self.inner.0.get_static_fieds().map(Into::into).collect()) + } + + pub fn get_instance_fields(&self) -> PyResult> { + Ok(self.inner.0.get_instance_fields().map(Into::into).collect()) + } + + pub fn get_methods(&self) -> PyResult> { + Ok(self.inner.0.get_methods()?.map(Into::into).collect()) + } + + pub fn get_virtual_methods(&self) -> PyResult> { + Ok(self + .inner + .0 + .get_virtual_methods()? + .map(Into::into) + .collect()) + } + + pub fn get_direct_methods(&self) -> PyResult> { + Ok(self.inner.0.get_direct_methods()?.map(Into::into).collect()) + } +} + +#[pyo3::pyclass(name = "Method", module = "dexrs._internal.class_accessor")] +pub struct PyDexMethod(Arc); + +impl From for PyDexMethod { + fn from(method: Method) -> Self { + PyDexMethod(Arc::new(method)) + } +} + +#[pyo3::pymethods] +impl PyDexMethod { + #[getter] + pub fn index(&self) -> u32 { + self.0.index + } + + #[getter] + pub fn access_flags(&self) -> u32 { + self.0.access_flags + } + + #[getter] + pub fn code_offset(&self) -> u32 { + self.0.code_offset + } + + pub fn is_static_or_direct(&self) -> bool { + self.0.is_static_or_direct + } +} + +#[pyo3::pyclass(name = "Field", module = "dexrs._internal.class_accessor")] +pub struct PyDexField(Arc); + +impl From for PyDexField { + fn from(field: Field) -> Self { + PyDexField(Arc::new(field)) + } +} + +#[pyo3::pymethods] +impl PyDexField { + #[getter] + pub fn index(&self) -> u32 { + self.0.index + } + + #[getter] + pub fn access_flags(&self) -> u32 { + self.0.access_flags + } + + pub fn is_static(&self) -> bool { + self.0.is_static + } +} + +#[pyo3::pymodule] +pub mod py_class_accessor { + #[pymodule_export] + use super::{PyClassAccessor, PyDexField, PyDexMethod}; +} 
diff --git a/src/py/file.rs b/src/py/file.rs index 48380db..b04eb72 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -2,11 +2,17 @@ use std::sync::Arc; use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; -use crate::file::{verifier::VerifyPreset, DexFile, DexLocation, StringIndex, TypeIndex}; +use crate::file::{ + verifier::VerifyPreset, DexFile, DexLocation, FieldIndex, ProtoIndex, StringIndex, TypeIndex, +}; use super::{ + class_accessor::PyClassAccessor, container::{PyFileDexContainer, PyInMemoryDexContainer}, - structs::{PyDexHeader, PyDexStringId, PyDexTypeId}, + structs::{ + PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, + PyDexTypeId, PyDexTypeItem, + }, }; #[allow(non_camel_case_types)] @@ -78,6 +84,7 @@ macro_rules! dex_container_check { }; } +// REVISIT: this can be reduced macro_rules! dex_action_impl { ($this:ident, $method:ident, $py:ident) => {{ match &$this.inner.as_ref() { @@ -91,7 +98,7 @@ macro_rules! dex_action_impl { } } }}; - ($this:ident, $method:ident, $arg:expr, $py:ident) => {{ + ($this:ident, $method:ident?, $arg:expr, $py:ident) => {{ match &$this.inner.as_ref() { RsDexFile::InMemory { dex, container } => { dex_container_check!(container, $py, $method); @@ -103,6 +110,30 @@ macro_rules! dex_action_impl { } } }}; + ($this:ident, $method:ident, $arg:expr, $py:ident) => {{ + match &$this.inner.as_ref() { + RsDexFile::InMemory { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method($arg) + } + RsDexFile::File { dex, container } => { + dex_container_check!(container, $py, $method); + dex.$method($arg) + } + } + }}; + ($this:ident, unsafe { $method:ident }, $arg:expr, $py:ident) => {{ + match &$this.inner.as_ref() { + RsDexFile::InMemory { dex, container } => { + dex_container_check!(container, $py, $method); + unsafe { dex.$method($arg)? } + } + RsDexFile::File { dex, container } => { + dex_container_check!(container, $py, $method); + unsafe { dex.$method($arg)? 
} + } + } + }}; } #[pyo3::pymethods] @@ -154,7 +185,7 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult { - Ok(dex_action_impl!(self, get_string_id, index, py).into()) + Ok(dex_action_impl!(self, get_string_id?, index, py).into()) } pub fn get_string_id_opt<'py>( @@ -162,7 +193,7 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_string_id_opt, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_string_id_opt?, index, py).map(Into::into)) } pub fn num_string_ids<'py>(&self, py: Python<'py>) -> PyResult { @@ -173,7 +204,7 @@ impl PyDexFileImpl { // Type Ids // ---------------------------------------------------------------------------- pub fn get_type_id<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, get_type_id, index, py).into()) + Ok(dex_action_impl!(self, get_type_id?, index, py).into()) } pub fn get_type_id_opt<'py>( @@ -181,19 +212,177 @@ impl PyDexFileImpl { py: Python<'py>, index: TypeIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_type_id_opt, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_type_id_opt?, index, py).map(Into::into)) } pub fn num_type_ids<'py>(&self, py: Python<'py>) -> PyResult { Ok(dex_action_impl!(self, num_type_ids, py)) } + pub fn get_type_desc<'py>( + &self, + py: Python<'py>, + py_type_id: Py, + ) -> PyResult { + let type_id = &py_type_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_type_desc_utf16?, type_id, py)) + } + + pub fn get_type_desc_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { + Ok(dex_action_impl!(self, get_type_desc_utf16_at?, index, py)) + } + + pub fn pretty_type_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { + Ok(dex_action_impl!(self, pretty_type_at, index, py)) + } + + pub fn pretty_type<'py>( + &self, + py: Python<'py>, + py_type_id: Py, + ) -> PyResult { + let type_id = &py_type_id.try_borrow(py)?.0; + 
Ok(dex_action_impl!(self, pretty_type, type_id, py)) + } + // ---------------------------------------------------------------------------- - // string data + // Field Ids // ---------------------------------------------------------------------------- + pub fn get_field_id<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { + Ok(dex_action_impl!(self, get_field_id?, index, py).into()) + } + + pub fn get_field_id_opt<'py>( + &self, + py: Python<'py>, + index: FieldIndex, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_field_id_opt?, index, py).map(Into::into)) + } + + pub fn num_field_ids<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_field_ids, py)) + } + + pub fn get_field_name<'py>( + &self, + py: Python<'py>, + py_field_id: Py, + ) -> PyResult { + let field_id = &py_field_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_field_name?, field_id, py)) + } + + pub fn get_field_name_at<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { + Ok(dex_action_impl!(self, get_field_name_at?, index, py)) + } + // ---------------------------------------------------------------------------- + // Proto Ids + // ---------------------------------------------------------------------------- + pub fn get_proto_id<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { + Ok(dex_action_impl!(self, get_proto_id?, index, py).into()) + } + + pub fn get_proto_id_opt<'py>( + &self, + py: Python<'py>, + index: ProtoIndex, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_proto_id_opt?, index, py).map(Into::into)) + } + + pub fn num_proto_ids<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_proto_ids, py)) + } + + pub fn get_shorty<'py>( + &self, + py: Python<'py>, + py_proto_id: Py, + ) -> PyResult { + let proto_id = &py_proto_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_shorty?, proto_id, py)) + } + + pub fn get_shorty_at<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { + 
Ok(dex_action_impl!(self, get_shorty_at?, index, py)) + } + + // ---------------------------------------------------------------------------- + // method ids + // ---------------------------------------------------------------------------- + pub fn get_method_id<'py>(&self, py: Python<'py>, index: u32) -> PyResult { + Ok(dex_action_impl!(self, get_method_id?, index, py).into()) + } + + pub fn get_method_id_opt<'py>( + &self, + py: Python<'py>, + index: u32, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_method_id_opt?, index, py).map(Into::into)) + } + + pub fn num_method_ids<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_method_ids, py)) + } + + //------------------------------------------------------------------------------ + // ClassDefs + //------------------------------------------------------------------------------ + pub fn get_class_def<'py>(&self, py: Python<'py>, index: u32) -> PyResult { + Ok(dex_action_impl!(self, get_class_def?, index, py).into()) + } + + pub fn get_class_def_opt<'py>( + &self, + py: Python<'py>, + index: u32, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_class_def_opt?, index, py).map(Into::into)) + } + + pub fn num_class_defs<'py>(&self, py: Python<'py>) -> PyResult { + Ok(dex_action_impl!(self, num_class_defs, py)) + } + + pub fn get_class_desc<'py>( + &self, + py: Python<'py>, + py_class_def: Py, + ) -> PyResult { + let class_def = &py_class_def.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_class_desc_utf16?, class_def, py)) + } + + pub fn get_interfaces_list<'py>( + &self, + py: Python<'py>, + py_class_def: Py, + ) -> PyResult>> { + let class_def = &py_class_def.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_interfaces_list?, class_def, py) + .map(|x| x.iter().map(Into::into).collect())) + } + + // ---------------------------------------------------------------------------- + // class accessor + // ---------------------------------------------------------------------------- + pub fn 
get_class_accessor<'py>( + &self, + py: Python<'py>, + py_class_def: Py, + ) -> PyResult> { + let class_def = &py_class_def.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_class_accessor?, class_def, py).map(Into::into)) + } + + // ---------------------------------------------------------------------------- + // string data + // ---------------------------------------------------------------------------- pub fn get_utf16_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!(self, get_utf16_str_at, index, py)) + Ok(dex_action_impl!(self, get_utf16_str_at?, index, py)) } pub fn get_utf16<'py>( @@ -202,7 +391,61 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_utf16_str, &string_id, py)) + Ok(dex_action_impl!(self, get_utf16_str?, &string_id, py)) + } + + pub fn get_utf16_opt_at<'py>( + &self, + py: Python<'py>, + index: StringIndex, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_utf16_str_opt_at?, index, py)) + } + + pub fn get_utf16_lossy_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { + Ok(dex_action_impl!(self, get_utf16_str_lossy_at?, index, py)) + } + + pub fn get_utf16_lossy<'py>( + &self, + py: Python<'py>, + py_string_id: Py, + ) -> PyResult { + let string_id = &py_string_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_utf16_str_lossy?, &string_id, py)) + } + + pub fn get_string_data<'py>( + &self, + py: Python<'py>, + py_string_id: Py, + ) -> PyResult<(u32, &'py [u8])> { + let string_id = &py_string_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_string_data?, &string_id, py)) + } + + // unsafe string API + pub fn fast_get_utf8<'py>( + &self, + py: Python<'py>, + py_string_id: Py, + ) -> PyResult { + let string_id = &py_string_id.try_borrow(py)?.0; + Ok(dex_action_impl!( + self, + unsafe { fast_get_utf8_str }, + &string_id, + py + )) + } + + pub fn fast_get_utf8_at<'py>(&self, py: Python<'py>, index: 
StringIndex) -> PyResult { + Ok(dex_action_impl!( + self, + unsafe { fast_get_utf8_str_at }, + index, + py + )) } } diff --git a/src/py/mod.rs b/src/py/mod.rs index 185f813..5a43b9e 100644 --- a/src/py/mod.rs +++ b/src/py/mod.rs @@ -2,4 +2,5 @@ pub(crate) mod container; pub(crate) mod file; pub(crate) mod error; pub(crate) mod structs; -pub(crate) mod mutf8; \ No newline at end of file +pub(crate) mod mutf8; +pub(crate) mod class_accessor; \ No newline at end of file diff --git a/src/py/structs.rs b/src/py/structs.rs index 366ea43..d2eb629 100644 --- a/src/py/structs.rs +++ b/src/py/structs.rs @@ -85,7 +85,10 @@ pub fn get_magic(&self) -> Vec { py_struct_wrapper!("StringId", PyDexStringId, StringId); py_struct_fields!(PyDexStringId, { (string_data_off, StringIndex), -},); +}, +pub fn __repr__(&self) -> String { + format!("StringId(string_data_off={})", self.0.string_data_off) +}); // -------------------------------------------------------------------- // TypeId @@ -148,6 +151,7 @@ py_struct_fields!(PyDexTypeItem, { (type_idx, TypeIndex), },); + #[pyo3::pymodule(name = "structs")] pub(crate) mod py_structs { From ba910bf51e407e932d438b5cc0842885a3299283 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 15 Feb 2025 23:14:09 +0100 Subject: [PATCH 31/46] Moved Python API into Rust code files --- + Python API will be generated if cfg(feature = python) + Moved existing Python api into Rust source code base + Python stubs are WIP and don't include constant code flags --- python/dexrs/_internal/code/__init__.pyi | 338 ++++++++++ python/dexrs/_internal/code/signatures.pyi | 3 + python/dexrs/_internal/structs.pyi | 11 +- src/error.rs | 30 + src/file/class_accessor.rs | 168 +++++ src/file/code_item_accessors.rs | 91 +++ src/file/container.rs | 190 +++++- src/file/header.rs | 50 ++ src/file/instruction.rs | 726 +++++++++++++-------- src/file/structs.rs | 280 +++++++- src/leb128.rs | 18 + src/lib.rs | 18 +- src/py.rs | 80 
+++ src/py/class_accessor.rs | 159 ----- src/py/code_item.rs | 114 ++++ src/py/container.rs | 161 ----- src/py/error.rs | 34 - src/py/file.rs | 13 +- src/py/mod.rs | 6 - src/py/mutf8.rs | 37 -- src/py/structs.rs | 163 ----- src/utf.rs | 50 +- 22 files changed, 1864 insertions(+), 876 deletions(-) create mode 100644 python/dexrs/_internal/code/__init__.pyi create mode 100644 python/dexrs/_internal/code/signatures.pyi create mode 100644 src/py.rs delete mode 100644 src/py/class_accessor.rs create mode 100644 src/py/code_item.rs delete mode 100644 src/py/container.rs delete mode 100644 src/py/error.rs delete mode 100644 src/py/mod.rs delete mode 100644 src/py/mutf8.rs delete mode 100644 src/py/structs.rs diff --git a/python/dexrs/_internal/code/__init__.pyi b/python/dexrs/_internal/code/__init__.pyi new file mode 100644 index 0000000..7f2c831 --- /dev/null +++ b/python/dexrs/_internal/code/__init__.pyi @@ -0,0 +1,338 @@ +from typing import List + +from ..structs import CodeItem + +class CodeItemAccessor: + code_off: int + code_item: CodeItem + + @property + def registers_size(self) -> int: ... + @property + def ins_size(self) -> int: ... + @property + def outs_size(self) -> int: ... + @property + def tries_size(self) -> int: ... + @property + def debug_info_off(self) -> int: ... + @property + def code_off(self) -> int: ... + @property + def insns_size_in_code_units(self) -> int: ... + @property + def insns_size_in_bytes(self) -> int: ... + def insns_raw(self) -> List[int]: ... + def insns(self) -> List[Instruction]: ... + def inst_at(self, pc: int) -> Instruction: ... + +class Instruction: + @staticmethod + def opcode_of(inst_data: int) -> Code: ... + @staticmethod + def name_of(opcode: Code) -> str: ... 
+ +class Format: + k10x: Format + k12x: Format + k11n: Format + k11x: Format + k10t: Format + k20t: Format + k22x: Format + k21t: Format + k21s: Format + k21h: Format + k21c: Format + k23x: Format + k22b: Format + k22t: Format + k22s: Format + k22c: Format + k32x: Format + k30t: Format + k31t: Format + k31i: Format + k31c: Format + k35c: Format + k3rc: Format + k45cc: Format + k4rcc: Format + k51l: Format + kInvalidFormat: Format + + def __int__(self) -> int: ... + +class IndexType: + Unknown: IndexType + NoIndex: IndexType + TypeRef: IndexType + StringRef: IndexType + MethodRef: IndexType + FieldRef: IndexType + MethodAndProtoRef: IndexType + CallSiteRef: IndexType + MethodHandleRef: IndexType + ProtoRef: IndexType + + def __int__(self) -> int: ... + +class Code: + NOP: Code + MOVE: Code + MOVE_FROM16: Code + MOVE_16: Code + MOVE_WIDE: Code + MOVE_WIDE_FROM16: Code + MOVE_WIDE_16: Code + MOVE_OBJECT: Code + MOVE_OBJECT_FROM16: Code + MOVE_OBJECT_16: Code + MOVE_RESULT: Code + MOVE_RESULT_WIDE: Code + MOVE_RESULT_OBJECT: Code + MOVE_EXCEPTION: Code + RETURN_VOID: Code + RETURN: Code + RETURN_WIDE: Code + RETURN_OBJECT: Code + CONST_4: Code + CONST_16: Code + CONST: Code + CONST_HIGH16: Code + CONST_WIDE_16: Code + CONST_WIDE_32: Code + CONST_WIDE: Code + CONST_WIDE_HIGH16: Code + CONST_STRING: Code + CONST_STRING_JUMBO: Code + CONST_CLASS: Code + MONITOR_ENTER: Code + MONITOR_EXIT: Code + CHECK_CAST: Code + INSTANCE_OF: Code + ARRAY_LENGTH: Code + NEW_INSTANCE: Code + NEW_ARRAY: Code + FILLED_NEW_ARRAY: Code + FILLED_NEW_ARRAY_RANGE: Code + FILL_ARRAY_DATA: Code + THROW: Code + GOTO: Code + GOTO_16: Code + GOTO_32: Code + PACKED_SWITCH: Code + SPARSE_SWITCH: Code + CMPL_FLOAT: Code + CMPG_FLOAT: Code + CMPL_DOUBLE: Code + CMPG_DOUBLE: Code + CMP_LONG: Code + IF_EQ: Code + IF_NE: Code + IF_LT: Code + IF_GE: Code + IF_GT: Code + IF_LE: Code + IF_EQZ: Code + IF_NEZ: Code + IF_LTZ: Code + IF_GEZ: Code + IF_GTZ: Code + IF_LEZ: Code + UNUSED_3E: Code + UNUSED_3F: Code + 
UNUSED_40: Code + UNUSED_41: Code + UNUSED_42: Code + UNUSED_43: Code + AGET: Code + AGET_WIDE: Code + AGET_OBJECT: Code + AGET_BOOLEAN: Code + AGET_BYTE: Code + AGET_CHAR: Code + AGET_SHORT: Code + APUT: Code + APUT_WIDE: Code + APUT_OBJECT: Code + APUT_BOOLEAN: Code + APUT_BYTE: Code + APUT_CHAR: Code + APUT_SHORT: Code + IGET: Code + IGET_WIDE: Code + IGET_OBJECT: Code + IGET_BOOLEAN: Code + IGET_BYTE: Code + IGET_CHAR: Code + IGET_SHORT: Code + IPUT: Code + IPUT_WIDE: Code + IPUT_OBJECT: Code + IPUT_BOOLEAN: Code + IPUT_BYTE: Code + IPUT_CHAR: Code + IPUT_SHORT: Code + SGET: Code + SGET_WIDE: Code + SGET_OBJECT: Code + SGET_BOOLEAN: Code + SGET_BYTE: Code + SGET_CHAR: Code + SGET_SHORT: Code + SPUT: Code + SPUT_WIDE: Code + SPUT_OBJECT: Code + SPUT_BOOLEAN: Code + SPUT_BYTE: Code + SPUT_CHAR: Code + SPUT_SHORT: Code + INVOKE_VIRTUAL: Code + INVOKE_SUPER: Code + INVOKE_DIRECT: Code + INVOKE_STATIC: Code + INVOKE_INTERFACE: Code + UNUSED_73: Code + INVOKE_VIRTUAL_RANGE: Code + INVOKE_SUPER_RANGE: Code + INVOKE_DIRECT_RANGE: Code + INVOKE_STATIC_RANGE: Code + INVOKE_INTERFACE_RANGE: Code + UNUSED_79: Code + UNUSED_7A: Code + NEG_INT: Code + NOT_INT: Code + NEG_LONG: Code + NOT_LONG: Code + NEG_FLOAT: Code + NEG_DOUBLE: Code + INT_TO_LONG: Code + INT_TO_FLOAT: Code + INT_TO_DOUBLE: Code + LONG_TO_INT: Code + LONG_TO_FLOAT: Code + LONG_TO_DOUBLE: Code + FLOAT_TO_INT: Code + FLOAT_TO_LONG: Code + FLOAT_TO_DOUBLE: Code + DOUBLE_TO_INT: Code + DOUBLE_TO_LONG: Code + DOUBLE_TO_FLOAT: Code + INT_TO_BYTE: Code + INT_TO_CHAR: Code + INT_TO_SHORT: Code + ADD_INT: Code + SUB_INT: Code + MUL_INT: Code + DIV_INT: Code + REM_INT: Code + AND_INT: Code + OR_INT: Code + XOR_INT: Code + SHL_INT: Code + SHR_INT: Code + USHR_INT: Code + ADD_LONG: Code + SUB_LONG: Code + MUL_LONG: Code + DIV_LONG: Code + REM_LONG: Code + AND_LONG: Code + OR_LONG: Code + XOR_LONG: Code + SHL_LONG: Code + SHR_LONG: Code + USHR_LONG: Code + ADD_FLOAT: Code + SUB_FLOAT: Code + MUL_FLOAT: Code + DIV_FLOAT: 
Code + REM_FLOAT: Code + ADD_DOUBLE: Code + SUB_DOUBLE: Code + MUL_DOUBLE: Code + DIV_DOUBLE: Code + REM_DOUBLE: Code + ADD_INT_2ADDR: Code + SUB_INT_2ADDR: Code + MUL_INT_2ADDR: Code + DIV_INT_2ADDR: Code + REM_INT_2ADDR: Code + AND_INT_2ADDR: Code + OR_INT_2ADDR: Code + XOR_INT_2ADDR: Code + SHL_INT_2ADDR: Code + SHR_INT_2ADDR: Code + USHR_INT_2ADDR: Code + ADD_LONG_2ADDR: Code + SUB_LONG_2ADDR: Code + MUL_LONG_2ADDR: Code + DIV_LONG_2ADDR: Code + REM_LONG_2ADDR: Code + AND_LONG_2ADDR: Code + OR_LONG_2ADDR: Code + XOR_LONG_2ADDR: Code + SHL_LONG_2ADDR: Code + SHR_LONG_2ADDR: Code + USHR_LONG_2ADDR: Code + ADD_FLOAT_2ADDR: Code + SUB_FLOAT_2ADDR: Code + MUL_FLOAT_2ADDR: Code + DIV_FLOAT_2ADDR: Code + REM_FLOAT_2ADDR: Code + ADD_DOUBLE_2ADDR: Code + SUB_DOUBLE_2ADDR: Code + MUL_DOUBLE_2ADDR: Code + DIV_DOUBLE_2ADDR: Code + REM_DOUBLE_2ADDR: Code + ADD_INT_LIT16: Code + RSUB_INT: Code + MUL_INT_LIT16: Code + DIV_INT_LIT16: Code + REM_INT_LIT16: Code + AND_INT_LIT16: Code + OR_INT_LIT16: Code + XOR_INT_LIT16: Code + ADD_INT_LIT8: Code + RSUB_INT_LIT8: Code + MUL_INT_LIT8: Code + DIV_INT_LIT8: Code + REM_INT_LIT8: Code + AND_INT_LIT8: Code + OR_INT_LIT8: Code + XOR_INT_LIT8: Code + SHL_INT_LIT8: Code + SHR_INT_LIT8: Code + USHR_INT_LIT8: Code + UNUSED_E3: Code + UNUSED_E4: Code + UNUSED_E5: Code + UNUSED_E6: Code + UNUSED_E7: Code + UNUSED_E8: Code + UNUSED_E9: Code + UNUSED_EA: Code + UNUSED_EB: Code + UNUSED_EC: Code + UNUSED_ED: Code + UNUSED_EE: Code + UNUSED_EF: Code + UNUSED_F0: Code + UNUSED_F1: Code + UNUSED_F2: Code + UNUSED_F3: Code + UNUSED_F4: Code + UNUSED_F5: Code + UNUSED_F6: Code + UNUSED_F7: Code + UNUSED_F8: Code + UNUSED_F9: Code + INVOKE_POLYMORPHIC: Code + INVOKE_POLYMORPHIC_RANGE: Code + INVOKE_CUSTOM: Code + INVOKE_CUSTOM_RANGE: Code + CONST_METHOD_HANDLE: Code + CONST_METHOD_TYPE: Code + + def __int__(self) -> int: ... 
diff --git a/python/dexrs/_internal/code/signatures.pyi b/python/dexrs/_internal/code/signatures.pyi new file mode 100644 index 0000000..3b2a31b --- /dev/null +++ b/python/dexrs/_internal/code/signatures.pyi @@ -0,0 +1,3 @@ +ArrayDataSignature: int = ... +SparseSwitchSignature: int = ... +PackedSwitchSignature: int = ... diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi index a4c7667..e7a1def 100644 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -58,6 +58,13 @@ class ClassDef: class_data_off: int static_values_off: int - class TypeItem: - type_idx: int \ No newline at end of file + type_idx: int + +class CodeItem: + registers_size: int + ins_size: int + outs_size: int + tries_size: int + debug_info_off: int + insns_size: int diff --git a/src/error.rs b/src/error.rs index b228f47..59ae59c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -209,3 +209,33 @@ impl Debug for DexError { write!(f, "{}", self) } } + +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "error")] +pub(crate) mod py_error { + use pyo3::exceptions::PyException; + + pyo3::create_exception!(dexrs._internal.error, PyDexError, PyException); + + impl From for pyo3::PyErr { + fn from(err: super::DexError) -> pyo3::PyErr { + PyDexError::new_err(err.to_string()) + } + } + + #[pymodule_export] + use PyDexError as PyDexErrorExport; + + // generic errors not wrapped by dexrs + #[derive(Debug, thiserror::Error)] + pub enum GenericError { + #[error(transparent)] + IOError(#[from] std::io::Error), + } + + impl From for pyo3::PyErr { + fn from(err: GenericError) -> pyo3::PyErr { + pyo3::exceptions::PyIOError::new_err(err.to_string()) + } + } +} diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 6c9b435..1b2cd98 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -1,3 +1,9 @@ +#[cfg(feature = "python")] +use std::sync::Arc; + +#[cfg(feature = "python")] +use pyo3::PyResult; + use 
super::{ClassDef, DexContainer, DexFile, InvokeType, ACC_STATIC}; use crate::{ dex_err, @@ -7,12 +13,18 @@ use crate::{ Result, }; +#[cfg(feature = "python")] +use crate::py::rs_type_wrapper; + pub trait ClassItemBase: Copy + Clone + Default { fn read(&mut self, data: &[u8], pos: &mut usize) -> Result<()>; fn next_section(&mut self); } +// ---------------------------------------------------------------------------- +// Method +// ---------------------------------------------------------------------------- #[derive(Copy, Clone)] pub struct Method { pub index: u32, @@ -76,6 +88,43 @@ impl Default for Method { } } } + +// >>> begin python export +#[cfg(feature = "python")] +rs_type_wrapper!( + Method, + PyDexMethod, + name: "Method", + module: "dexrs._internal.class_accessor" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexMethod { + #[getter] + pub fn index(&self) -> u32 { + self.0.index + } + + #[getter] + pub fn access_flags(&self) -> u32 { + self.0.access_flags + } + + #[getter] + pub fn code_offset(&self) -> u32 { + self.0.code_offset + } + + pub fn is_static_or_direct(&self) -> bool { + self.0.is_static_or_direct + } +} +// <<< end python export + +// ---------------------------------------------------------------------------- +// Field +// ---------------------------------------------------------------------------- #[derive(Copy, Clone)] pub struct Field { pub index: u32, @@ -114,6 +163,37 @@ impl Default for Field { } } +// >>> begin python export +#[cfg(feature = "python")] +rs_type_wrapper!( + Field, + PyDexField, + name: "Field", + module: "dexrs._internal.class_accessor" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexField { + #[getter] + pub fn index(&self) -> u32 { + self.0.index + } + + #[getter] + pub fn access_flags(&self) -> u32 { + self.0.access_flags + } + + pub fn is_static(&self) -> bool { + self.0.is_static + } +} +// <<< end python export + +// 
---------------------------------------------------------------------------- +// ClassAccessor +// ---------------------------------------------------------------------------- pub struct ClassAccessor<'dex> { ptr_pos: usize, class_data: &'dex [u8], @@ -332,6 +412,86 @@ impl<'a> ClassAccessor<'a> { } } +// >>> begin python export +// Python-side of the class accessor +// +// However, this <'static> may seem invalid, there's actually no need for us to +// use mem::transmute here, because only Rust can create instances of this type. +// Since the DexFile was already converteed to be 'static, this is also valid. +#[cfg(feature = "python")] +rs_type_wrapper!( + ClassAccessor<'static>, + PyClassAccessor, + RsClassAccessor, + name: "ClassAccessor", + module: "dexrs._internal.class_accessor" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyClassAccessor { + // no constructor + #[getter] + pub fn num_fields(&self) -> usize { + self.inner.0.num_fields() + } + + #[getter] + pub fn num_methods(&self) -> usize { + self.inner.0.num_methods() + } + + #[getter] + pub fn num_static_fields(&self) -> u32 { + self.inner.0.num_static_fields + } + + #[getter] + pub fn num_instance_fields(&self) -> u32 { + self.inner.0.num_instance_fields + } + + #[getter] + pub fn num_direct_methods(&self) -> u32 { + self.inner.0.num_direct_methods + } + + #[getter] + pub fn num_virtual_methods(&self) -> u32 { + self.inner.0.num_virtual_methods + } + + pub fn get_fields(&self) -> PyResult> { + Ok(self.inner.0.get_fields().map(Into::into).collect()) + } + + pub fn get_static_fieds(&self) -> PyResult> { + Ok(self.inner.0.get_static_fieds().map(Into::into).collect()) + } + + pub fn get_instance_fields(&self) -> PyResult> { + Ok(self.inner.0.get_instance_fields().map(Into::into).collect()) + } + + pub fn get_methods(&self) -> PyResult> { + Ok(self.inner.0.get_methods()?.map(Into::into).collect()) + } + + pub fn get_virtual_methods(&self) -> PyResult> { + Ok(self + .inner + .0 + 
.get_virtual_methods()? + .map(Into::into) + .collect()) + } + + pub fn get_direct_methods(&self) -> PyResult> { + Ok(self.inner.0.get_direct_methods()?.map(Into::into).collect()) + } +} +// <<< end python export + pub struct DataIterator<'a, T: ClassItemBase> { class_data: &'a [u8], value: T, @@ -390,3 +550,11 @@ impl<'a, T: ClassItemBase> Iterator for DataIterator<'a, T> { return None; } } + +// >>> begin python module export +#[cfg(feature = "python")] +#[pyo3::pymodule] +pub mod py_class_accessor { + #[pymodule_export] + use super::{PyClassAccessor, PyDexField, PyDexMethod}; +} diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index ee51829..33ce9c7 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -1,7 +1,21 @@ +#[cfg(feature = "python")] +use pyo3::PyResult; +#[cfg(feature = "python")] +use std::sync::Arc; + +#[cfg(feature = "python")] +use crate::py::rs_type_wrapper; + use crate::Result; use super::{CodeItem, DexContainer, DexFile, Instruction}; +#[cfg(feature = "python")] +use super::{PyDexCodeItem, PyInstruction}; + +// ---------------------------------------------------------------------------- +// CodeItemAccessor +// ---------------------------------------------------------------------------- pub struct CodeItemAccessor<'a> { code_off: u32, code_item: &'a CodeItem, @@ -105,6 +119,83 @@ impl<'a> IntoIterator for &'a CodeItemAccessor<'a> { } } +// >>> begin python export +#[cfg(feature = "python")] +rs_type_wrapper!( + CodeItemAccessor<'static>, + PyCodeItemAccessor, + RsCodeItemAccessor, + name: "CodeItemAccessor", + module: "dexrs._internal.code" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyCodeItemAccessor { + #[getter] + pub fn insns_size_in_code_units(&self) -> u32 { + self.inner.0.insns_size_in_code_units() + } + + #[getter] + pub fn insns_size_in_bytes(&self) -> u32 { + self.inner.0.insns_size_in_bytes() + } + + pub fn has_code(&self) -> bool { + 
self.inner.0.has_code() + } + + #[getter] + pub fn code_off(&self) -> u32 { + self.inner.0.code_off() + } + + #[getter] + pub fn code_item(&self) -> PyDexCodeItem { + self.inner.0.code_item().into() + } + + #[getter] + pub fn registers_size(&self) -> u16 { + self.inner.0.registers_size() + } + + #[getter] + pub fn ins_size(&self) -> u16 { + self.inner.0.ins_size() + } + + #[getter] + pub fn outs_size(&self) -> u16 { + self.inner.0.outs_size() + } + + #[getter] + pub fn tries_size(&self) -> u16 { + self.inner.0.tries_size() + } + + pub fn insns_raw(&self) -> &[u16] { + self.inner.0.insns() + } + + pub fn inst_at(&self, pc: u32) -> PyInstruction { + self.inner.0.inst_at(pc).into() + } + + // REVISIT: dex_pc is unused here + pub fn insns(&self) -> PyResult> { + Ok(DexInstructionIterator::new(self.inner.0.insns) + .map(Into::into) + .collect()) + } +} +// <<< end python export + +// ---------------------------------------------------------------------------- +// Instruction Iterator +// ---------------------------------------------------------------------------- pub struct DexInstructionIterator<'a> { instructions: &'a [u16], pc: usize, diff --git a/src/file/container.rs b/src/file/container.rs index 30d59ce..8729b60 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -1,10 +1,21 @@ use memmap2::{MmapAsRawDesc, MmapMut}; use std::ops::{Deref, DerefMut}; -use crate::Result; +#[cfg(feature = "python")] +use std::sync::Arc; + +#[cfg(feature = "python")] +use pyo3::{types::PyBytes, Py, PyRef, PyResult, Python}; use super::MmapDexFile; +use crate::Result; +#[cfg(feature = "python")] +use crate::error::py_error::GenericError; + +// ---------------------------------------------------------------------------- +// DexContainer +// ---------------------------------------------------------------------------- pub trait DexContainer<'a>: AsRef<[u8]> + Deref + 'a { fn data(&'a self) -> &'a [u8] { self.as_ref() @@ -15,14 +26,33 @@ pub trait DexContainer<'a>: AsRef<[u8]> 
+ Deref + 'a { } } +// ---------------------------------------------------------------------------- +// DexContainerMut +// ---------------------------------------------------------------------------- + pub trait DexContainerMut<'a>: DexContainer<'a> + DerefMut { fn data_mut(&'a mut self) -> &'a mut [u8] { self.deref_mut() } } -impl<'a> DexContainer<'a> for memmap2::Mmap {} +// ---------------------------------------------------------------------------- +// default implementations +// ---------------------------------------------------------------------------- +impl DexContainer<'_> for memmap2::Mmap {} +impl DexContainer<'_> for MmapMut {} +impl DexContainerMut<'_> for MmapMut {} + +impl<'a> DexContainer<'a> for &'a [u8] {} +impl<'a> DexContainer<'a> for &'a mut [u8] {} +impl<'a> DexContainerMut<'a> for &'a mut [u8] {} + +impl DexContainer<'_> for Vec {} +impl DexContainerMut<'_> for Vec {} +// ---------------------------------------------------------------------------- +// InMemoryDexContainer +// ---------------------------------------------------------------------------- pub struct InMemoryDexContainer<'a>(&'a [u8]); impl<'a> InMemoryDexContainer<'a> { @@ -46,8 +76,75 @@ impl<'a> AsRef<[u8]> for InMemoryDexContainer<'a> { impl<'a> DexContainer<'a> for InMemoryDexContainer<'a> {} -impl<'a> DexContainer<'a> for &'a [u8] {} +// >>> begin python export + +#[cfg(feature = "python")] +// custom implementation of DexFileContainer to support python values +#[pyo3::pyclass( + name = "InMemoryDexContainer", + module = "dexrs._internal.container", + frozen +)] +pub struct PyInMemoryDexContainer { + pub(crate) data: Py, + length: usize, +} + +#[cfg(feature = "python")] +impl AsRef<[u8]> for PyInMemoryDexContainer { + #[inline] + fn as_ref(&self) -> &[u8] { + self.deref() + } +} + +#[cfg(feature = "python")] +impl Deref for PyInMemoryDexContainer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + Python::with_gil(|py| self.data.as_bytes(py)) + } +} + 
+#[cfg(feature = "python")] +impl DexContainer<'_> for PyInMemoryDexContainer {} + +#[cfg(feature = "python")] +impl PyInMemoryDexContainer { + pub fn open<'py>(py: Python, data: Py) -> Self { + Self { + data: data.clone_ref(py), + length: data.as_bytes(py).len(), + } + } +} +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyInMemoryDexContainer { + #[new] + pub fn new<'py>(py: Python<'py>, data: Py) -> PyResult { + Ok(PyInMemoryDexContainer::open(py, data)) + } + + pub fn data<'py>(py_self: PyRef<'_, Self>, py: Python<'py>) -> PyResult> { + Ok(py_self.data.clone_ref(py)) + } + + #[getter] + pub fn file_size(py_self: PyRef<'_, Self>) -> PyResult { + Ok(py_self.length) + } + + pub fn __len__(py_self: PyRef<'_, Self>) -> usize { + py_self.length + } +} + +// ---------------------------------------------------------------------------- +// DexFileContainer +// ---------------------------------------------------------------------------- pub struct DexFileContainer { mmap: memmap2::Mmap, location: String, @@ -96,11 +193,86 @@ impl DexFileContainer { } } -impl DexContainer<'_> for MmapMut {} -impl DexContainerMut<'_> for MmapMut {} +// >>> begin python export +#[cfg(feature = "python")] +#[pyo3::pyclass( + name = "FileDexContainer", + module = "dexrs._internal.container", + frozen +)] +pub struct PyFileDexContainer { + pub(crate) path: String, + _fp: Arc, + data: Arc, +} -impl<'a> DexContainer<'a> for &'a mut [u8] {} -impl<'a> DexContainerMut<'a> for &'a mut [u8] {} +#[cfg(feature = "python")] +impl AsRef<[u8]> for PyFileDexContainer { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.data.as_ref() + } +} -impl DexContainer<'_> for Vec {} -impl DexContainerMut<'_> for Vec {} +#[cfg(feature = "python")] +impl Deref for PyFileDexContainer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.data.deref() + } +} + +#[cfg(feature = "python")] +impl DexContainer<'_> for PyFileDexContainer {} + +// Rust API +#[cfg(feature = "python")] +impl 
PyFileDexContainer { + pub fn open(path: String) -> std::result::Result { + let fp = std::fs::File::open(path.clone())?; + let mmap = unsafe { memmap2::Mmap::map(&fp)? }; + Ok(PyFileDexContainer { + path, + _fp: Arc::new(fp), + data: Arc::new(mmap), + }) + } +} + +// Python API +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyFileDexContainer { + #[new] + pub fn new(path: String) -> PyResult { + Ok(PyFileDexContainer::open(path)?) + } + + #[getter] + pub fn location(&self) -> PyResult { + Ok(self.path.clone()) + } + + pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { + Ok(PyBytes::new(py, self.data.as_ref()).into()) + } + + #[getter] + pub fn file_size(&self) -> PyResult { + Ok(self.data.len()) + } + + pub fn __len__(&self) -> usize { + self.data.len() + } +} +// <<< end python export + +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "container")] +pub(crate) mod py_container { + + #[pymodule_export] + use super::{PyFileDexContainer, PyInMemoryDexContainer}; +} diff --git a/src/file/header.rs b/src/file/header.rs index 2b5ee46..abcf44f 100644 --- a/src/file/header.rs +++ b/src/file/header.rs @@ -1,3 +1,7 @@ +#[cfg(feature = "python")] +use crate::py::{rs_struct_fields, rs_struct_wrapper}; +#[cfg(feature = "python")] +use std::sync::Arc; #[repr(C)] #[derive(Debug, Clone)] pub struct Header { @@ -113,3 +117,49 @@ pub struct HeaderV41 { pub container_size: u32, // total size of all dex files in the container. pub header_off: u32, // offset of this dex's header in the container. 
} + + +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("Header", PyDexHeader, Header); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexHeader, { + (checksum, u32), + (file_size, u32), + (header_size, u32), + (endian_tag, u32), + (link_size, u32), + (link_off, u32), + (map_off, u32), + (string_ids_size, u32), + (string_ids_off, u32), + (type_ids_size, u32), + (type_ids_off, u32), + (proto_ids_size, u32), + (proto_ids_off, u32), + (field_ids_size, u32), + (field_ids_off, u32), + (method_ids_size, u32), + (method_ids_off, u32), + (class_defs_size, u32), + (class_defs_off, u32), + (data_size, u32), + (data_off, u32), +}, + +#[getter] +pub fn version_int(&self) -> u32 { + self.0.get_version() +} + +#[getter] +pub fn magic(&self) -> &[u8; 8] { + self.0.get_magic() +} + +#[getter] +pub fn signature(&self) -> &[u8; 20] { + self.0.get_signature() +} +); +// <<< end python export diff --git a/src/file/instruction.rs b/src/file/instruction.rs index 4eeabc9..22149d6 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -1,5 +1,14 @@ +#[cfg(feature = "python")] +use std::sync::Arc; + +#[cfg(feature = "python")] +use crate::py::rs_type_wrapper; + use crate::{dex_err, error::DexError, Result}; +// ---------------------------------------------------------------------------- +// Instruction +// ---------------------------------------------------------------------------- pub struct Instruction<'a>(&'a [u16]); impl<'a> Instruction<'a> { @@ -51,7 +60,7 @@ impl<'a> Instruction<'a> { Ok(self.fetch16(offset)? as u32 | ((self.fetch16(offset + 1)? 
as u32) << 16)) } - const fn format_desc_of(opcode: Code) -> &'static InstructionDescriptor { + pub(crate) const fn format_desc_of(opcode: Code) -> &'static InstructionDescriptor { &Instruction::INSN_DESCRIPTORS[opcode as usize] } @@ -103,118 +112,249 @@ impl<'a> Instruction<'a> { } } -#[allow(non_camel_case_types)] -#[repr(u8)] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum Format { - k10x, // op - k12x, // op vA, vB - k11n, // op vA, #+B - k11x, // op vAA - k10t, // op +AA - k20t, // op +AAAA - k22x, // op vAA, vBBBB - k21t, // op vAA, +BBBB - k21s, // op vAA, #+BBBB - k21h, // op vAA, #+BBBB00000[00000000] - k21c, // op vAA, thing@BBBB - k23x, // op vAA, vBB, vCC - k22b, // op vAA, vBB, #+CC - k22t, // op vA, vB, +CCCC - k22s, // op vA, vB, #+CCCC - k22c, // op vA, vB, thing@CCCC - k32x, // op vAAAA, vBBBB - k30t, // op +AAAAAAAA - k31t, // op vAA, +BBBBBBBB - k31i, // op vAA, #+BBBBBBBB - k31c, // op vAA, thing@BBBBBBBB - k35c, // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG) - k3rc, // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB +// >>> begin python export +#[cfg(feature = "python")] +rs_type_wrapper!( + Instruction<'static>, + PyInstruction, + RsInstruction, + name: "Instruction", + module: "dexrs._internal.code" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyInstruction { + pub fn fetch16(&self, offset: u32) -> pyo3::PyResult { + Ok(self.inner.0.fetch16(offset as usize)?) + } + + pub fn fetch32(&self, offset: u32) -> pyo3::PyResult { + Ok(self.inner.0.fetch32(offset as usize)?) 
+ } + + #[staticmethod] + pub fn opcode_of(inst_data: u16) -> PyDexCode { + let opcode = Instruction::opcode_of(inst_data); + Instruction::format_desc_of(opcode).py_opcode + } + + #[staticmethod] + pub fn name_of(opcode: PyDexCode) -> &'static str { + Instruction::format_desc_of(opcode.into()).name + } +} +// <<< end python export + +// ---------------------------------------------------------------------------- +// Format IDs +// ---------------------------------------------------------------------------- +macro_rules! define_formats { + ($($fmtids:tt|)*) => { + #[allow(non_camel_case_types)] + #[repr(u8)] + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum Format { + $($fmtids,)* + } + #[cfg(feature = "python")] + #[repr(u8)] + #[allow(non_camel_case_types)] + #[derive(Debug, PartialEq, Eq)] + #[pyo3::pyclass(name = "Format", module = "dexrs._internal.code", eq)] + pub enum PyDexFormat { + $($fmtids,)* + } + + #[cfg(feature = "python")] + impl From for PyDexFormat { + fn from(f: Format) -> Self { + match f { + $(Format::$fmtids => PyDexFormat::$fmtids,)* + } + } + } + + }; +} + +define_formats!( + k10x| // op + k12x| // op vA, vB + k11n| // op vA, #+B + k11x| // op vAA + k10t| // op +AA + k20t| // op +AAAA + k22x| // op vAA, vBBBB + k21t| // op vAA, +BBBB + k21s| // op vAA, #+BBBB + k21h| // op vAA, #+BBBB00000[00000000] + k21c| // op vAA, thing@BBBB + k23x| // op vAA, vBB, vCC + k22b| // op vAA, vBB, #+CC + k22t| // op vA, vB, +CCCC + k22s| // op vA, vB, #+CCCC + k22c| // op vA, vB, thing@CCCC + k32x| // op vAAAA, vBBBB + k30t| // op +AAAAAAAA + k31t| // op vAA, +BBBBBBBB + k31i| // op vAA, #+BBBBBBBB + k31c| // op vAA, thing@BBBBBBBB + k35c| // op {vC, vD, vE, vF, vG}, thing@BBBB (B: count, A: vG) + k3rc| // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH (A: count) // format: AG op BBBB FEDC HHHH - k45cc, - + k45cc| // op {VCCCC .. 
v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count) // format: AA op BBBB CCCC HHHH - k4rcc, // op {VCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count) + k4rcc| // op {VCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH (AA: count) + k51l| // op vAA, #+BBBBBBBBBBBBBBBB + kInvalidFormat| +); - k51l, // op vAA, #+BBBBBBBBBBBBBBBB - kInvalidFormat, -} +// ---------------------------------------------------------------------------- +// Index Types +// ---------------------------------------------------------------------------- +macro_rules! define_index_types { + ($($index_ty:tt|)*) => { + pub enum IndexType { + $($index_ty,)* + } -pub enum IndexType { - Unknown = 0, - None, // has no index - TypeRef, // type reference index - StringRef, // string reference index - MethodRef, // method reference index - FieldRef, // field reference index - MethodAndProtoRef, // method and a proto reference index (for invoke-polymorphic) - CallSiteRef, // call site reference index - MethodHandleRef, // constant method handle reference index - ProtoRef, // prototype reference index -} + #[cfg(feature = "python")] + #[derive(Debug, PartialEq, Eq)] + #[pyo3::pyclass(name = "IndexType", module = "dexrs._internal.code", eq)] + pub enum PyDexIndexType { + $($index_ty,)* + } -#[rustfmt::skip] -#[allow(non_upper_case_globals)] -pub mod code_flags { - pub const Complex: u8 = 0xFF; - pub const Custom: u8 = 0xFE; + #[cfg(feature = "python")] + impl From for PyDexIndexType { + fn from(f: IndexType) -> Self { + match f { + $(IndexType::$index_ty => PyDexIndexType::$index_ty,)* + } + } + } + }; } -#[rustfmt::skip] -#[allow(non_upper_case_globals)] -pub mod signatures { - pub const PackedSwitchSignature: u16 = 0x0100; - pub const SparseSwitchSignature: u16 = 0x0200; - pub const ArrayDataSignature: u16 = 0x0300; -} -#[rustfmt::skip] -#[allow(non_upper_case_globals)] -pub mod flags { - pub const Branch: u8 = 0x01; // conditional or unconditional branch - pub const Continue: u8 = 0x02; // flow can continue 
to next statement - pub const Switch: u8 = 0x04; // switch statement - pub const Throw: u8 = 0x08; // could cause an exception to be thrown - pub const Return: u8 = 0x10; // returns, no additional statements - pub const Invoke: u8 = 0x20; // a flavor of invoke - pub const Unconditional: u8 = 0x40; // unconditional branch - pub const Experimental: u8 = 0x80; // is an experimental opcode +define_index_types!( + Unknown| // unknown + NoIndex| // has no index + TypeRef| // type reference index + StringRef| // string reference index + MethodRef| // method reference index + FieldRef| // field reference index + MethodAndProtoRef| // method and a proto reference index (for invoke-polymorphic) + CallSiteRef| // call site reference index + MethodHandleRef| // constant method handle reference index + ProtoRef| // prototype reference index +); + +// ---------------------------------------------------------------------------- +// Flags +// ---------------------------------------------------------------------------- +macro_rules! 
define_flags { + ($mod_name:ident, $py_mod_name:ident, $mod_name_str:literal, {$($name:ident:$target_type:ty=$value:tt;)*}) => { + #[rustfmt::skip] + #[allow(non_upper_case_globals)] + pub mod $mod_name { + $( + pub const $name: $target_type = $value; + )* + } + + #[cfg(feature = "python")] + #[rustfmt::skip] + #[allow(non_upper_case_globals)] + #[pyo3::pymodule(name = $mod_name_str)] + pub mod $py_mod_name { + use pyo3::types::PyModuleMethods; + use super::$mod_name; + + #[pymodule_init] + fn init(m: &pyo3::Bound<'_, pyo3::types::PyModule>) -> pyo3::PyResult<()> { + $(m.add(stringify!($name), $mod_name::$name)?;)* + Ok(()) + } + } + }; } +define_flags!( + code_flags, + py_code_flags, + "code_flags", + { + Complex: u8 = 0xFF; + Custom: u8 = 0xFE; + } +); + +define_flags!( + signatures, + py_signatures, + "signatures", + { + PackedSwitchSignature: u16 = 0x0100; + SparseSwitchSignature: u16 = 0x0200; + ArrayDataSignature: u16 = 0x0300; + } +); + +define_flags!( + flags, + py_flags, + "flags", + { + Branch: u8 = 0x01; // conditional or unconditional branch + Continue: u8 = 0x02; // flow can continue to next statement + Switch: u8 = 0x04; // switch statement + Throw: u8 = 0x08; // could cause an exception to be thrown + Return: u8 = 0x10; // returns, no additional statements + Invoke: u8 = 0x20; // a flavor of invoke + Unconditional: u8 = 0x40; // unconditional branch + Experimental: u8 = 0x80; // is an experimental opcode + } +); + // These flags may be used later to verify instructions -#[rustfmt::skip] -#[allow(non_upper_case_globals)] -pub mod verify_flags { - pub const VerifyNothing: u32 = 0x0000000; - pub const VerifyRegA: u32 = 0x0000001; - pub const VerifyRegAWide: u32 = 0x0000002; - pub const VerifyRegB: u32 = 0x0000004; - pub const VerifyRegBField: u32 = 0x0000008; - pub const VerifyRegBMethod: u32 = 0x0000010; - pub const VerifyRegBNewInstance: u32 = 0x0000020; - pub const VerifyRegBString: u32 = 0x0000040; - pub const VerifyRegBType: u32 = 0x0000080; - pub 
const VerifyRegBWide: u32 = 0x0000100; - pub const VerifyRegC: u32 = 0x0000200; - pub const VerifyRegCField: u32 = 0x0000400; - pub const VerifyRegCNewArray: u32 = 0x0000800; - pub const VerifyRegCType: u32 = 0x0001000; - pub const VerifyRegCWide: u32 = 0x0002000; - pub const VerifyArrayData: u32 = 0x0004000; - pub const VerifyBranchTarget: u32 = 0x0008000; - pub const VerifySwitchTargets: u32 = 0x0010000; - pub const VerifyVarArg: u32 = 0x0020000; - pub const VerifyVarArgNonZero: u32 = 0x0040000; - pub const VerifyVarArgRange: u32 = 0x0080000; - pub const VerifyVarArgRangeNonZero: u32 = 0x0100000; - pub const VerifyError: u32 = 0x0200000; - pub const VerifyRegHPrototype: u32 = 0x0400000; - pub const VerifyRegBCallSite: u32 = 0x0800000; - pub const VerifyRegBMethodHandle: u32 = 0x1000000; - pub const VerifyRegBPrototype: u32 = 0x2000000; -} +define_flags!( + verify_flags, + py_verify_flags, + "verify_flags", + { + VerifyNothing: u32 = 0x0000000; + VerifyRegA: u32 = 0x0000001; + VerifyRegAWide: u32 = 0x0000002; + VerifyRegB: u32 = 0x0000004; + VerifyRegBField: u32 = 0x0000008; + VerifyRegBMethod: u32 = 0x0000010; + VerifyRegBNewInstance: u32 = 0x0000020; + VerifyRegBString: u32 = 0x0000040; + VerifyRegBType: u32 = 0x0000080; + VerifyRegBWide: u32 = 0x0000100; + VerifyRegC: u32 = 0x0000200; + VerifyRegCField: u32 = 0x0000400; + VerifyRegCNewArray: u32 = 0x0000800; + VerifyRegCType: u32 = 0x0001000; + VerifyRegCWide: u32 = 0x0002000; + VerifyArrayData: u32 = 0x0004000; + VerifyBranchTarget: u32 = 0x0008000; + VerifySwitchTargets: u32 = 0x0010000; + VerifyVarArg: u32 = 0x0020000; + VerifyVarArgNonZero: u32 = 0x0040000; + VerifyVarArgRange: u32 = 0x0080000; + VerifyVarArgRangeNonZero: u32 = 0x0100000; + VerifyError: u32 = 0x0200000; + VerifyRegHPrototype: u32 = 0x0400000; + VerifyRegBCallSite: u32 = 0x0800000; + VerifyRegBMethodHandle: u32 = 0x1000000; + VerifyRegBPrototype: u32 = 0x2000000; + } +); impl<'a> Instruction<'a> { #[inline(always)] @@ -603,6 +743,8 @@ pub 
struct InstructionDescriptor { pub size_in_code_units: u8, pub opcode: Code, pub verify_flags: u32, + #[cfg(feature = "python")] + pub py_opcode: PyDexCode, } macro_rules! insn_desc_table { @@ -616,7 +758,9 @@ macro_rules! insn_desc_table { flags: $flags, size_in_code_units: Instruction::code_size_in_code_units_by_opcode(Code::$code, Format::$format), opcode: Code::$code, - verify_flags: $verify_flags + verify_flags: $verify_flags, + #[cfg(feature = "python")] + py_opcode: PyDexCode::$code },)* ]; } @@ -627,91 +771,113 @@ macro_rules! insn_desc_table { pub enum Code { $($code,)* } + + // python type generation without the need of second definition + #[cfg(feature = "python")] + #[repr(u8)] + #[allow(non_camel_case_types)] + #[pyo3::pyclass(name = "Code", module = "dexrs._internal.code", eq)] + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum PyDexCode { + $($code = Code::$code as u8,)* + } + + #[cfg(feature = "python")] + impl Into for PyDexCode { + #[inline] + fn into(self) -> Code { + Instruction::opcode_of(self as u8 as u16) + } + } }; } + +// ---------------------------------------------------------------------------- +// Instruction Descriptors +// ---------------------------------------------------------------------------- insn_desc_table!( - /* 0x00 */ {NOP, "nop", k10x, None, flags::Continue, verify_flags::VerifyNothing}, - /* 0x01 */ {MOVE, "move", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x02 */ {MOVE_FROM16, "move/from16", k22x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x03 */ {MOVE_16, "move/16", k32x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x04 */ {MOVE_WIDE, "move-wide", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x05 */ {MOVE_WIDE_FROM16, "move-wide/from16", k22x, None, flags::Continue, verify_flags::VerifyRegAWide | 
verify_flags::VerifyRegBWide}, - /* 0x06 */ {MOVE_WIDE_16, "move-wide/16", k32x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x07 */ {MOVE_OBJECT, "move-object", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x08 */ {MOVE_OBJECT_FROM16, "move-object/from16", k22x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x09 */ {MOVE_OBJECT_16, "move-object/16", k32x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x0a */ {MOVE_RESULT, "move-result", k11x, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x0b */ {MOVE_RESULT_WIDE, "move-result-wide", k11x, None, flags::Continue, verify_flags::VerifyRegAWide}, - /* 0x0c */ {MOVE_RESULT_OBJECT, "move-result-object", k11x, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x0d */ {MOVE_EXCEPTION, "move-exception", k11x, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x0e */ {RETURN_VOID, "return-void", k10x, None, flags::Return, verify_flags::VerifyNothing}, - /* 0x0f */ {RETURN, "return", k11x, None, flags::Return, verify_flags::VerifyRegA}, - /* 0x10 */ {RETURN_WIDE, "return-wide", k11x, None, flags::Return, verify_flags::VerifyRegAWide}, - /* 0x11 */ {RETURN_OBJECT, "return-object", k11x, None, flags::Return, verify_flags::VerifyRegA}, - /* 0x12 */ {CONST_4, "const/4", k11n, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x13 */ {CONST_16, "const/16", k21s, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x14 */ {CONST, "const", k31i, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x15 */ {CONST_HIGH16, "const/high16", k21h, None, flags::Continue, verify_flags::VerifyRegA}, - /* 0x16 */ {CONST_WIDE_16, "const-wide/16", k21s, None, flags::Continue, verify_flags::VerifyRegAWide}, - /* 0x17 */ {CONST_WIDE_32, "const-wide/32", k31i, None, flags::Continue, verify_flags::VerifyRegAWide}, - /* 0x18 */ {CONST_WIDE, "const-wide", k51l, 
None, flags::Continue, verify_flags::VerifyRegAWide}, - /* 0x19 */ {CONST_WIDE_HIGH16, "const-wide/high16", k21h, None, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x00 */ {NOP, "nop", k10x, NoIndex, flags::Continue, verify_flags::VerifyNothing}, + /* 0x01 */ {MOVE, "move", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x02 */ {MOVE_FROM16, "move/from16", k22x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x03 */ {MOVE_16, "move/16", k32x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x04 */ {MOVE_WIDE, "move-wide", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x05 */ {MOVE_WIDE_FROM16, "move-wide/from16", k22x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x06 */ {MOVE_WIDE_16, "move-wide/16", k32x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x07 */ {MOVE_OBJECT, "move-object", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x08 */ {MOVE_OBJECT_FROM16, "move-object/from16", k22x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x09 */ {MOVE_OBJECT_16, "move-object/16", k32x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x0a */ {MOVE_RESULT, "move-result", k11x, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0b */ {MOVE_RESULT_WIDE, "move-result-wide", k11x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x0c */ {MOVE_RESULT_OBJECT, "move-result-object", k11x, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0d */ {MOVE_EXCEPTION, "move-exception", k11x, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x0e */ {RETURN_VOID, "return-void", k10x, NoIndex, flags::Return, verify_flags::VerifyNothing}, + /* 0x0f */ {RETURN, 
"return", k11x, NoIndex, flags::Return, verify_flags::VerifyRegA}, + /* 0x10 */ {RETURN_WIDE, "return-wide", k11x, NoIndex, flags::Return, verify_flags::VerifyRegAWide}, + /* 0x11 */ {RETURN_OBJECT, "return-object", k11x, NoIndex, flags::Return, verify_flags::VerifyRegA}, + /* 0x12 */ {CONST_4, "const/4", k11n, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x13 */ {CONST_16, "const/16", k21s, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x14 */ {CONST, "const", k31i, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x15 */ {CONST_HIGH16, "const/high16", k21h, NoIndex, flags::Continue, verify_flags::VerifyRegA}, + /* 0x16 */ {CONST_WIDE_16, "const-wide/16", k21s, NoIndex, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x17 */ {CONST_WIDE_32, "const-wide/32", k31i, NoIndex, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x18 */ {CONST_WIDE, "const-wide", k51l, NoIndex, flags::Continue, verify_flags::VerifyRegAWide}, + /* 0x19 */ {CONST_WIDE_HIGH16, "const-wide/high16", k21h, NoIndex, flags::Continue, verify_flags::VerifyRegAWide}, /* 0x1a */ {CONST_STRING, "const-string", k21c, StringRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBString}, /* 0x1b */ {CONST_STRING_JUMBO, "const-string/jumbo", k31c, StringRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBString}, /* 0x1c */ {CONST_CLASS, "const-class", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBType}, - /* 0x1d */ {MONITOR_ENTER, "monitor-enter", k11x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA}, - /* 0x1e */ {MONITOR_EXIT, "monitor-exit", k11x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA}, + /* 0x1d */ {MONITOR_ENTER, "monitor-enter", k11x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA}, + /* 0x1e */ {MONITOR_EXIT, "monitor-exit", k11x, NoIndex, flags::Continue | flags::Throw, 
verify_flags::VerifyRegA}, /* 0x1f */ {CHECK_CAST, "check-cast", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBType}, /* 0x20 */ {INSTANCE_OF, "instance-of", k22c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCType}, - /* 0x21 */ {ARRAY_LENGTH, "array-length", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x21 */ {ARRAY_LENGTH, "array-length", k12x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, /* 0x22 */ {NEW_INSTANCE, "new-instance", k21c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBNewInstance}, /* 0x23 */ {NEW_ARRAY, "new-array", k22c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCNewArray}, /* 0x24 */ {FILLED_NEW_ARRAY, "filled-new-array", k35c, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegBType | verify_flags::VerifyVarArg}, /* 0x25 */ {FILLED_NEW_ARRAY_RANGE, "filled-new-array/range", k3rc, TypeRef, flags::Continue | flags::Throw, verify_flags::VerifyRegBType | verify_flags::VerifyVarArgRange}, - /* 0x26 */ {FILL_ARRAY_DATA, "fill-array-data", k31t, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyArrayData}, - /* 0x27 */ {THROW, "throw", k11x, None, flags::Throw, verify_flags::VerifyRegA}, - /* 0x28 */ {GOTO, "goto", k10t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, - /* 0x29 */ {GOTO_16, "goto/16", k20t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, - /* 0x2a */ {GOTO_32, "goto/32", k30t, None, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, - /* 0x2b */ {PACKED_SWITCH, "packed-switch", k31t, None, flags::Continue | flags::Switch, verify_flags::VerifyRegA | 
verify_flags::VerifySwitchTargets}, - /* 0x2c */ {SPARSE_SWITCH, "sparse-switch", k31t, None, flags::Continue | flags::Switch, verify_flags::VerifyRegA | verify_flags::VerifySwitchTargets}, - /* 0x2d */ {CMPL_FLOAT, "cmpl-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x2e */ {CMPG_FLOAT, "cmpg-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x2f */ {CMPL_DOUBLE, "cmpl-double", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x30 */ {CMPG_DOUBLE, "cmpg-double", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x31 */ {CMP_LONG, "cmp-long", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x32 */ {IF_EQ, "if-eq", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x33 */ {IF_NE, "if-ne", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x34 */ {IF_LT, "if-lt", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x35 */ {IF_GE, "if-ge", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x36 */ {IF_GT, "if-gt", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x37 */ {IF_LE, "if-le", k22t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, - /* 0x38 */ {IF_EQZ, "if-eqz", k21t, None, 
flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, - /* 0x39 */ {IF_NEZ, "if-nez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, - /* 0x3a */ {IF_LTZ, "if-ltz", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, - /* 0x3b */ {IF_GEZ, "if-gez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, - /* 0x3c */ {IF_GTZ, "if-gtz", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, - /* 0x3d */ {IF_LEZ, "if-lez", k21t, None, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x26 */ {FILL_ARRAY_DATA, "fill-array-data", k31t, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyArrayData}, + /* 0x27 */ {THROW, "throw", k11x, NoIndex, flags::Throw, verify_flags::VerifyRegA}, + /* 0x28 */ {GOTO, "goto", k10t, NoIndex, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x29 */ {GOTO_16, "goto/16", k20t, NoIndex, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x2a */ {GOTO_32, "goto/32", k30t, NoIndex, flags::Branch | flags::Unconditional, verify_flags::VerifyBranchTarget}, + /* 0x2b */ {PACKED_SWITCH, "packed-switch", k31t, NoIndex, flags::Continue | flags::Switch, verify_flags::VerifyRegA | verify_flags::VerifySwitchTargets}, + /* 0x2c */ {SPARSE_SWITCH, "sparse-switch", k31t, NoIndex, flags::Continue | flags::Switch, verify_flags::VerifyRegA | verify_flags::VerifySwitchTargets}, + /* 0x2d */ {CMPL_FLOAT, "cmpl-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x2e */ {CMPG_FLOAT, "cmpg-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | 
verify_flags::VerifyRegC}, + /* 0x2f */ {CMPL_DOUBLE, "cmpl-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x30 */ {CMPG_DOUBLE, "cmpg-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x31 */ {CMP_LONG, "cmp-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x32 */ {IF_EQ, "if-eq", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x33 */ {IF_NE, "if-ne", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x34 */ {IF_LT, "if-lt", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x35 */ {IF_GE, "if-ge", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x36 */ {IF_GT, "if-gt", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x37 */ {IF_LE, "if-le", k22t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyBranchTarget}, + /* 0x38 */ {IF_EQZ, "if-eqz", k21t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x39 */ {IF_NEZ, "if-nez", k21t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3a */ {IF_LTZ, "if-ltz", k21t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3b */ {IF_GEZ, "if-gez", k21t, NoIndex, 
flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3c */ {IF_GTZ, "if-gtz", k21t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, + /* 0x3d */ {IF_LEZ, "if-lez", k21t, NoIndex, flags::Continue | flags::Branch, verify_flags::VerifyRegA | verify_flags::VerifyBranchTarget}, /* 0x3e */ {UNUSED_3E, "unused-3e", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x3f */ {UNUSED_3F, "unused-3f", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x40 */ {UNUSED_40, "unused-40", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x41 */ {UNUSED_41, "unused-41", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x42 */ {UNUSED_42, "unused-42", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x43 */ {UNUSED_43, "unused-43", k10x, Unknown, 0, verify_flags::VerifyError}, - /* 0x44 */ {AGET, "aget", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x45 */ {AGET_WIDE, "aget-wide", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x46 */ {AGET_OBJECT, "aget-object", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x47 */ {AGET_BOOLEAN, "aget-boolean", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x48 */ {AGET_BYTE, "aget-byte", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x49 */ {AGET_CHAR, "aget-char", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x4a */ {AGET_SHORT, "aget-short", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | 
verify_flags::VerifyRegC}, - /* 0x4b */ {APUT, "aput", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x4c */ {APUT_WIDE, "aput-wide", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x4d */ {APUT_OBJECT, "aput-object", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x4e */ {APUT_BOOLEAN, "aput-boolean", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x4f */ {APUT_BYTE, "aput-byte", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x50 */ {APUT_CHAR, "aput-char", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x51 */ {APUT_SHORT, "aput-short", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x44 */ {AGET, "aget", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x45 */ {AGET_WIDE, "aget-wide", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x46 */ {AGET_OBJECT, "aget-object", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x47 */ {AGET_BOOLEAN, "aget-boolean", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x48 */ {AGET_BYTE, "aget-byte", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | 
verify_flags::VerifyRegC}, + /* 0x49 */ {AGET_CHAR, "aget-char", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4a */ {AGET_SHORT, "aget-short", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4b */ {APUT, "aput", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4c */ {APUT_WIDE, "aput-wide", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4d */ {APUT_OBJECT, "aput-object", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4e */ {APUT_BOOLEAN, "aput-boolean", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x4f */ {APUT_BYTE, "aput-byte", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x50 */ {APUT_CHAR, "aput-char", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x51 */ {APUT_SHORT, "aput-short", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, /* 0x52 */ {IGET, "iget", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, /* 0x53 */ {IGET_WIDE, "iget-wide", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, /* 0x54 */ {IGET_OBJECT, "iget-object", k22c, FieldRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB | verify_flags::VerifyRegCField}, @@ -753,110 +919,110 @@ insn_desc_table!( /* 0x78 */ {INVOKE_INTERFACE_RANGE, "invoke-interface/range", k3rc, MethodRef, flags::Continue | flags::Throw | flags::Invoke, verify_flags::VerifyRegBMethod | verify_flags::VerifyVarArgRangeNonZero}, /* 0x79 */ {UNUSED_79, "unused-79", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0x7a */ {UNUSED_7A, "unused-7a", k10x, Unknown, 0, verify_flags::VerifyError}, - /* 0x7b */ {NEG_INT, "neg-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x7c */ {NOT_INT, "not-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x7d */ {NEG_LONG, "neg-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x7e */ {NOT_LONG, "not-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x7f */ {NEG_FLOAT, "neg-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x80 */ {NEG_DOUBLE, "neg-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x81 */ {INT_TO_LONG, "int-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0x82 */ {INT_TO_FLOAT, "int-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x83 */ {INT_TO_DOUBLE, "int-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0x84 */ {LONG_TO_INT, "long-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, - /* 0x85 */ {LONG_TO_FLOAT, "long-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, - /* 0x86 */ {LONG_TO_DOUBLE, "long-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - 
/* 0x87 */ {FLOAT_TO_INT, "float-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x88 */ {FLOAT_TO_LONG, "float-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0x89 */ {FLOAT_TO_DOUBLE, "float-to-double", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0x8a */ {DOUBLE_TO_INT, "double-to-int", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, - /* 0x8b */ {DOUBLE_TO_LONG, "double-to-long", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0x8c */ {DOUBLE_TO_FLOAT, "double-to-float", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, - /* 0x8d */ {INT_TO_BYTE, "int-to-byte", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x8e */ {INT_TO_CHAR, "int-to-char", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x8f */ {INT_TO_SHORT, "int-to-short", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0x90 */ {ADD_INT, "add-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x91 */ {SUB_INT, "sub-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x92 */ {MUL_INT, "mul-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x93 */ {DIV_INT, "div-int", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x94 */ {REM_INT, "rem-int", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x95 */ {AND_INT, "and-int", k23x, None, flags::Continue, 
verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x96 */ {OR_INT, "or-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x97 */ {XOR_INT, "xor-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x98 */ {SHL_INT, "shl-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x99 */ {SHR_INT, "shr-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x9a */ {USHR_INT, "ushr-int", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0x9b */ {ADD_LONG, "add-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x9c */ {SUB_LONG, "sub-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x9d */ {MUL_LONG, "mul-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x9e */ {DIV_LONG, "div-long", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0x9f */ {REM_LONG, "rem-long", k23x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xa0 */ {AND_LONG, "and-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xa1 */ {OR_LONG, "or-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xa2 */ {XOR_LONG, "xor-long", k23x, None, flags::Continue, 
verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xa3 */ {SHL_LONG, "shl-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, - /* 0xa4 */ {SHR_LONG, "shr-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, - /* 0xa5 */ {USHR_LONG, "ushr-long", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, - /* 0xa6 */ {ADD_FLOAT, "add-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0xa7 */ {SUB_FLOAT, "sub-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0xa8 */ {MUL_FLOAT, "mul-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0xa9 */ {DIV_FLOAT, "div-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0xaa */ {REM_FLOAT, "rem-float", k23x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, - /* 0xab */ {ADD_DOUBLE, "add-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xac */ {SUB_DOUBLE, "sub-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xad */ {MUL_DOUBLE, "mul-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xae */ {DIV_DOUBLE, "div-double", k23x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xaf */ {REM_DOUBLE, "rem-double", k23x, None, 
flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, - /* 0xb0 */ {ADD_INT_2ADDR, "add-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb1 */ {SUB_INT_2ADDR, "sub-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb2 */ {MUL_INT_2ADDR, "mul-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb3 */ {DIV_INT_2ADDR, "div-int/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb4 */ {REM_INT_2ADDR, "rem-int/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb5 */ {AND_INT_2ADDR, "and-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb6 */ {OR_INT_2ADDR, "or-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb7 */ {XOR_INT_2ADDR, "xor-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb8 */ {SHL_INT_2ADDR, "shl-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xb9 */ {SHR_INT_2ADDR, "shr-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xba */ {USHR_INT_2ADDR, "ushr-int/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xbb */ {ADD_LONG_2ADDR, "add-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xbc */ {SUB_LONG_2ADDR, "sub-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xbd */ {MUL_LONG_2ADDR, "mul-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xbe */ 
{DIV_LONG_2ADDR, "div-long/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xbf */ {REM_LONG_2ADDR, "rem-long/2addr", k12x, None, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xc0 */ {AND_LONG_2ADDR, "and-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xc1 */ {OR_LONG_2ADDR, "or-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xc2 */ {XOR_LONG_2ADDR, "xor-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xc3 */ {SHL_LONG_2ADDR, "shl-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0xc4 */ {SHR_LONG_2ADDR, "shr-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0xc5 */ {USHR_LONG_2ADDR, "ushr-long/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, - /* 0xc6 */ {ADD_FLOAT_2ADDR, "add-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xc7 */ {SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xc8 */ {MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xc9 */ {DIV_FLOAT_2ADDR, "div-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xca */ {REM_FLOAT_2ADDR, "rem-float/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xcb */ {ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xcc */ {SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, None, 
flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xcd */ {MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xce */ {DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xcf */ {REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, None, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, - /* 0xd0 */ {ADD_INT_LIT16, "add-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd1 */ {RSUB_INT, "rsub-int", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd2 */ {MUL_INT_LIT16, "mul-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd3 */ {DIV_INT_LIT16, "div-int/lit16", k22s, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd4 */ {REM_INT_LIT16, "rem-int/lit16", k22s, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd5 */ {AND_INT_LIT16, "and-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd6 */ {OR_INT_LIT16, "or-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd7 */ {XOR_INT_LIT16, "xor-int/lit16", k22s, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd8 */ {ADD_INT_LIT8, "add-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xd9 */ {RSUB_INT_LIT8, "rsub-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xda */ {MUL_INT_LIT8, "mul-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xdb */ {DIV_INT_LIT8, "div-int/lit8", k22b, None, 
flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xdc */ {REM_INT_LIT8, "rem-int/lit8", k22b, None, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xdd */ {AND_INT_LIT8, "and-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xde */ {OR_INT_LIT8, "or-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xdf */ {XOR_INT_LIT8, "xor-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xe0 */ {SHL_INT_LIT8, "shl-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xe1 */ {SHR_INT_LIT8, "shr-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, - /* 0xe2 */ {USHR_INT_LIT8, "ushr-int/lit8", k22b, None, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x7b */ {NEG_INT, "neg-int", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x7c */ {NOT_INT, "not-int", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x7d */ {NEG_LONG, "neg-long", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x7e */ {NOT_LONG, "not-long", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x7f */ {NEG_FLOAT, "neg-float", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x80 */ {NEG_DOUBLE, "neg-double", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x81 */ {INT_TO_LONG, "int-to-long", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x82 */ {INT_TO_FLOAT, "int-to-float", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB}, + /* 0x83 */ {INT_TO_DOUBLE, "int-to-double", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x84 */ {LONG_TO_INT, "long-to-int", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x85 */ {LONG_TO_FLOAT, "long-to-float", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x86 */ {LONG_TO_DOUBLE, "long-to-double", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x87 */ {FLOAT_TO_INT, "float-to-int", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x88 */ {FLOAT_TO_LONG, "float-to-long", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x89 */ {FLOAT_TO_DOUBLE, "float-to-double", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0x8a */ {DOUBLE_TO_INT, "double-to-int", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x8b */ {DOUBLE_TO_LONG, "double-to-long", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0x8c */ {DOUBLE_TO_FLOAT, "double-to-float", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegBWide}, + /* 0x8d */ {INT_TO_BYTE, "int-to-byte", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x8e */ {INT_TO_CHAR, "int-to-char", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x8f */ {INT_TO_SHORT, "int-to-short", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0x90 */ {ADD_INT, "add-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x91 */ {SUB_INT, "sub-int", k23x, NoIndex, flags::Continue, 
verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x92 */ {MUL_INT, "mul-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x93 */ {DIV_INT, "div-int", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x94 */ {REM_INT, "rem-int", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x95 */ {AND_INT, "and-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x96 */ {OR_INT, "or-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x97 */ {XOR_INT, "xor-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x98 */ {SHL_INT, "shl-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x99 */ {SHR_INT, "shr-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x9a */ {USHR_INT, "ushr-int", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0x9b */ {ADD_LONG, "add-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9c */ {SUB_LONG, "sub-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9d */ {MUL_LONG, "mul-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9e */ {DIV_LONG, "div-long", k23x, NoIndex, flags::Continue | flags::Throw, 
verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0x9f */ {REM_LONG, "rem-long", k23x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa0 */ {AND_LONG, "and-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa1 */ {OR_LONG, "or-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa2 */ {XOR_LONG, "xor-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xa3 */ {SHL_LONG, "shl-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa4 */ {SHR_LONG, "shr-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa5 */ {USHR_LONG, "ushr-long", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegC}, + /* 0xa6 */ {ADD_FLOAT, "add-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa7 */ {SUB_FLOAT, "sub-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa8 */ {MUL_FLOAT, "mul-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xa9 */ {DIV_FLOAT, "div-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xaa */ {REM_FLOAT, "rem-float", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB | verify_flags::VerifyRegC}, + /* 0xab */ 
{ADD_DOUBLE, "add-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xac */ {SUB_DOUBLE, "sub-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xad */ {MUL_DOUBLE, "mul-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xae */ {DIV_DOUBLE, "div-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xaf */ {REM_DOUBLE, "rem-double", k23x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide | verify_flags::VerifyRegCWide}, + /* 0xb0 */ {ADD_INT_2ADDR, "add-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb1 */ {SUB_INT_2ADDR, "sub-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb2 */ {MUL_INT_2ADDR, "mul-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb3 */ {DIV_INT_2ADDR, "div-int/2addr", k12x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb4 */ {REM_INT_2ADDR, "rem-int/2addr", k12x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb5 */ {AND_INT_2ADDR, "and-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb6 */ {OR_INT_2ADDR, "or-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb7 */ {XOR_INT_2ADDR, "xor-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xb8 */ {SHL_INT_2ADDR, "shl-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA 
| verify_flags::VerifyRegB}, + /* 0xb9 */ {SHR_INT_2ADDR, "shr-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xba */ {USHR_INT_2ADDR, "ushr-int/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xbb */ {ADD_LONG_2ADDR, "add-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbc */ {SUB_LONG_2ADDR, "sub-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbd */ {MUL_LONG_2ADDR, "mul-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbe */ {DIV_LONG_2ADDR, "div-long/2addr", k12x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xbf */ {REM_LONG_2ADDR, "rem-long/2addr", k12x, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc0 */ {AND_LONG_2ADDR, "and-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc1 */ {OR_LONG_2ADDR, "or-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc2 */ {XOR_LONG_2ADDR, "xor-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xc3 */ {SHL_LONG_2ADDR, "shl-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc4 */ {SHR_LONG_2ADDR, "shr-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc5 */ {USHR_LONG_2ADDR, "ushr-long/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegB}, + /* 0xc6 */ {ADD_FLOAT_2ADDR, "add-float/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | 
verify_flags::VerifyRegB}, + /* 0xc7 */ {SUB_FLOAT_2ADDR, "sub-float/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xc8 */ {MUL_FLOAT_2ADDR, "mul-float/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xc9 */ {DIV_FLOAT_2ADDR, "div-float/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xca */ {REM_FLOAT_2ADDR, "rem-float/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xcb */ {ADD_DOUBLE_2ADDR, "add-double/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcc */ {SUB_DOUBLE_2ADDR, "sub-double/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcd */ {MUL_DOUBLE_2ADDR, "mul-double/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xce */ {DIV_DOUBLE_2ADDR, "div-double/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xcf */ {REM_DOUBLE_2ADDR, "rem-double/2addr", k12x, NoIndex, flags::Continue, verify_flags::VerifyRegAWide | verify_flags::VerifyRegBWide}, + /* 0xd0 */ {ADD_INT_LIT16, "add-int/lit16", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd1 */ {RSUB_INT, "rsub-int", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd2 */ {MUL_INT_LIT16, "mul-int/lit16", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd3 */ {DIV_INT_LIT16, "div-int/lit16", k22s, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd4 */ {REM_INT_LIT16, "rem-int/lit16", k22s, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd5 */ 
{AND_INT_LIT16, "and-int/lit16", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd6 */ {OR_INT_LIT16, "or-int/lit16", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd7 */ {XOR_INT_LIT16, "xor-int/lit16", k22s, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd8 */ {ADD_INT_LIT8, "add-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xd9 */ {RSUB_INT_LIT8, "rsub-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xda */ {MUL_INT_LIT8, "mul-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdb */ {DIV_INT_LIT8, "div-int/lit8", k22b, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdc */ {REM_INT_LIT8, "rem-int/lit8", k22b, NoIndex, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdd */ {AND_INT_LIT8, "and-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xde */ {OR_INT_LIT8, "or-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xdf */ {XOR_INT_LIT8, "xor-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe0 */ {SHL_INT_LIT8, "shl-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe1 */ {SHR_INT_LIT8, "shr-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, + /* 0xe2 */ {USHR_INT_LIT8, "ushr-int/lit8", k22b, NoIndex, flags::Continue, verify_flags::VerifyRegA | verify_flags::VerifyRegB}, /* 0xe3 */ {UNUSED_E3, "unused-e3", k10x, Unknown, 0, verify_flags::VerifyError}, /* 0xe4 */ {UNUSED_E4, "unused-e4", k10x, Unknown, 0, 
verify_flags::VerifyError}, /* 0xe5 */ {UNUSED_E5, "unused-e5", k10x, Unknown, 0, verify_flags::VerifyError}, @@ -887,3 +1053,19 @@ insn_desc_table!( /* 0xfe */ {CONST_METHOD_HANDLE, "const-method-handle", k21c, MethodHandleRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBMethodHandle}, /* 0xff */ {CONST_METHOD_TYPE, "const-method-type", k21c, ProtoRef, flags::Continue | flags::Throw, verify_flags::VerifyRegA | verify_flags::VerifyRegBPrototype}, ); + +// >>> begin python module export +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "code")] +pub(crate) mod py_code { + #[pymodule_export] + use super::{PyDexCode, PyDexFormat, PyDexIndexType, PyInstruction}; + + #[pymodule_export] + use crate::file::PyCodeItemAccessor; + + // constants + #[pymodule_export] + use super::{py_code_flags, py_signatures, py_flags, py_verify_flags}; +} +// <<< end python module export diff --git a/src/file/structs.rs b/src/file/structs.rs index b9fafd9..83a1eb6 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -1,5 +1,13 @@ use plain::Plain; +#[cfg(feature = "python")] +use crate::py::{rs_struct_fields, rs_struct_wrapper}; +#[cfg(feature = "python")] +use std::sync::Arc; + +// -------------------------------------------------------------------- +// StringId +// -------------------------------------------------------------------- pub type StringIndex = u32; #[repr(C)] @@ -17,6 +25,18 @@ impl StringId { } } +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("StringId", PyDexStringId, StringId); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexStringId, { + (string_data_off, StringIndex), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// TypeId +// -------------------------------------------------------------------- pub type TypeIndex = u16; #[repr(C)] @@ -27,6 +47,18 @@ pub struct TypeId { unsafe impl plain::Plain for TypeId {} +// >>> begin python
export +#[cfg(feature = "python")] +rs_struct_wrapper!("TypeId", PyDexTypeId, TypeId); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexTypeId, { + (descriptor_idx, StringIndex), +},); +/// <<< end python export + +// -------------------------------------------------------------------- +// FieldId +// -------------------------------------------------------------------- pub type FieldIndex = u32; #[repr(C)] @@ -39,6 +71,20 @@ pub struct FieldId { unsafe impl plain::Plain for FieldId {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("FieldId", PyDexFieldId, FieldId); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexFieldId, { + (class_idx, TypeIndex), + (type_idx, TypeIndex), + (name_idx, StringIndex), +},); +/// <<< end python export + +// -------------------------------------------------------------------- +// ProtoId +// -------------------------------------------------------------------- pub type ProtoIndex = u16; #[repr(C)] @@ -52,6 +98,22 @@ pub struct ProtoId { unsafe impl plain::Plain for ProtoId {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("ProtoId", PyDexProtoId, ProtoId); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexProtoId, { + (shorty_idx, StringIndex), + (return_type_idx, TypeIndex), + (parameters_off, u32), +},); +/// <<< end python export + +// -------------------------------------------------------------------- +// MethodId +// -------------------------------------------------------------------- +pub type MethodIndex = u32; + #[repr(C)] #[derive(Debug, Clone)] pub struct MethodId { @@ -62,6 +124,22 @@ pub struct MethodId { unsafe impl plain::Plain for MethodId {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("MethodId", PyDexMethodId, MethodId); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexMethodId, { + (class_idx, TypeIndex), + (proto_idx, ProtoIndex), + (name_idx, StringIndex), +},); +// <<< end python export + +// 
-------------------------------------------------------------------- +// ClassDef +// -------------------------------------------------------------------- +pub type ClassDefIndex = u32; + #[repr(C)] #[derive(Debug, Clone)] pub struct ClassDef { @@ -79,6 +157,25 @@ pub struct ClassDef { unsafe impl plain::Plain for ClassDef {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("ClassDef", PyDexClassDef, ClassDef); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexClassDef, { + (class_idx, TypeIndex), + (access_flags, u32), + (superclass_idx, TypeIndex), + (interfaces_off, u32), + (source_file_idx, StringIndex), + (annotations_off, u32), + (class_data_off, u32), + (static_values_off, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// Typeitem +// -------------------------------------------------------------------- #[repr(C)] #[derive(Debug, Clone)] pub struct TypeItem { @@ -89,6 +186,18 @@ unsafe impl plain::Plain for TypeItem {} pub type TypeList<'a> = &'a [TypeItem]; +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("TypeItem", PyDexTypeItem, TypeItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexTypeItem, { + (type_idx, TypeIndex), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// MapItem (private) +// -------------------------------------------------------------------- #[repr(C)] #[derive(Debug)] pub struct MapItem { @@ -129,8 +238,11 @@ pub enum MapItemType { HiddenapiClassData = 0xF000, } +// -------------------------------------------------------------------- +// MethodHandleItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MethodHandleItem { pub method_handle_type: TypeIndex, reserved1_: u16, @@ -140,14 +252,39 @@ pub struct MethodHandleItem { unsafe impl plain::Plain for 
MethodHandleItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("MethodHandleItem", PyDexMethodHandleItem, MethodHandleItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexMethodHandleItem, { + (method_handle_type, TypeIndex), + (field_or_method_idx, u16), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// CallSiteIdItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CallSiteIdItem { pub data_off: u32, // Offset into data section pointing to encoded array items. } unsafe impl plain::Plain for CallSiteIdItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("CallSiteIdItem", PyDexCallSiteIdItem, CallSiteIdItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexCallSiteIdItem, { + (data_off, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// HiddenapiClassData (private) +// -------------------------------------------------------------------- #[repr(C)] #[derive(Debug)] pub struct HiddenapiClassData<'a> { @@ -169,8 +306,11 @@ impl<'a> HiddenapiClassData<'a> { unsafe impl<'a> plain::Plain for HiddenapiClassData<'a> {} +// -------------------------------------------------------------------- +// CodeItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CodeItem { pub registers_size: u16, pub ins_size: u16, @@ -182,8 +322,25 @@ pub struct CodeItem { unsafe impl plain::Plain for CodeItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("CodeItem", PyDexCodeItem, CodeItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexCodeItem, { + (registers_size, u16), + (ins_size, u16), + (outs_size, u16), + (tries_size, u16), + (debug_info_off, u32), + (insns_size, u32), +},); +// 
<<< end python export + +// -------------------------------------------------------------------- +// TryItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TryItem { pub start_addr: u32, pub insn_count: u16, @@ -192,8 +349,22 @@ pub struct TryItem { unsafe impl plain::Plain for TryItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("TryItem", PyDexTryItem, TryItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexTryItem, { + (start_addr, u32), + (insn_count, u16), + (handler_off, u16), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// AnnotationsDirectoryItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct AnnotationsDirectoryItem { pub class_annotations_off: u32, pub fields_size: u32, @@ -203,8 +374,27 @@ pub struct AnnotationsDirectoryItem { unsafe impl plain::Plain for AnnotationsDirectoryItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!( + "AnnotationsDirectoryItem", + PyDexAnnotationsDirectoryItem, + AnnotationsDirectoryItem +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexAnnotationsDirectoryItem, { + (class_annotations_off, u32), + (fields_size, u32), + (methods_size, u32), + (parameters_size, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// FieldAnnotationsItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FieldAnnotationsItem { pub field_idx: u32, pub annotations_off: u32, @@ -212,8 +402,25 @@ pub struct FieldAnnotationsItem { unsafe impl plain::Plain for FieldAnnotationsItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!( + "FieldAnnotationsItem", + 
PyDexFieldAnnotationsItem, + FieldAnnotationsItem +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexFieldAnnotationsItem, { + (field_idx, u32), + (annotations_off, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// MethodAnnotationsItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MethodAnnotationsItem { pub method_idx: u32, pub annotations_off: u32, @@ -221,8 +428,25 @@ pub struct MethodAnnotationsItem { unsafe impl plain::Plain for MethodAnnotationsItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!( + "MethodAnnotationsItem", + PyDexMethodAnnotationsItem, + MethodAnnotationsItem +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexMethodAnnotationsItem, { + (method_idx, u32), + (annotations_off, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// ParameterAnnotationsItem +// -------------------------------------------------------------------- #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ParameterAnnotationsItem { pub method_idx: u32, pub annotations_off: u32, @@ -230,6 +454,23 @@ pub struct ParameterAnnotationsItem { unsafe impl plain::Plain for ParameterAnnotationsItem {} +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!( + "ParameterAnnotationsItem", + PyDexParameterAnnotationsItem, + ParameterAnnotationsItem +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexParameterAnnotationsItem, { + (method_idx, u32), + (annotations_off, u32), +},); +// <<< end python export + +// -------------------------------------------------------------------- +// Annotations (private for now) +// -------------------------------------------------------------------- pub type AnnotationSetItem<'a> = &'a [u32]; pub type EncodedArray = Vec; @@ -273,4 +514,25 @@ pub struct 
EncodedAnnotation { pub struct AnnotationItem { pub visibility: u8, pub annotation: EncodedAnnotation, -} \ No newline at end of file +} + +// -------------------------------------------------------------------- +// Python API +// -------------------------------------------------------------------- +// >>> begin python module export +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "structs")] +pub(crate) mod py_structs { + + #[pymodule_export] + use super::{ + PyDexAnnotationsDirectoryItem, PyDexCallSiteIdItem, PyDexClassDef, PyDexCodeItem, + PyDexFieldAnnotationsItem, PyDexFieldId, PyDexMethodAnnotationsItem, PyDexMethodHandleItem, + PyDexMethodId, PyDexParameterAnnotationsItem, PyDexProtoId, PyDexStringId, PyDexTryItem, + PyDexTypeId, PyDexTypeItem + }; + + #[pymodule_export] + use crate::file::header::PyDexHeader; +} +// <<< end python module export diff --git a/src/leb128.rs b/src/leb128.rs index b7d3069..e800f7d 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -40,3 +40,21 @@ pub fn decode_leb128p1_off(data_in: &[u8], ptr_pos: &mut usize) -> Result { *ptr_pos += size; Ok(value) } + +// python exports +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "leb128")] +pub(crate) mod py_leb128 { + use pyo3::PyResult; + + #[pyo3::pyfunction] + pub fn decode_leb128(data_in: &[u8]) -> PyResult<(u32, usize)> { + Ok(super::decode_leb128::(data_in)?) + } + + #[pyo3::pyfunction] + pub fn decode_leb128p1(data_in: &[u8]) -> PyResult<(i32, usize)> { + Ok(super::decode_leb128p1(data_in)?) 
+ } +} +// end python exports diff --git a/src/lib.rs b/src/lib.rs index 0acc23f..eddb7d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,20 +17,26 @@ pub(crate) mod py; mod _internal { #[pymodule_export] - use crate::py::container::py_container; + use crate::file::container::py_container; #[pymodule_export] use crate::py::file::py_file; #[pymodule_export] - use crate::py::error::py_error; + use crate::error::py_error; #[pymodule_export] - use crate::py::structs::py_structs; + use crate::file::structs::py_structs; #[pymodule_export] - use crate::py::mutf8::py_mutf8; + use crate::utf::py_utf; #[pymodule_export] - use crate::py::class_accessor::py_class_accessor; -} \ No newline at end of file + use crate::leb128::py_leb128; + + #[pymodule_export] + use crate::file::class_accessor::py_class_accessor; + + #[pymodule_export] + use crate::file::instruction::py_code; +} diff --git a/src/py.rs b/src/py.rs new file mode 100644 index 0000000..797c803 --- /dev/null +++ b/src/py.rs @@ -0,0 +1,80 @@ +pub(crate) mod file; + +macro_rules! 
rs_type_wrapper { + ($src_type:ty, $py_type:ident, $rs_type:ident, name: $name:literal, module: $module:literal) => { + #[cfg(feature = "python")] + pub struct $rs_type($src_type); + + #[cfg(feature = "python")] + #[pyo3::pyclass(name = $name, module = $module)] + pub struct $py_type { + inner: Arc<$rs_type>, + } + + #[cfg(feature = "python")] + impl From<$src_type> for $py_type { + fn from(value: $src_type) -> Self { + $py_type { + inner: Arc::new($rs_type(value)), + } + } + } + + #[cfg(feature = "python")] + impl $py_type { + pub fn from_instance(value: $src_type) -> Self { + $py_type::from(value) + } + } + }; + ($src_type:ty, $py_type:ident, name: $name:literal, module: $module:literal) => { + #[cfg(feature = "python")] + #[pyo3::pyclass(name = $name, module = $module)] + pub struct $py_type(Arc<$src_type>); + + #[cfg(feature = "python")] + impl From<$src_type> for $py_type { + fn from(value: $src_type) -> Self { + $py_type(Arc::new(value)) + } + } + }; +} + +macro_rules! rs_struct_wrapper { + ($name:literal, $py_type:ident, $rust_type:ident) => { + #[cfg(feature = "python")] + #[pyo3::pyclass(name = $name, module = "dexrs._internal.structs")] + pub struct $py_type(pub Arc<$rust_type>); + + #[cfg(feature = "python")] + impl<'a> From<&'a $rust_type> for $py_type { + fn from(value: &'a $rust_type) -> Self { + $py_type(Arc::new(value.clone())) + } + } + }; +} + +macro_rules! 
rs_struct_fields { + ($py_type:ident, { $(($name:ident, $rtype:ty),)+ }, $($extra:tt)*) => { + #[cfg(feature = "python")] + #[pyo3::pymethods] + impl $py_type { + $( + #[getter] + pub fn $name(&self) -> $rtype { + self.0.$name + } + )+ + + $( + $extra + )* + } + }; +} + +pub(crate) use rs_type_wrapper; +pub(crate) use rs_struct_wrapper; +pub(crate) use rs_struct_fields; \ No newline at end of file diff --git a/src/py/class_accessor.rs b/src/py/class_accessor.rs deleted file mode 100644 index 99eb656..0000000 --- a/src/py/class_accessor.rs +++ /dev/null @@ -1,159 +0,0 @@ -use std::sync::Arc; - -use pyo3::PyResult; - -use crate::file::{ClassAccessor, Field, Method}; - -// Rust-side of the class accessor -// -// However, this may seem invalid, there's actually no need for us to -// use mem::transmute here, because only Rust can create instances of -// this type. -pub struct RsClassAccessor(ClassAccessor<'static>); - -#[pyo3::pyclass(name = "ClassAccessor", module = "dexrs._internal.class_accessor")] -pub struct PyClassAccessor { - inner: Arc, -} - -impl PyClassAccessor { - // Rust interface - pub fn from_instance(class_accessor: ClassAccessor<'static>) -> PyClassAccessor { - PyClassAccessor { - inner: Arc::new(RsClassAccessor(class_accessor)), - } - } -} - -impl From> for PyClassAccessor { - fn from(class_accessor: ClassAccessor<'static>) -> Self { - PyClassAccessor::from_instance(class_accessor) - } -} - - -#[pyo3::pymethods] -impl PyClassAccessor { - // no constructor - #[getter] - pub fn num_fields(&self) -> usize { - self.inner.0.num_fields() - } - - #[getter] - pub fn num_methods(&self) -> usize { - self.inner.0.num_methods() - } - - #[getter] - pub fn num_static_fields(&self) -> u32 { - self.inner.0.num_static_fields - } - - #[getter] - pub fn num_instance_fields(&self) -> u32 { - self.inner.0.num_instance_fields - } - - #[getter] - pub fn num_direct_methods(&self) -> u32 { - self.inner.0.num_direct_methods - } - - #[getter] - pub fn num_virtual_methods(&self) 
-> u32 { - self.inner.0.num_virtual_methods - } - - pub fn get_fields(&self) -> PyResult> { - Ok(self.inner.0.get_fields().map(Into::into).collect()) - } - - pub fn get_static_fieds(&self) -> PyResult> { - Ok(self.inner.0.get_static_fieds().map(Into::into).collect()) - } - - pub fn get_instance_fields(&self) -> PyResult> { - Ok(self.inner.0.get_instance_fields().map(Into::into).collect()) - } - - pub fn get_methods(&self) -> PyResult> { - Ok(self.inner.0.get_methods()?.map(Into::into).collect()) - } - - pub fn get_virtual_methods(&self) -> PyResult> { - Ok(self - .inner - .0 - .get_virtual_methods()? - .map(Into::into) - .collect()) - } - - pub fn get_direct_methods(&self) -> PyResult> { - Ok(self.inner.0.get_direct_methods()?.map(Into::into).collect()) - } -} - -#[pyo3::pyclass(name = "Method", module = "dexrs._internal.class_accessor")] -pub struct PyDexMethod(Arc); - -impl From for PyDexMethod { - fn from(method: Method) -> Self { - PyDexMethod(Arc::new(method)) - } -} - -#[pyo3::pymethods] -impl PyDexMethod { - #[getter] - pub fn index(&self) -> u32 { - self.0.index - } - - #[getter] - pub fn access_flags(&self) -> u32 { - self.0.access_flags - } - - #[getter] - pub fn code_offset(&self) -> u32 { - self.0.code_offset - } - - pub fn is_static_or_direct(&self) -> bool { - self.0.is_static_or_direct - } -} - -#[pyo3::pyclass(name = "Field", module = "dexrs._internal.class_accessor")] -pub struct PyDexField(Arc); - -impl From for PyDexField { - fn from(field: Field) -> Self { - PyDexField(Arc::new(field)) - } -} - -#[pyo3::pymethods] -impl PyDexField { - #[getter] - pub fn index(&self) -> u32 { - self.0.index - } - - #[getter] - pub fn access_flags(&self) -> u32 { - self.0.access_flags - } - - pub fn is_static(&self) -> bool { - self.0.is_static - } -} - -#[pyo3::pymodule] -pub mod py_class_accessor { - #[pymodule_export] - use super::{PyClassAccessor, PyDexField, PyDexMethod}; -} diff --git a/src/py/code_item.rs b/src/py/code_item.rs new file mode 100644 index 
0000000..f53243e --- /dev/null +++ b/src/py/code_item.rs @@ -0,0 +1,114 @@ +use std::sync::Arc; + +use pyo3::PyResult; + +use crate::file::{Code, CodeItemAccessor, Instruction, PyDexCode}; + +use super::rs_type_wrapper; + +rs_type_wrapper!( + CodeItemAccessor<'static>, + PyCodeItemAccessor, + RsCodeItemAccessor, + name: "CodeItemAccessor", + module: "dexrs._internal.code" +); + +#[pyo3::pymethods] +impl PyCodeItemAccessor { + #[getter] + pub fn insns_size_in_code_units(&self) -> u32 { + self.inner.0.insns_size_in_code_units() + } + + #[getter] + pub fn insns_size_in_bytes(&self) -> u32 { + self.inner.0.insns_size_in_bytes() + } + + pub fn has_code(&self) -> bool { + self.inner.0.has_code() + } + + #[getter] + pub fn code_off(&self) -> u32 { + self.inner.0.code_off() + } + + #[getter] + pub fn code_item(&self) -> PyDexCodeItem { + self.inner.0.code_item().into() + } + + #[getter] + pub fn registers_size(&self) -> u16 { + self.inner.0.registers_size() + } + + #[getter] + pub fn ins_size(&self) -> u16 { + self.inner.0.ins_size() + } + + #[getter] + pub fn outs_size(&self) -> u16 { + self.inner.0.outs_size() + } + + #[getter] + pub fn tries_size(&self) -> u16 { + self.inner.0.tries_size() + } + + pub fn insns_raw(&self) -> &[u16] { + self.inner.0.insns() + } + + pub fn inst_at(&self, pc: u32) -> PyInstruction { + self.inner.0.inst_at(pc).into() + } +} + +rs_type_wrapper!( + Instruction<'static>, + PyInstruction, + RsInstruction, + name: "Instruction", + module: "dexrs._internal.code" +); + +#[pyo3::pymethods] +impl PyInstruction { + pub fn fetch16(&self, offset: u32) -> PyResult { + Ok(self.inner.0.fetch16(offset as usize)?) + } + + pub fn fetch32(&self, offset: u32) -> PyResult { + Ok(self.inner.0.fetch32(offset as usize)?) 
+ } + + #[staticmethod] + pub fn opcode_of(inst_data: u16) -> PyDexCode { + let opcode = Instruction::opcode_of(inst_data); + Instruction::format_desc_of(opcode).py_opcode + } + + #[staticmethod] + pub fn name_of(opcode: PyDexCode) -> &'static str { + Instruction::format_desc_of(opcode.into()).name + } +} + +// opcodes +impl Into for PyDexCode { + #[inline] + fn into(self) -> Code { + Instruction::opcode_of(self as u8 as u16) + } +} + +#[pyo3::pymodule(name = "code")] +pub mod py_code { + #[pymodule_export] + use super::{PyCodeItemAccessor, PyDexCode, PyInstruction}; +} diff --git a/src/py/container.rs b/src/py/container.rs deleted file mode 100644 index 22685fc..0000000 --- a/src/py/container.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::{ops::Deref, sync::Arc}; - -use pyo3::{exceptions::PyNotImplementedError, types::PyBytes, Py, PyRef, PyResult, Python}; - -use crate::file::DexContainer; - -use super::error::GenericError; - -#[pyo3::pyclass(name = "DexContainer", module = "dexrs._internal.container", subclass)] -pub struct PyDexContainer {} - -#[pyo3::pymethods] -impl PyDexContainer { - #[new] - pub fn new() -> Self { - PyDexContainer {} - } - - pub fn data(&self) -> PyResult<&[u8]> { - Err(PyNotImplementedError::new_err("foobar")) - } - - pub fn file_size(&self) -> PyResult { - Err(PyNotImplementedError::new_err("foobar")) - } -} - -// custom implementation of DexFileContainer to support python values - -#[pyo3::pyclass( - name = "InMemoryDexContainer", - module = "dexrs._internal.container", - frozen -)] -pub struct PyInMemoryDexContainer { - pub(crate) data: Py, - length: usize, -} - -impl AsRef<[u8]> for PyInMemoryDexContainer { - #[inline] - fn as_ref(&self) -> &[u8] { - self.deref() - } -} - -impl Deref for PyInMemoryDexContainer { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - Python::with_gil(|py| { - self.data.as_bytes(py) - }) - } -} - -impl DexContainer<'_> for PyInMemoryDexContainer {} - -impl PyInMemoryDexContainer { - pub fn 
open<'py>(py: Python, data: Py) -> Self { - Self { - data: data.clone_ref(py), - length: data.as_bytes(py).len(), - } - } -} - -#[pyo3::pymethods] -impl PyInMemoryDexContainer { - #[new] - pub fn new<'py>(py: Python<'py>, data: Py) -> PyResult { - Ok(PyInMemoryDexContainer::open(py, data)) - } - - // TODO: measure performance overhead if data is huge - pub fn data<'py>(py_self: PyRef<'_, Self>, py: Python<'py>) -> PyResult> { - Ok(py_self.data.clone_ref(py)) - } - - #[getter] - pub fn file_size(py_self: PyRef<'_, Self>) -> PyResult { - Ok(py_self.length) - } - - pub fn __len__(py_self: PyRef<'_, Self>) -> usize { - py_self.length - } -} - -#[pyo3::pyclass( - name = "FileDexContainer", - module = "dexrs._internal.container", - frozen -)] -pub struct PyFileDexContainer { - pub(crate) path: String, - _fp: std::fs::File, - data: Arc, -} - -impl AsRef<[u8]> for PyFileDexContainer { - #[inline] - fn as_ref(&self) -> &[u8] { - &self.data.as_ref() - } -} - -impl Deref for PyFileDexContainer { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.data.deref() - } -} - -impl DexContainer<'_> for PyFileDexContainer {} - -impl PyFileDexContainer { - pub fn open(path: String) -> Result { - let fp = std::fs::File::open(path.clone())?; - let mmap = unsafe { memmap2::Mmap::map(&fp)? }; - Ok(PyFileDexContainer { - path, - _fp: fp, - data: Arc::new(mmap), - }) - } -} - -#[pyo3::pymethods] -impl PyFileDexContainer { - #[new] - pub fn new(path: String) -> PyResult { - Ok(PyFileDexContainer::open(path)?) 
- } - - pub fn data<'py>(&self, py: Python<'py>) -> PyResult> { - Ok(PyBytes::new(py, self.data.as_ref()).into()) - } - - #[getter] - pub fn file_size(&self) -> PyResult { - Ok(self.data.len()) - } - - #[getter] - pub fn location(&self) -> PyResult { - Ok(self.path.clone()) - } - - pub fn __len__(&self) -> usize { - self.data.len() - } -} - -#[pyo3::pymodule(name = "container")] -pub(crate) mod py_container { - - #[pymodule_export] - use super::{PyDexContainer, PyFileDexContainer, PyInMemoryDexContainer}; -} diff --git a/src/py/error.rs b/src/py/error.rs deleted file mode 100644 index 7a66080..0000000 --- a/src/py/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -use pyo3::{ - create_exception, - exceptions::{PyIOError, PyRuntimeError}, - PyErr, -}; - -use crate::error::DexError; - -create_exception!(dexrs._internal.error, PyDexError, PyRuntimeError); - -impl From for PyErr { - fn from(err: DexError) -> PyErr { - PyDexError::new_err(err.to_string()) - } -} - -#[derive(Debug, thiserror::Error)] -pub enum GenericError { - #[error(transparent)] - IOError(#[from] std::io::Error), -} - -impl From for PyErr { - fn from(err: GenericError) -> PyErr { - PyIOError::new_err(err.to_string()) - } -} - -#[pyo3::pymodule(name = "error")] -pub(crate) mod py_error { - - #[pymodule_export] - use super::PyDexError; -} diff --git a/src/py/file.rs b/src/py/file.rs index b04eb72..b6bb278 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -3,17 +3,12 @@ use std::sync::Arc; use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; use crate::file::{ - verifier::VerifyPreset, DexFile, DexLocation, FieldIndex, ProtoIndex, StringIndex, TypeIndex, + verifier::VerifyPreset, DexFile, DexLocation, FieldIndex, ProtoIndex, PyDexClassDef, + PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, PyDexTypeId, + PyDexTypeItem, PyFileDexContainer, PyInMemoryDexContainer, StringIndex, TypeIndex, }; -use super::{ - class_accessor::PyClassAccessor, - container::{PyFileDexContainer, 
PyInMemoryDexContainer}, - structs::{ - PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, - PyDexTypeId, PyDexTypeItem, - }, -}; +use crate::file::class_accessor::PyClassAccessor; #[allow(non_camel_case_types)] #[derive(Clone, Copy, PartialEq, Eq)] diff --git a/src/py/mod.rs b/src/py/mod.rs deleted file mode 100644 index 5a43b9e..0000000 --- a/src/py/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub(crate) mod container; -pub(crate) mod file; -pub(crate) mod error; -pub(crate) mod structs; -pub(crate) mod mutf8; -pub(crate) mod class_accessor; \ No newline at end of file diff --git a/src/py/mutf8.rs b/src/py/mutf8.rs deleted file mode 100644 index fd2a05d..0000000 --- a/src/py/mutf8.rs +++ /dev/null @@ -1,37 +0,0 @@ -use pyo3::PyResult; - -use crate::{error::DexError, utf}; - -#[pyo3::pyfunction] -pub fn mutf8_to_str(utf8_data_in: &[u8]) -> PyResult { - if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { - Ok(utf::mutf8_to_str(&utf8_data_in[0..=end])?) - } else { - Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) - } -} - -#[pyo3::pyfunction] -pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> PyResult { - if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { - Ok(utf::mutf8_to_str_lossy(&utf8_data_in[0..=end])?) 
- } else { - Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) - } -} - -#[pyo3::pyfunction] -pub fn str_to_mutf8(str_data_in: &str) -> Vec { - utf::str_to_mutf8(str_data_in) -} - -#[pyo3::pyfunction] -pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { - utf::str_to_mutf8_lossy(str_data_in) -} - -#[pyo3::pymodule(name = "mutf8")] -pub(crate) mod py_mutf8 { - #[pymodule_export] - use super::{mutf8_to_str, mutf8_to_str_lossy, str_to_mutf8, str_to_mutf8_lossy}; -} diff --git a/src/py/structs.rs b/src/py/structs.rs deleted file mode 100644 index d2eb629..0000000 --- a/src/py/structs.rs +++ /dev/null @@ -1,163 +0,0 @@ -use std::sync::Arc; - -use crate::file::{ - ClassDef, FieldId, Header, MethodId, ProtoId, ProtoIndex, StringId, StringIndex, TypeId, - TypeIndex, TypeItem, -}; - -macro_rules! py_struct_wrapper { - ($name:literal, $py_type:ident, $rust_type:ident) => { - #[pyo3::pyclass(name = $name, module = "dexrs._internal.structs")] - pub struct $py_type(pub Arc<$rust_type>); - - impl<'a> From<&'a $rust_type> for $py_type { - fn from(value: &'a $rust_type) -> Self { - $py_type(Arc::new(value.clone())) - } - } - }; -} - -macro_rules! 
py_struct_fields { - ($py_type:ident, { $(($name:ident, $rtype:ty),)+ }, $($extra:tt)*) => { - #[pyo3::pymethods] - impl $py_type { - $( - #[getter] - pub fn $name(&self) -> $rtype { - self.0.$name - } - )+ - - $( - $extra - )* - } - }; -} - -// -------------------------------------------------------------------- -// Header -// -------------------------------------------------------------------- -py_struct_wrapper!("Header", PyDexHeader, Header); -py_struct_fields!(PyDexHeader, { - (checksum, u32), - (file_size, u32), - (header_size, u32), - (endian_tag, u32), - (link_size, u32), - (link_off, u32), - (string_ids_size, u32), - (string_ids_off, u32), - (type_ids_size, u32), - (type_ids_off, u32), - (proto_ids_size, u32), - (proto_ids_off, u32), - (field_ids_size, u32), - (field_ids_off, u32), - (method_ids_size, u32), - (method_ids_off, u32), - (class_defs_size, u32), - (class_defs_off, u32), - (data_size, u32), - (data_off, u32), -}, - -#[getter] -pub fn signature(&self) -> Vec { - self.0.get_signature().to_vec() -} - -#[getter] -pub fn version_int(&self) -> u32 { - self.0.get_version() -} - -#[getter] -pub fn get_magic(&self) -> Vec { - self.0.get_magic().to_vec() -} -); - -// -------------------------------------------------------------------- -// StringId -// -------------------------------------------------------------------- -py_struct_wrapper!("StringId", PyDexStringId, StringId); -py_struct_fields!(PyDexStringId, { - (string_data_off, StringIndex), -}, -pub fn __repr__(&self) -> String { - format!("StringId(string_data_off={})", self.0.string_data_off) -}); - -// -------------------------------------------------------------------- -// TypeId -// -------------------------------------------------------------------- -py_struct_wrapper!("TypeId", PyDexTypeId, TypeId); -py_struct_fields!(PyDexTypeId, { - (descriptor_idx, StringIndex), -},); - -// -------------------------------------------------------------------- -// FieldId -// 
-------------------------------------------------------------------- -py_struct_wrapper!("FieldId", PyDexFieldId, FieldId); -py_struct_fields!(PyDexFieldId, { - (class_idx, TypeIndex), - (type_idx, TypeIndex), - (name_idx, StringIndex), -},); - -// -------------------------------------------------------------------- -// ProtoId -// -------------------------------------------------------------------- -py_struct_wrapper!("ProtoId", PyDexProtoId, ProtoId); -py_struct_fields!(PyDexProtoId, { - (shorty_idx, StringIndex), - (return_type_idx, TypeIndex), - (parameters_off, u32), -},); - -// -------------------------------------------------------------------- -// MethodId -// -------------------------------------------------------------------- -py_struct_wrapper!("MethodId", PyDexMethodId, MethodId); -py_struct_fields!(PyDexMethodId, { - (class_idx, TypeIndex), - (proto_idx, ProtoIndex), - (name_idx, StringIndex), -},); - -// -------------------------------------------------------------------- -// ClassDef -// -------------------------------------------------------------------- -py_struct_wrapper!("ClassDef", PyDexClassDef, ClassDef); -py_struct_fields!(PyDexClassDef, { - (class_idx, TypeIndex), - (access_flags, u32), - (superclass_idx, TypeIndex), - (interfaces_off, u32), - (source_file_idx, StringIndex), - (annotations_off, u32), - (class_data_off, u32), - (static_values_off, u32), -},); - -// -------------------------------------------------------------------- -// TypeItem -// -------------------------------------------------------------------- -py_struct_wrapper!("TypeItem", PyDexTypeItem, TypeItem); -py_struct_fields!(PyDexTypeItem, { - (type_idx, TypeIndex), -},); - - -#[pyo3::pymodule(name = "structs")] -pub(crate) mod py_structs { - - #[pymodule_export] - use super::{ - PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, - PyDexTypeId, PyDexTypeItem, - }; -} diff --git a/src/utf.rs b/src/utf.rs index e1c022a..dd66135 100644 --- 
a/src/utf.rs +++ b/src/utf.rs @@ -1,8 +1,7 @@ - // TODO: these functions are highly unsafe and does not stand any chance against fuzzing // -> resolved for now with Result<> as return type and additional checks -use crate::{dex_err, Result, error::DexError}; +use crate::{dex_err, error::DexError, Result}; pub fn mutf8_to_str(utf8_data_in: &[u8]) -> crate::Result { let utf16_data = mutf8_to_utf16(utf8_data_in)?; @@ -25,6 +24,44 @@ pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { utf16_to_mutf8(&utf16_data_in, &options) } +// python exports +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "mutf8")] +pub(crate) mod py_utf { + + use crate::error::DexError; + use pyo3::PyResult; + + #[pyo3::pyfunction] + pub fn mutf8_to_str(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(super::mutf8_to_str(&utf8_data_in[0..=end])?) + } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } + } + + #[pyo3::pyfunction] + pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(super::mutf8_to_str_lossy(&utf8_data_in[0..=end])?) 
+ } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } + } + + #[pyo3::pyfunction] + pub fn str_to_mutf8(str_data_in: &str) -> Vec { + super::str_to_mutf8(str_data_in) + } + + #[pyo3::pyfunction] + pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { + super::str_to_mutf8_lossy(str_data_in) + } +} +// end python exports + #[inline] fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { let one = utf8_data_in[*offset]; @@ -127,7 +164,7 @@ pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> Result { return dex_err!(MalformedMUTF8Sequence { idx: in_idx, len: utf8_in_len - }) + }); } Ok(len) } @@ -142,11 +179,7 @@ fn mutf8_to_utf16(utf8_data_in: &[u8]) -> Result> { Ok(convert_mutf8_to_utf16(utf8_data_in, utf8_in_len, out_chars)) } -fn convert_mutf8_to_utf16( - utf8_data_in: &[u8], - utf8_in_len: usize, - out_chars: usize, -) -> Vec { +fn convert_mutf8_to_utf16(utf8_data_in: &[u8], utf8_in_len: usize, out_chars: usize) -> Vec { if utf8_data_in.len() == out_chars { // common case where all chars are ASCII return utf8_data_in.iter().map(|i| *i as u16).collect(); @@ -183,7 +216,6 @@ fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { // append trailing null mutf8_out.push(0x00); mutf8_out - } pub struct Options { From cce89defd9b1017af4f46fdd93f6bae1a332a7dd Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 16 Feb 2025 09:27:14 +0100 Subject: [PATCH 32/46] Add binding for Instruction related API --- + vreg module is still missing, but all classes have been added to the Python API + stubs added + Instruction::next now returns an Option<> instead of Result<> --- python/dexrs/_internal/code/__init__.pyi | 22 ++++- python/dexrs/_internal/code/code_flags.pyi | 2 + python/dexrs/_internal/code/flags.pyi | 8 ++ python/dexrs/_internal/code/verify_flags.pyi | 27 +++++++ python/dexrs/code.py | 25 ++++++ src/file/instruction.rs | 85 
+++++++++++++++++--- 6 files changed, 158 insertions(+), 11 deletions(-) create mode 100644 python/dexrs/_internal/code/code_flags.pyi create mode 100644 python/dexrs/_internal/code/flags.pyi create mode 100644 python/dexrs/_internal/code/verify_flags.pyi create mode 100644 python/dexrs/code.py diff --git a/python/dexrs/_internal/code/__init__.pyi b/python/dexrs/_internal/code/__init__.pyi index 7f2c831..d2fc59a 100644 --- a/python/dexrs/_internal/code/__init__.pyi +++ b/python/dexrs/_internal/code/__init__.pyi @@ -27,10 +27,28 @@ class CodeItemAccessor: def inst_at(self, pc: int) -> Instruction: ... class Instruction: + @property + def opcode(self) -> Code: ... + @property + def format(self) -> Format: ... + @property + def name(self) -> str: ... + @property + def verify_flags(self) -> int: ... + def size_in_code_units(self) -> int: ... + def next(self) -> Instruction | None: ... + @staticmethod + def get_opcode_of(inst_data: int) -> Code: ... + @staticmethod + def get_name_of(opcode: Code) -> str: ... + @staticmethod + def get_format_of(opcode: Code) -> Format: ... + @staticmethod + def get_verify_flags_of(opcode: Code) -> int: ... @staticmethod - def opcode_of(inst_data: int) -> Code: ... + def get_flags_of(inst_data: int) -> int: ... @staticmethod - def name_of(opcode: Code) -> str: ... + def get_index_type_of(inst_data: int) -> IndexType: ... class Format: k10x: Format diff --git a/python/dexrs/_internal/code/code_flags.pyi b/python/dexrs/_internal/code/code_flags.pyi new file mode 100644 index 0000000..3d293a3 --- /dev/null +++ b/python/dexrs/_internal/code/code_flags.pyi @@ -0,0 +1,2 @@ +Complex: int = ... +Custom: int = ... diff --git a/python/dexrs/_internal/code/flags.pyi b/python/dexrs/_internal/code/flags.pyi new file mode 100644 index 0000000..c86b85a --- /dev/null +++ b/python/dexrs/_internal/code/flags.pyi @@ -0,0 +1,8 @@ +Branch: int = ... +Continue: int = ... +Switch: int = ... +Throw: int = ... +Return: int = ... +Invoke: int = ... 
+Unconditional: int = ... +Experimental: int = ... diff --git a/python/dexrs/_internal/code/verify_flags.pyi b/python/dexrs/_internal/code/verify_flags.pyi new file mode 100644 index 0000000..c8b4c8d --- /dev/null +++ b/python/dexrs/_internal/code/verify_flags.pyi @@ -0,0 +1,27 @@ +VerifyNothing: int = ... +VerifyRegA: int = ... +VerifyRegAWide: int = ... +VerifyRegB: int = ... +VerifyRegBField: int = ... +VerifyRegBMethod: int = ... +VerifyRegBNewInstance: int = ... +VerifyRegBString: int = ... +VerifyRegBType: int = ... +VerifyRegBWide: int = ... +VerifyRegC: int = ... +VerifyRegCField: int = ... +VerifyRegCNewArray: int = ... +VerifyRegCType: int = ... +VerifyRegCWide: int = ... +VerifyArrayData: int = ... +VerifyBranchTarget: int = ... +VerifySwitchTargets: int = ... +VerifyVarArg: int = ... +VerifyVarArgNonZero: int = ... +VerifyVarArgRange: int = ... +VerifyVarArgRangeNonZero: int = ... +VerifyError: int = ... +VerifyRegHPrototype: int = ... +VerifyRegBCallSite: int = ... +VerifyRegBMethodHandle: int = ... +VerifyRegBPrototype: int = ... 
diff --git a/python/dexrs/code.py b/python/dexrs/code.py new file mode 100644 index 0000000..8793fb6 --- /dev/null +++ b/python/dexrs/code.py @@ -0,0 +1,25 @@ +from dexrs._internal import code as rust_code + +CodeItemAccessor = rust_code.CodeItemAccessor +Code = rust_code.Code +Instruction = rust_code.Instruction +Format = rust_code.Format +IndexType = rust_code.IndexType + +# sub-modules will be represented as variables here +code_flags = rust_code.code_flags +verify_flags = rust_code.verify_flags +flags = rust_code.flags +signatures = rust_code.signatures + +__all__ = [ + "CodeItemAccessor", + "Code", + "Instruction", + "Format", + "IndexType", + "code_flags", + "verify_flags", + "flags", + "signatures", +] diff --git a/src/file/instruction.rs b/src/file/instruction.rs index 22149d6..f7c5d1d 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -134,15 +134,68 @@ impl PyInstruction { } #[staticmethod] - pub fn opcode_of(inst_data: u16) -> PyDexCode { + pub fn get_opcode_of(inst_data: u16) -> PyDexCode { let opcode = Instruction::opcode_of(inst_data); Instruction::format_desc_of(opcode).py_opcode } #[staticmethod] - pub fn name_of(opcode: PyDexCode) -> &'static str { + pub fn get_name_of(opcode: PyDexCode) -> &'static str { Instruction::format_desc_of(opcode.into()).name } + + #[staticmethod] + pub fn get_format_of(opcode: PyDexCode) -> PyDexFormat { + Instruction::format_of(opcode.into()).into() + } + + #[staticmethod] + pub fn get_index_type_of(opcode: PyDexCode) -> PyDexIndexType { + Instruction::index_type_of(opcode.into()).into() + } + + #[staticmethod] + pub fn get_flags_of(opcode: PyDexCode) -> u8 { + Instruction::flags_of(opcode.into()) + } + + #[staticmethod] + pub fn get_verify_flags_of(opcode: PyDexCode) -> u32 { + Instruction::verify_flags_of(opcode.into()) + } + + // same interface for instance methods + #[inline] + #[getter] + pub fn opcode(&self) -> PyDexCode { + self.inner.0.opcode().into() + } + + #[inline] + #[getter] + pub fn 
format(&self) -> PyDexFormat { + self.inner.0.format().into() + } + + #[inline] + #[getter] + pub fn name(&self) -> &'static str { + self.inner.0.name() + } + + #[inline] + #[getter] + pub fn verify_flags(&self) -> u32 { + self.inner.0.verify_flags() + } + + pub fn next(&self) -> Option { + self.inner.0.next().map(Into::into) + } + + pub fn size_in_code_units(&self) -> usize { + self.inner.0.size_in_code_units() + } } // <<< end python export @@ -168,8 +221,8 @@ macro_rules! define_formats { } #[cfg(feature = "python")] - impl From for PyDexFormat { - fn from(f: Format) -> Self { + impl From<&Format> for PyDexFormat { + fn from(f: &Format) -> Self { match f { $(Format::$fmtids => PyDexFormat::$fmtids,)* } @@ -230,8 +283,8 @@ macro_rules! define_index_types { } #[cfg(feature = "python")] - impl From for PyDexIndexType { - fn from(f: IndexType) -> Self { + impl From<&IndexType> for PyDexIndexType { + fn from(f: &IndexType) -> Self { match f { $(IndexType::$index_ty => PyDexIndexType::$index_ty,)* } @@ -377,8 +430,12 @@ impl<'a> Instruction<'a> { &self.format_desc().name } - pub fn next(&self) -> Result> { - self.relative_at(self.size_in_code_units()) + pub fn next(&self) -> Option> { + // we check, if the next instruction stores at least two bytes + if self.0.len() <= self.size_in_code_units() + 2 { + return None; + } + return Some(Instruction::at(&self.0[self.size_in_code_units()..])); } #[inline(always)] @@ -789,6 +846,16 @@ macro_rules! 
insn_desc_table { Instruction::opcode_of(self as u8 as u16) } } + + #[cfg(feature = "python")] + impl From for PyDexCode { + #[inline] + fn from(code: Code) -> Self { + match code { + $(Code::$code => PyDexCode::$code,)* + } + } + } }; } @@ -1066,6 +1133,6 @@ pub(crate) mod py_code { // constants #[pymodule_export] - use super::{py_code_flags, py_signatures, py_flags, py_verify_flags}; + use super::{py_code_flags, py_flags, py_signatures, py_verify_flags}; } // <<< end python module export From 2a3d621129b28d80c7c7cc4b85fbb096a64051b1 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 16 Feb 2025 10:32:05 +0100 Subject: [PATCH 33/46] Added binding for EncodedValue --- + Fixed parsing parameter names in debug info --- python/dexrs/_internal/structs.pyi | 92 ++++++++++++++++ src/file/annotations.rs | 10 +- src/file/debug.rs | 10 +- src/file/structs.rs | 167 ++++++++++++++++++++++++----- src/py.rs | 1 + src/py/code_item.rs | 114 -------------------- 6 files changed, 239 insertions(+), 155 deletions(-) delete mode 100644 src/py/code_item.rs diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi index e7a1def..24454e9 100644 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -1,3 +1,5 @@ +from typing import List + class Header: checksum: int file_size: int @@ -68,3 +70,93 @@ class CodeItem: tries_size: int debug_info_off: int insns_size: int + +class TryItem: + start_addr: int + insn_count: int + handler_off: int + +class AnnotationsDirectoryItem: + class_annotations_off: int + fields_size: int + methods_size: int + parameters_size: int + +class FieldAnnotationsItem: + field_idx: int + annotations_off: int + +class MethodAnnotationsItem: + method_idx: int + annotations_off: int + +class ParameterAnnotationsItem: + method_idx: int + annotations_off: int + +class EncodedValue: + class Null: + pass + + class Boolean: + value: bool + + class Byte: + value: 
int + + class Char: + value: int + + class Short: + value: int + + class Integer: + value: int + + class Float: + value: float + + class Long: + value: int + + class Double: + value: float + + class String: + index: int + + class Type: + index: int + + class Field: + index: int + + class Method: + index: int + + class MethodType: + index: int + + class MethodHandle: + index: int + + class Enum: + index: int + + class Array: + elements: List[EncodedValue] + + class Annotation: + annotation: EncodedAnnotation + +class AnnotationElement: + name_idx: int + value: EncodedValue + +class EncodedAnnotation: + type_idx: int + elements: List[AnnotationElement] + +class AnnotationItem: + visibility: int + annotation: EncodedAnnotation diff --git a/src/file/annotations.rs b/src/file/annotations.rs index 3424e0b..857a862 100644 --- a/src/file/annotations.rs +++ b/src/file/annotations.rs @@ -19,7 +19,7 @@ pub struct ClassAnnotationsAccessor<'a> { } impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - pub fn get_class_ann_accessor( + pub fn get_class_annotation_accessor( &'a self, class_def: &'a ClassDef, ) -> Result> { @@ -344,13 +344,7 @@ impl EncodedValue { EncodedValueType::Array => EncodedValue::Array(EncodedValue::from_encoded_array(value, offset)?), EncodedValueType::Annotation => EncodedValue::Annotation(EncodedValue::from_encoded_annotation(value, offset)?), EncodedValueType::Null => EncodedValue::Null, - EncodedValueType::Boolean => { - if value_arg == 0 { - EncodedValue::False - } else { - EncodedValue::True - } - } + EncodedValueType::Boolean => EncodedValue::Boolean(value_arg != 0), }) } diff --git a/src/file/debug.rs b/src/file/debug.rs index adc1070..cbab960 100644 --- a/src/file/debug.rs +++ b/src/file/debug.rs @@ -3,9 +3,11 @@ use crate::{ Result, }; +use super::StringIndex; + pub enum SourceFile { This, - Other(u32), // index to file + Other(StringIndex), // index to file } #[rustfmt::skip] @@ -156,7 +158,7 @@ impl<'dex> DebugInfoParameterNamesIterator<'dex> { let 
size = decode_leb128_off::(&ptr, &mut pos)? as usize; Ok(Self { ptr, - offset, + offset: pos, size, idx: 0, }) @@ -172,9 +174,7 @@ impl<'a> Iterator for DebugInfoParameterNamesIterator<'a> { } self.idx += 1; match decode_leb128p1_off(&self.ptr, &mut self.offset) { - Ok(v) => { - Some(v as u32) - } + Ok(v) => Some(v as u32), Err(_) => None, } } diff --git a/src/file/structs.rs b/src/file/structs.rs index 83a1eb6..ec4a9ed 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -475,47 +475,157 @@ pub type AnnotationSetItem<'a> = &'a [u32]; pub type EncodedArray = Vec; -#[derive(Debug)] -pub enum EncodedValue { - Byte(i8), - Short(i16), - Char(u16), - Int(i32), - Long(i64), - Float(f32), - Double(f64), - MethodType(u32), - MethodHandle(u32), - String(u32), - Type(u32), - Field(u32), - Method(u32), - Enum(u32), - Array(EncodedArray), - Annotation(EncodedAnnotation), - Null, - True, - False, +// -------------------------------------------------------------------- +// Encoded Value +// -------------------------------------------------------------------- +macro_rules! 
define_encoded_value { + ({ $(($primitive_name:ident: $primitive_py_name:ident=$primitive_ty:ty),)* }) => { + #[derive(Debug, Clone)] + pub enum EncodedValue { + $( + $primitive_name($primitive_ty), + )* + Array(EncodedArray), + Annotation(EncodedAnnotation), + Null, + } + +// >>> begin python export + // Python type will be an enum with variants from EncodedValue + #[cfg(feature = "python")] + #[derive(Clone)] + #[pyo3::pyclass(name = "EncodedValue", module = "dexrs._internal.structs")] + pub enum PyDexEncodedValue { + $( + $primitive_name { $primitive_py_name: $primitive_ty }, + )* + Array{ elements: Vec }, + Annotation{ annotation: PyDexEncodedAnnotation }, + Null(), + } + + #[cfg(feature = "python")] + impl From<&EncodedValue> for PyDexEncodedValue { + fn from(value: &EncodedValue) -> Self { + match value { + $( + EncodedValue::$primitive_name(value) => PyDexEncodedValue::$primitive_name { $primitive_py_name: *value }, + )* + EncodedValue::Array(v) => PyDexEncodedValue::Array { + elements: v.iter().map(Into::into).collect(), + }, + EncodedValue::Annotation(v) => PyDexEncodedValue::Annotation { + annotation: v.into(), + }, + EncodedValue::Null => PyDexEncodedValue::Null(), + } + } + } +// <<< end python export + }; } -#[derive(Debug)] +define_encoded_value!({ + (Byte: value=i8), + (Short: value=i16), + (Char: value=u16), + (Int: value=i32), + (Long: value=i64), + (Float: value=f32), + (Double: value=f64), + (MethodType: index=u32), + (MethodHandle: index=u32), + (String: index=u32), + (Type: index=u32), + (Field: index=u32), + (Method: index=u32), + (Enum: index=u32), + (Boolean: value=bool), +}); + + +// -------------------------------------------------------------------- +// Annotation Element +// -------------------------------------------------------------------- +#[derive(Debug, Clone)] pub struct AnnotationElement { pub name_idx: u32, pub(crate) value: EncodedValue, } -#[derive(Debug)] +// >>> begin python export +#[cfg(feature = "python")] 
+rs_struct_wrapper!( + "AnnotationElement", + PyDexAnnotationElement, + AnnotationElement +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexAnnotationElement, { + (name_idx, u32), +}, + +#[getter] +pub fn value(&self) -> PyDexEncodedValue { + let value = &self.0.value; + value.into() +} +); +// <<< end python export + +// -------------------------------------------------------------------- +// Encoded Annotation +// -------------------------------------------------------------------- +#[derive(Debug, Clone)] pub struct EncodedAnnotation { pub type_idx: u32, pub(crate) elements: Vec, } -#[derive(Debug)] +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!( + "EncodedAnnotation", + PyDexEncodedAnnotation, + EncodedAnnotation +); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexEncodedAnnotation, { + (type_idx, u32), +}, + +#[getter] +pub fn elements(&self) -> Vec { + self.0.elements.iter().map(Into::into).collect() +} +); +// <<< end python export + +// -------------------------------------------------------------------- +// Annotation Item +// -------------------------------------------------------------------- +#[derive(Debug, Clone)] pub struct AnnotationItem { pub visibility: u8, pub annotation: EncodedAnnotation, } +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("AnnotationItem", PyDexAnnotationItem, AnnotationItem); +#[cfg(feature = "python")] +rs_struct_fields!(PyDexAnnotationItem, { + (visibility, u8), +}, + +#[getter] +pub fn annotation(&self) -> PyDexEncodedAnnotation { + let a = &self.0.annotation; + a.into() +} +); +// <<< end python export + // -------------------------------------------------------------------- // Python API // -------------------------------------------------------------------- @@ -526,10 +636,11 @@ pub(crate) mod py_structs { #[pymodule_export] use super::{ - PyDexAnnotationsDirectoryItem, PyDexCallSiteIdItem, PyDexClassDef, PyDexCodeItem, - PyDexFieldAnnotationsItem, 
PyDexFieldId, PyDexMethodAnnotationsItem, PyDexMethodHandleItem, - PyDexMethodId, PyDexParameterAnnotationsItem, PyDexProtoId, PyDexStringId, PyDexTryItem, - PyDexTypeId, PyDexTypeItem + PyDexAnnotationElement, PyDexAnnotationItem, PyDexAnnotationsDirectoryItem, + PyDexCallSiteIdItem, PyDexClassDef, PyDexCodeItem, PyDexEncodedAnnotation, + PyDexEncodedValue, PyDexFieldAnnotationsItem, PyDexFieldId, PyDexMethodAnnotationsItem, + PyDexMethodHandleItem, PyDexMethodId, PyDexParameterAnnotationsItem, PyDexProtoId, + PyDexStringId, PyDexTryItem, PyDexTypeId, PyDexTypeItem, }; #[pymodule_export] diff --git a/src/py.rs b/src/py.rs index 797c803..e31f291 100644 --- a/src/py.rs +++ b/src/py.rs @@ -44,6 +44,7 @@ macro_rules! rs_type_wrapper { macro_rules! rs_struct_wrapper { ($name:literal, $py_type:ident, $rust_type:ident) => { #[cfg(feature = "python")] + #[derive(Debug, Clone)] #[pyo3::pyclass(name = $name, module = "dexrs._internal.structs")] pub struct $py_type(pub Arc<$rust_type>); diff --git a/src/py/code_item.rs b/src/py/code_item.rs deleted file mode 100644 index f53243e..0000000 --- a/src/py/code_item.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::sync::Arc; - -use pyo3::PyResult; - -use crate::file::{Code, CodeItemAccessor, Instruction, PyDexCode}; - -use super::rs_type_wrapper; - -rs_type_wrapper!( - CodeItemAccessor<'static>, - PyCodeItemAccessor, - RsCodeItemAccessor, - name: "CodeItemAccessor", - module: "dexrs._internal.code" -); - -#[pyo3::pymethods] -impl PyCodeItemAccessor { - #[getter] - pub fn insns_size_in_code_units(&self) -> u32 { - self.inner.0.insns_size_in_code_units() - } - - #[getter] - pub fn insns_size_in_bytes(&self) -> u32 { - self.inner.0.insns_size_in_bytes() - } - - pub fn has_code(&self) -> bool { - self.inner.0.has_code() - } - - #[getter] - pub fn code_off(&self) -> u32 { - self.inner.0.code_off() - } - - #[getter] - pub fn code_item(&self) -> PyDexCodeItem { - self.inner.0.code_item().into() - } - - #[getter] - pub fn 
registers_size(&self) -> u16 { - self.inner.0.registers_size() - } - - #[getter] - pub fn ins_size(&self) -> u16 { - self.inner.0.ins_size() - } - - #[getter] - pub fn outs_size(&self) -> u16 { - self.inner.0.outs_size() - } - - #[getter] - pub fn tries_size(&self) -> u16 { - self.inner.0.tries_size() - } - - pub fn insns_raw(&self) -> &[u16] { - self.inner.0.insns() - } - - pub fn inst_at(&self, pc: u32) -> PyInstruction { - self.inner.0.inst_at(pc).into() - } -} - -rs_type_wrapper!( - Instruction<'static>, - PyInstruction, - RsInstruction, - name: "Instruction", - module: "dexrs._internal.code" -); - -#[pyo3::pymethods] -impl PyInstruction { - pub fn fetch16(&self, offset: u32) -> PyResult { - Ok(self.inner.0.fetch16(offset as usize)?) - } - - pub fn fetch32(&self, offset: u32) -> PyResult { - Ok(self.inner.0.fetch32(offset as usize)?) - } - - #[staticmethod] - pub fn opcode_of(inst_data: u16) -> PyDexCode { - let opcode = Instruction::opcode_of(inst_data); - Instruction::format_desc_of(opcode).py_opcode - } - - #[staticmethod] - pub fn name_of(opcode: PyDexCode) -> &'static str { - Instruction::format_desc_of(opcode.into()).name - } -} - -// opcodes -impl Into for PyDexCode { - #[inline] - fn into(self) -> Code { - Instruction::opcode_of(self as u8 as u16) - } -} - -#[pyo3::pymodule(name = "code")] -pub mod py_code { - #[pymodule_export] - use super::{PyCodeItemAccessor, PyDexCode, PyInstruction}; -} From f79ee4cb991c910296070c873c127bfd4096c468 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 18 Feb 2025 18:47:14 +0100 Subject: [PATCH 34/46] Added parsing support for complex opcodes --- + Fuzzing tests for complex instructions + Two new errors regarding complex instructions + Rename Python API candidate of Instruction to [Py|Rs]DexInstruction + Fix lifetime issue with CodeItemAccessor and Python API + Added Python stubs for vreg sunmodule and added struct definitions --- fuzz/Cargo.toml | 4 + 
fuzz/fuzz_targets/instructions.rs | 23 +- python/dexrs/_internal/code/__init__.pyi | 19 +- python/dexrs/_internal/code/vreg.pyi | 20 ++ python/dexrs/_internal/file.pyi | 4 +- python/dexrs/code.py | 1 + python/tests/test_dex_parse.py | 12 +- src/error.rs | 13 + src/file/code_item_accessors.rs | 9 +- src/file/dump.rs | 12 +- src/file/instruction.rs | 325 ++++++++++++++++++++++- src/file/mod.rs | 2 +- src/file/modifiers.rs | 2 +- src/file/structs.rs | 4 +- src/py.rs | 21 +- src/py/file.rs | 14 +- 16 files changed, 458 insertions(+), 27 deletions(-) create mode 100644 python/dexrs/_internal/code/vreg.pyi diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index ef07ad9..000140a 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -14,6 +14,10 @@ plain = "0.2.3" [dependencies.dexrs] path = ".." +[features] +default = [] +python = ["dexrs/python"] + [[bin]] name = "from_raw_parts" path = "fuzz_targets/from_raw_parts.rs" diff --git a/fuzz/fuzz_targets/instructions.rs b/fuzz/fuzz_targets/instructions.rs index 340e0e2..075e618 100644 --- a/fuzz/fuzz_targets/instructions.rs +++ b/fuzz/fuzz_targets/instructions.rs @@ -4,7 +4,10 @@ #![no_main] #![allow(non_snake_case)] -use dexrs::file::DexInstructionIterator; +use dexrs::file::{ + ComplexFromInst, DexInstructionIterator, FillArrayDataPayload, Instruction, + PackedSwitchPayload, SparseSwitchPayload, +}; extern crate dexrs; extern crate libfuzzer_sys; @@ -19,5 +22,21 @@ libfuzzer_sys::fuzz_target!(|data: &[u8]| { assert!(inst_dump.len() > 0); } } + + // specifically target complex opcodes + // REVISIT: this check should be done when creating a new instruction + if bytes.len() >= 1 { + let inst = Instruction::at(bytes); + // these parsing methods MUST withstand random data + if let Ok(payload) = PackedSwitchPayload::from_inst(&inst) { + let _ = payload; + } + if let Ok(payload) = SparseSwitchPayload::from_inst(&inst) { + let _ = payload; + } + if let Ok(payload) = FillArrayDataPayload::from_inst(&inst) { + let _ = payload; + } + } } 
-}); \ No newline at end of file +}); diff --git a/python/dexrs/_internal/code/__init__.pyi b/python/dexrs/_internal/code/__init__.pyi index d2fc59a..e930be3 100644 --- a/python/dexrs/_internal/code/__init__.pyi +++ b/python/dexrs/_internal/code/__init__.pyi @@ -1,6 +1,7 @@ -from typing import List +from typing import List, Optional from ..structs import CodeItem +from ..file import DexFile class CodeItemAccessor: code_off: int @@ -49,6 +50,22 @@ class Instruction: def get_flags_of(inst_data: int) -> int: ... @staticmethod def get_index_type_of(inst_data: int) -> IndexType: ... + def to_string(self, dex_file: Optional[DexFile] = ...) -> str: ... + +class FillArrayDataPayload: + data: bytes + element_count: int + element_size: int + +class SparseSwitchPayload: + keys: List[int] + targets: List[int] + case_count: int + +class PackedSwitchPayload: + first_key: List[int] + targets: List[int] + case_count: int class Format: k10x: Format diff --git a/python/dexrs/_internal/code/vreg.pyi b/python/dexrs/_internal/code/vreg.pyi new file mode 100644 index 0000000..95daf9d --- /dev/null +++ b/python/dexrs/_internal/code/vreg.pyi @@ -0,0 +1,20 @@ +from . import ( + Instruction, + PackedSwitchPayload, + FillArrayDataPayload, + SparseSwitchPayload, +) + +def has_a(inst: Instruction) -> bool: ... +def has_b(inst: Instruction) -> bool: ... +def has_c(inst: Instruction) -> bool: ... +def has_h(inst: Instruction) -> bool: ... +def A(inst: Instruction) -> int: ... +def B(inst: Instruction) -> int: ... +def C(inst: Instruction) -> int: ... +def H(inst: Instruction) -> int: ... +def has_wide_b(inst: Instruction) -> bool: ... +def wide_b(inst: Instruction) -> int: ... +def array_data(inst: Instruction) -> FillArrayDataPayload: ... +def packed_switch(inst: Instruction) -> PackedSwitchPayload: ... +def sparse_switch(inst: Instruction) -> SparseSwitchPayload: ... 
diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index 5f9d487..d59f4e0 100644 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -12,6 +12,7 @@ from .structs import ( TypeItem, ) from .class_accessor import ClassAccessor +from .code import CodeItemAccessor class VerifyPreset: ALL: VerifyPreset @@ -80,4 +81,5 @@ class DexFile: def get_interfaces_list(self, class_def: ClassDef) -> Optional[List[TypeItem]]: ... # class data - def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: ... \ No newline at end of file + def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: ... + def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... \ No newline at end of file diff --git a/python/dexrs/code.py b/python/dexrs/code.py index 8793fb6..0322bf2 100644 --- a/python/dexrs/code.py +++ b/python/dexrs/code.py @@ -11,6 +11,7 @@ verify_flags = rust_code.verify_flags flags = rust_code.flags signatures = rust_code.signatures +vreg = rust_code.vreg __all__ = [ "CodeItemAccessor", diff --git a/python/tests/test_dex_parse.py b/python/tests/test_dex_parse.py index 5d03769..68a1548 100644 --- a/python/tests/test_dex_parse.py +++ b/python/tests/test_dex_parse.py @@ -6,7 +6,6 @@ from . 
import _util - def test_parse_invalid_dex() -> None: with pytest.raises(PyDexError): data = dexrs.container.InMemoryDexContainer(b"...") @@ -18,4 +17,15 @@ def test_parse_valid_dex() -> None: data = dexrs.container.FileDexContainer(str(path)) dex = dexrs.DexFile.from_file(data) + for i in range(dex.num_class_defs()): + class_def = dex.get_class_def(i) + class_a = dex.get_class_accessor(class_def) + for method in class_a.get_methods(): + if method.code_offset == 0: + continue + + code_a = dex.get_code_item_accessor(method.code_offset) + for insn in code_a.insns(): + pass + assert dex.get_header().version_int == 35 diff --git a/src/error.rs b/src/error.rs index 59ae59c..f817dcf 100644 --- a/src/error.rs +++ b/src/error.rs @@ -177,6 +177,19 @@ pub enum DexError { "Got invalid mUTF8 encoded string that encodes up to {idx} characters with only {len} bytes" )] MalformedMUTF8Sequence { idx: usize, len: usize }, + + #[error("Invalid complex instruction({opcode}): requested length{req_size} exceeds max size of {max_size} in bytecode stream")] + ComplexInstructionError { + opcode: &'static str, + req_size: usize, + max_size: usize, + }, + + #[error("Invalid instruction({opcode}): requested access to complex instruction {target} with non-complex instruction data")] + BadComplexInstructionAccess { + opcode: &'static str, + target: &'static str, + }, } #[macro_export] diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index 33ce9c7..9e423ae 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -11,11 +11,12 @@ use crate::Result; use super::{CodeItem, DexContainer, DexFile, Instruction}; #[cfg(feature = "python")] -use super::{PyDexCodeItem, PyInstruction}; +use super::{PyDexCodeItem, PyDexInstruction}; // ---------------------------------------------------------------------------- // CodeItemAccessor // ---------------------------------------------------------------------------- +#[derive(Debug, Clone)] pub struct 
CodeItemAccessor<'a> { code_off: u32, code_item: &'a CodeItem, @@ -45,7 +46,7 @@ impl<'a> CodeItemAccessor<'a> { #[inline(always)] pub fn from_code_item( - dex: &'a DexFile<'a, C>, + dex: &DexFile<'a, C>, code_item: &'a CodeItem, code_off: u32, ) -> Result> @@ -180,12 +181,12 @@ impl PyCodeItemAccessor { self.inner.0.insns() } - pub fn inst_at(&self, pc: u32) -> PyInstruction { + pub fn inst_at(&self, pc: u32) -> PyDexInstruction { self.inner.0.inst_at(pc).into() } // REVISIT: dex_pc is unused here - pub fn insns(&self) -> PyResult> { + pub fn insns(&self) -> PyResult> { Ok(DexInstructionIterator::new(self.inner.0.insns) .map(Into::into) .collect()) diff --git a/src/file/dump.rs b/src/file/dump.rs index 40d2608..97ec82c 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -87,9 +87,10 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } pub fn pretty_utf16_at(&self, idx: u32) -> String { - self.pretty_utf16(&StringId { - string_data_off: idx, - }) + match self.get_string_id(idx) { + Ok(str_data) => self.pretty_utf16(&str_data), + Err(_) => format!("<>", idx), + } } pub fn pretty_method_at(&self, method_idx: u32, opts: prettify::Method) -> String { @@ -141,7 +142,10 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } impl<'a> Instruction<'a> { - pub fn to_string(&self, dex_file: Option<&DexFile<'_>>) -> Result { + pub fn to_string(&self, dex_file: Option<&DexFile<'a, C>>) -> Result + where + C: DexContainer<'a>, + { let opcode = self.name(); Ok(match self.format() { &Format::k10x => format!("{opcode}"), diff --git a/src/file/instruction.rs b/src/file/instruction.rs index f7c5d1d..16e7ffa 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -1,6 +1,9 @@ +use core::slice; #[cfg(feature = "python")] use std::sync::Arc; +use plain::Plain; + #[cfg(feature = "python")] use crate::py::rs_type_wrapper; @@ -9,6 +12,7 @@ use crate::{dex_err, error::DexError, Result}; // ---------------------------------------------------------------------------- // 
Instruction // ---------------------------------------------------------------------------- +#[repr(C)] pub struct Instruction<'a>(&'a [u16]); impl<'a> Instruction<'a> { @@ -110,21 +114,27 @@ impl<'a> Instruction<'a> { code_flags::Custom } } + + fn as_bytes(&self) -> &'a [u8] { + // since [u16] will always be a multiple of [u8], we can "safely" + // cast it + unsafe { slice::from_raw_parts(self.0.as_ptr() as *const u8, self.0.len() * 2) } + } } // >>> begin python export #[cfg(feature = "python")] rs_type_wrapper!( Instruction<'static>, - PyInstruction, - RsInstruction, + PyDexInstruction, + RsDexInstruction, name: "Instruction", module: "dexrs._internal.code" ); #[cfg(feature = "python")] #[pyo3::pymethods] -impl PyInstruction { +impl PyDexInstruction { pub fn fetch16(&self, offset: u32) -> pyo3::PyResult { Ok(self.inner.0.fetch16(offset as usize)?) } @@ -189,13 +199,30 @@ impl PyInstruction { self.inner.0.verify_flags() } - pub fn next(&self) -> Option { + pub fn next(&self) -> Option { self.inner.0.next().map(Into::into) } pub fn size_in_code_units(&self) -> usize { self.inner.0.size_in_code_units() } + + #[pyo3(signature = (py_dex_file=None))] + pub fn to_string<'py>( + &self, + py: pyo3::Python<'py>, + py_dex_file: Option>, + ) -> pyo3::PyResult { + if py_dex_file.is_none() { + return Ok(self.inner.0.to_string::<&[u8]>(None)?); + } + Ok(match &py_dex_file.unwrap().try_borrow(py)?.inner.as_ref() { + crate::py::file::RsDexFile::InMemory { dex, .. } => { + self.inner.0.to_string(Some(dex))? + } + crate::py::file::RsDexFile::File { dex, .. 
} => self.inner.0.to_string(Some(dex))?, + }) + } } // <<< end python export @@ -412,7 +439,7 @@ define_flags!( impl<'a> Instruction<'a> { #[inline(always)] const fn format_desc(&self) -> &'static InstructionDescriptor { - &Instruction::INSN_DESCRIPTORS[(self.0[0] & 0xFF) as usize] + &Instruction::INSN_DESCRIPTORS[(self.0[0] as u8 & 0xFF) as usize] } #[inline(always)] @@ -469,6 +496,204 @@ impl<'a> Instruction<'a> { } } +pub(crate) trait ComplexFromInst: Plain + Sized { + fn from_inst<'a>(inst: &Instruction<'a>) -> Result<&'a Self> { + let size_in_code_units = inst.size_in_code_units(); + if size_in_code_units >= inst.0.len() { + return dex_err!(ComplexInstructionError { + opcode: inst.name(), + req_size: size_in_code_units, + max_size: inst.0.len() + }); + } + + let size_in_bytes = size_in_code_units * 2; + let inst_data: &[u8] = &inst.as_bytes()[..size_in_bytes]; + Ok(match Plain::from_bytes(inst_data) { + Ok(payload) => payload, + Err(_) => { + return dex_err!(ComplexInstructionError { + opcode: inst.name(), + req_size: size_in_code_units, + max_size: inst.0.len() + }); + } + }) + } +} + +// ---------------------------------------------------------------------------- +// Packed Switch +// ---------------------------------------------------------------------------- +#[repr(C)] +#[derive(Debug, Clone)] +pub struct PackedSwitchPayload<'a> { + pub ident: u16, + pub case_count: u16, + pub first_key: i32, + pub targets: &'a [i32], +} + +unsafe impl plain::Plain for PackedSwitchPayload<'_> {} + +impl ComplexFromInst for PackedSwitchPayload<'_> {} + +// >>> begin python export +#[cfg(feature = "python")] +#[pyo3::pyclass(name = "PackedSwitchPayload", module = "dexrs._internal.code")] +pub struct PyDexPackedSwitchPayload { + pub ident: u16, + pub case_count: u16, + pub first_key: i32, + pub targets: Vec, +} + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexPackedSwitchPayload { + #[getter] + pub fn targets(&self) -> &[i32] { + &self.targets + } + + #[getter] + 
pub fn first_key(&self) -> i32 { + self.first_key + } + + #[getter] + pub fn case_count(&self) -> u16 { + self.case_count + } +} + +#[cfg(feature = "python")] +impl From<&'_ PackedSwitchPayload<'_>> for PyDexPackedSwitchPayload { + fn from(payload: &'_ PackedSwitchPayload) -> PyDexPackedSwitchPayload { + PyDexPackedSwitchPayload { + ident: payload.ident, + case_count: payload.case_count, + first_key: payload.first_key, + targets: payload.targets.to_vec(), + } + } +} +// <<< end python export + +// ---------------------------------------------------------------------------- +// Sparse Switch +// ---------------------------------------------------------------------------- +#[repr(C)] +#[derive(Debug, Clone)] +pub struct SparseSwitchPayload<'a> { + pub ident: u16, + pub case_count: u16, + pub keys_and_targets: &'a [i32], +} + +unsafe impl plain::Plain for SparseSwitchPayload<'_> {} + +impl ComplexFromInst for SparseSwitchPayload<'_> {} + +// >>> begin python export +#[cfg(feature = "python")] +#[pyo3::pyclass(name = "SparseSwitchPayload", module = "dexrs._internal.code")] +pub struct PyDexSparseSwitchPayload { + pub ident: u16, + pub case_count: u16, + pub keys_and_targets: Vec, +} + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexSparseSwitchPayload { + #[getter] + pub fn keys(&self) -> Vec { + self.keys_and_targets[..self.case_count as usize].to_vec() + } + + #[getter] + pub fn targets(&self) -> &[i32] { + &self.keys_and_targets[self.case_count as usize..] 
+ } + + #[getter] + pub fn case_count(&self) -> u16 { + self.case_count + } +} + +#[cfg(feature = "python")] +impl From<&'_ SparseSwitchPayload<'_>> for PyDexSparseSwitchPayload { + fn from(payload: &'_ SparseSwitchPayload<'_>) -> Self { + PyDexSparseSwitchPayload { + ident: payload.ident, + case_count: payload.case_count, + keys_and_targets: payload.keys_and_targets.to_vec(), + } + } +} + +// <<< end python export + +// ---------------------------------------------------------------------------- +// Fill Array Data +// ---------------------------------------------------------------------------- +#[repr(C)] +#[derive(Debug, Clone)] +pub struct FillArrayDataPayload<'a> { + pub ident: u16, + pub element_width: u16, + pub element_count: u32, + pub data: &'a [u8], +} + +unsafe impl plain::Plain for FillArrayDataPayload<'_> {} + +impl ComplexFromInst for FillArrayDataPayload<'_> {} + +// >>> begin python export +#[cfg(feature = "python")] +#[pyo3::pyclass(name = "FillArrayDataPayload", module = "dexrs._internal.code")] +pub struct PyDexFillArrayDataPayload { + pub ident: u16, + pub element_width: u16, + pub element_count: u32, + pub data: Vec, +} + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexFillArrayDataPayload { + #[getter] + pub fn data(&self) -> Vec { + self.data.clone() + } + + #[getter] + pub fn element_count(&self) -> u32 { + self.element_count + } + + #[getter] + pub fn element_width(&self) -> u16 { + self.element_width + } +} + +#[cfg(feature = "python")] +impl From<&'_ FillArrayDataPayload<'_>> for PyDexFillArrayDataPayload { + fn from(payload: &'_ FillArrayDataPayload) -> PyDexFillArrayDataPayload { + PyDexFillArrayDataPayload { + ident: payload.ident, + element_width: payload.element_width, + element_count: payload.element_count, + data: payload.data.to_vec(), + } + } +} +// <<< end python export + pub struct VarArgs { pub count: u8, pub arg: Vec, @@ -506,6 +731,42 @@ pub mod vreg { Ok((inst.fetch16(0)? 
>> 12) as u8) } + //------------------------------------------------------------------------------ + // complex instructions + //------------------------------------------------------------------------------ + #[inline] + pub fn sparse_switch<'a>(inst: &'a Instruction<'a>) -> Result<&'a SparseSwitchPayload<'a>> { + match inst.0[0] { + signatures::SparseSwitchSignature => Ok(SparseSwitchPayload::from_inst(inst)?), + _ => dex_err!(BadComplexInstructionAccess { + opcode: inst.name(), + target: std::any::type_name::() + }), + } + } + + #[inline] + pub fn packed_switch<'a>(inst: &'a Instruction<'a>) -> Result<&'a PackedSwitchPayload<'a>> { + match inst.0[0] { + signatures::PackedSwitchSignature => Ok(PackedSwitchPayload::from_inst(inst)?), + _ => dex_err!(BadComplexInstructionAccess { + opcode: inst.name(), + target: std::any::type_name::() + }), + } + } + + #[inline] + pub fn array_data<'a>(inst: &'a Instruction<'a>) -> Result<&'a FillArrayDataPayload<'a>> { + match inst.0[0] { + signatures::ArrayDataSignature => Ok(FillArrayDataPayload::from_inst(inst)?), + _ => dex_err!(BadComplexInstructionAccess { + opcode: inst.name(), + target: std::any::type_name::() + }), + } + } + //------------------------------------------------------------------------------ // VRegA //------------------------------------------------------------------------------ @@ -789,6 +1050,53 @@ pub mod vreg { } } +#[cfg(feature = "python")] +macro_rules! 
define_vreg_mod { + ({ $($name:ident -> $ret_ty:ty,)* }, {$($name_result:ident -> $py_ret_ty:ty,)*}) => { + #[cfg(feature = "python")] + #[allow(non_snake_case)] + #[pyo3::pymodule(name = "vreg")] + mod py_vreg { + use super::*; + use pyo3::{Py, PyResult, Python}; + + $( + #[pyo3::pyfunction] + pub fn $name<'py>(py: Python<'py>, py_inst: Py) -> PyResult<$ret_ty> { + let rs_inst = &py_inst.try_borrow(py)?.inner; + Ok(vreg::$name(&rs_inst.as_ref().0)) + } + )* + + $( + #[pyo3::pyfunction] + pub fn $name_result<'py>(py: Python<'py>, py_inst: Py) -> PyResult<$py_ret_ty> { + let rs_inst = &py_inst.try_borrow(py)?.inner; + Ok(vreg::$name_result(&rs_inst.as_ref().0)?.into()) + } + )* + } + }; +} + +#[cfg(feature = "python")] +define_vreg_mod!({ + has_a -> bool, + has_b -> bool, + has_c -> bool, + has_h -> bool, + has_wide_b -> bool, +}, { + A -> i32, + B -> i32, + C -> i32, + H -> i32, + wide_b -> u64, + array_data -> PyDexFillArrayDataPayload, + sparse_switch -> PyDexSparseSwitchPayload, + packed_switch -> PyDexPackedSwitchPayload, +}); + //////////////////////////////////////////////////////////////////////////////////////////////////////////// // instruction descriptors //////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1126,13 +1434,16 @@ insn_desc_table!( #[pyo3::pymodule(name = "code")] pub(crate) mod py_code { #[pymodule_export] - use super::{PyDexCode, PyDexFormat, PyDexIndexType, PyInstruction}; + use super::{ + PyDexCode, PyDexFillArrayDataPayload, PyDexFormat, PyDexIndexType, PyDexInstruction, + PyDexPackedSwitchPayload, PyDexSparseSwitchPayload, + }; #[pymodule_export] use crate::file::PyCodeItemAccessor; // constants #[pymodule_export] - use super::{py_code_flags, py_flags, py_signatures, py_verify_flags}; + use super::{py_code_flags, py_flags, py_signatures, py_verify_flags, py_vreg}; } // <<< end python module export diff --git a/src/file/mod.rs b/src/file/mod.rs index ddc1718..8273e9f 100644 --- 
a/src/file/mod.rs +++ b/src/file/mod.rs @@ -368,7 +368,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline(always)] - pub fn get_code_item_accessor(&'a self, offset: u32) -> Result> { + pub fn get_code_item_accessor(&self, offset: u32) -> Result> { check_lt_result!(offset, self.file_size(), "code item offset"); let code_item = self.non_null_data_ptr(offset)?; CodeItemAccessor::from_code_item( diff --git a/src/file/modifiers.rs b/src/file/modifiers.rs index 8a7ee95..2315a9a 100644 --- a/src/file/modifiers.rs +++ b/src/file/modifiers.rs @@ -31,5 +31,5 @@ pub const ACC_ANNOTATION: u32 = 0x2000; // class, ic (1.5) pub const ACC_ENUM: u32 = 0x4000; // class, field, ic (1.5) pub const ACC_CONSTRUCTOR: u32 = 0x00010000; // method (dex only) <(cl)init> -pub const ACC_DECLARED_SYNCHRONIZED: u32 = 0x00020000; // method (dex only) +pub const ACC_DECLARED_SYNCHRONIZED: u32 = 0x00020000; // method (dex only) pub const ACC_CLASSISPROXY: u32 = 0x00040000; // class (dex only) \ No newline at end of file diff --git a/src/file/structs.rs b/src/file/structs.rs index ec4a9ed..15a653c 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -509,7 +509,8 @@ macro_rules! define_encoded_value { fn from(value: &EncodedValue) -> Self { match value { $( - EncodedValue::$primitive_name(value) => PyDexEncodedValue::$primitive_name { $primitive_py_name: *value }, + EncodedValue::$primitive_name(value) => + PyDexEncodedValue::$primitive_name { $primitive_py_name: *value }, )* EncodedValue::Array(v) => PyDexEncodedValue::Array { elements: v.iter().map(Into::into).collect(), @@ -543,7 +544,6 @@ define_encoded_value!({ (Boolean: value=bool), }); - // -------------------------------------------------------------------- // Annotation Element // -------------------------------------------------------------------- diff --git a/src/py.rs b/src/py.rs index e31f291..2818305 100644 --- a/src/py.rs +++ b/src/py.rs @@ -74,8 +74,25 @@ macro_rules! 
rs_struct_fields { )* } }; + ($py_type:ident, $inner:tt, { $(($name:ident, $rtype:ty),)+ }, $($extra:tt)*) => { + #[cfg(feature = "python")] + #[pyo3::pymethods] + impl $py_type { + $( + #[getter] + pub fn $name(&self) -> $rtype { + self.$inner.0.$name + } + )+ + + $( + $extra + )* + } + }; + } -pub(crate) use rs_type_wrapper; +pub(crate) use rs_struct_fields; pub(crate) use rs_struct_wrapper; -pub(crate) use rs_struct_fields; \ No newline at end of file +pub(crate) use rs_type_wrapper; diff --git a/src/py/file.rs b/src/py/file.rs index b6bb278..e9a8215 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; +use crate::file::PyCodeItemAccessor; use crate::file::{ verifier::VerifyPreset, DexFile, DexLocation, FieldIndex, ProtoIndex, PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, PyDexTypeId, @@ -49,7 +50,7 @@ pub enum RsDexFile { // Python wrapper class that enables mutli-threading operations #[pyo3::pyclass(name = "DexFile", module = "dexrs._internal.file")] pub struct PyDexFileImpl { - inner: Arc, + pub(crate) inner: Arc, } macro_rules! 
bind_dex { @@ -373,6 +374,17 @@ impl PyDexFileImpl { Ok(dex_action_impl!(self, get_class_accessor?, class_def, py).map(Into::into)) } + // ---------------------------------------------------------------------------- + // code item accessor + // ---------------------------------------------------------------------------- + pub fn get_code_item_accessor<'py>( + &self, + py: Python<'py>, + code_offset: u32, + ) -> PyResult { + Ok(dex_action_impl!(self, get_code_item_accessor?, code_offset, py).into()) + } + // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- From b6ce9eea4ae87887f68df274ad4a32803337c0c6 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 18 Feb 2025 18:53:47 +0100 Subject: [PATCH 35/46] Add complex instruction to pretty dump --- + This should close #3 Fixes #3 --- src/file/dump.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/file/dump.rs b/src/file/dump.rs index 97ec82c..e785210 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -5,7 +5,8 @@ use crate::{ }; use super::{ - vreg, Code, DexContainer, DexFile, FieldId, Format, Instruction, MethodId, StringId, TypeId, + signatures, vreg, Code, DexContainer, DexFile, FieldId, Format, Instruction, MethodId, + StringId, TypeId, }; pub mod prettify { @@ -147,6 +148,16 @@ impl<'a> Instruction<'a> { C: DexContainer<'a>, { let opcode = self.name(); + if self.opcode() == Code::NOP { + return Ok((match self.fetch16(0)? 
{ + signatures::ArrayDataSignature => "array-data", + signatures::PackedSwitchSignature => "packed-switch", + signatures::SparseSwitchSignature => "sparse-switch", + _ => opcode, + }) + .to_string()); + } + Ok(match self.format() { &Format::k10x => format!("{opcode}"), Format::k12x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), From 1a729111411fd30dbc1afa25b80268b32e82f338 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 21 Feb 2025 20:15:59 +0100 Subject: [PATCH 36/46] Add support for TryItem and EncodedCatchHandler --- + Removed unnecessary import from dex_basic_ops.rs + Added new fuzzing test for code items + Added CatchHandlerData parser to Rust and Python API + New Error to reflect changes in dependencies (+leb128fmt) for signed variable integers + Added leb128 Python API --- Cargo.toml | 1 + examples/dex_basic_ops.rs | 1 - fuzz/Cargo.toml | 8 ++ fuzz/fuzz_targets/code_item.rs | 15 +++ fuzz/fuzz_targets/instructions.rs | 13 +-- python/dexrs/_internal/file.pyi | 9 +- python/dexrs/_internal/leb128.pyi | 3 + python/dexrs/_internal/structs.pyi | 6 ++ python/dexrs/leb128.py | 12 +++ src/error.rs | 3 + src/file/code_item_accessors.rs | 93 +++++++++++++++- src/file/mod.rs | 52 +++++++-- src/file/structs.rs | 36 ++++++- src/leb128.rs | 25 ++--- src/py.rs | 11 +- src/py/file.rs | 165 +++++++++++++++++++---------- 16 files changed, 359 insertions(+), 94 deletions(-) create mode 100644 fuzz/fuzz_targets/code_item.rs create mode 100644 python/dexrs/_internal/leb128.pyi create mode 100644 python/dexrs/leb128.py diff --git a/Cargo.toml b/Cargo.toml index 54a27de..07b38f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] adler32 = "1.2.0" +leb128fmt = "0.1.0" memmap2 = "0.9.5" openssl = "0.10.64" plain = "0.2.3" diff --git a/examples/dex_basic_ops.rs b/examples/dex_basic_ops.rs index 41fa6eb..4fa6bcb 100644 --- a/examples/dex_basic_ops.rs +++ 
b/examples/dex_basic_ops.rs @@ -3,7 +3,6 @@ use dexrs::file::dump::prettify; use dexrs::file::DexFile; use dexrs::Result; -use openssl::string; fn dex_get_method(dex: &DexFile<'_>) -> Result<()> { // the DexFile struct does not provide an interface to query all diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 000140a..0f0c15d 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -52,3 +52,11 @@ path = "fuzz_targets/mutf8.rs" test = false doc = false bench = false + + +[[bin]] +name = "code_item" +path = "fuzz_targets/code_item.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/code_item.rs b/fuzz/fuzz_targets/code_item.rs new file mode 100644 index 0000000..d630289 --- /dev/null +++ b/fuzz/fuzz_targets/code_item.rs @@ -0,0 +1,15 @@ +#![no_main] +#![allow(non_snake_case)] + +use std::hint::black_box; + +use dexrs::file::EncodedCatchHandlerIterator; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(iterator) = EncodedCatchHandlerIterator::new(data) { + for handler in iterator { + let _ = black_box(handler); + } + } +}); diff --git a/fuzz/fuzz_targets/instructions.rs b/fuzz/fuzz_targets/instructions.rs index 075e618..99dc9ec 100644 --- a/fuzz/fuzz_targets/instructions.rs +++ b/fuzz/fuzz_targets/instructions.rs @@ -4,10 +4,7 @@ #![no_main] #![allow(non_snake_case)] -use dexrs::file::{ - ComplexFromInst, DexInstructionIterator, FillArrayDataPayload, Instruction, - PackedSwitchPayload, SparseSwitchPayload, -}; +use dexrs::file::{vreg, DexInstructionIterator, Instruction}; extern crate dexrs; extern crate libfuzzer_sys; @@ -18,7 +15,7 @@ libfuzzer_sys::fuzz_target!(|data: &[u8]| { // Two aspects let iter = DexInstructionIterator::new(bytes); for inst in iter { - if let Ok(inst_dump) = inst.to_string(None) { + if let Ok(inst_dump) = inst.to_string::<&[u8]>(None) { assert!(inst_dump.len() > 0); } } @@ -28,13 +25,13 @@ libfuzzer_sys::fuzz_target!(|data: &[u8]| { if bytes.len() >= 1 { let inst = Instruction::at(bytes); 
// these parsing methods MUST withstand random data - if let Ok(payload) = PackedSwitchPayload::from_inst(&inst) { + if let Ok(payload) = vreg::packed_switch(&inst) { let _ = payload; } - if let Ok(payload) = SparseSwitchPayload::from_inst(&inst) { + if let Ok(payload) = vreg::sparse_switch(&inst) { let _ = payload; } - if let Ok(payload) = FillArrayDataPayload::from_inst(&inst) { + if let Ok(payload) = vreg::array_data(&inst) { let _ = payload; } } diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index d59f4e0..17f7f83 100644 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -10,6 +10,8 @@ from .structs import ( MethodId, ClassDef, TypeItem, + CatchHandlerData, + TryItem, ) from .class_accessor import ClassAccessor from .code import CodeItemAccessor @@ -82,4 +84,9 @@ class DexFile: # class data def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: ... - def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... \ No newline at end of file + def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... + def get_try_items(self, ca: CodeItemAccessor) -> List[TryItem]: ... + def get_catch_handlers(self, ca: CodeItemAccessor, try_item: TryItem) -> List[CatchHandlerData]: ... + def get_catch_handlers_at( + self, ca: CodeItemAccessor, offset: int + ) -> List[CatchHandlerData]: ... diff --git a/python/dexrs/_internal/leb128.pyi b/python/dexrs/_internal/leb128.pyi new file mode 100644 index 0000000..20a8432 --- /dev/null +++ b/python/dexrs/_internal/leb128.pyi @@ -0,0 +1,3 @@ +def decode_uleb128(data: bytes) -> int: ... +def decode_sleb128(data: bytes) -> int: ... +def decode_leb128p1(data: bytes) -> int: ... 
diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi index 24454e9..e680312 100644 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -76,6 +76,12 @@ class TryItem: insn_count: int handler_off: int +class CatchHandlerData: + type_idx: int + address: int + + def is_catch_all(self) -> bool: ... + class AnnotationsDirectoryItem: class_annotations_off: int fields_size: int diff --git a/python/dexrs/leb128.py b/python/dexrs/leb128.py new file mode 100644 index 0000000..1e603af --- /dev/null +++ b/python/dexrs/leb128.py @@ -0,0 +1,12 @@ +from dexrs._internal import leb128 as rust_leb128 + + +decode_uleb128 = rust_leb128.decode_uleb128 +decode_sleb128 = rust_leb128.decode_sleb128 +decode_leb128p1 = rust_leb128.decode_leb128p1 + +__all__ = [ + "decode_uleb128", + "decode_sleb128", + "decode_leb128p1", +] \ No newline at end of file diff --git a/src/error.rs b/src/error.rs index f817dcf..0be95b7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -90,6 +90,9 @@ pub enum DexError { #[error("Failed to parse varint: {0}")] VarIntError(#[from] varint_simd::VarIntDecodeError), + #[error("Failed to parse signed varint: {0}")] + SignedVarIntError(#[from] leb128fmt::Error), + #[error("Bad string data({offset}) contains invalid LEB128({kind:?}) which can't be converted to a valid u32")] BadStringData { offset: usize, diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index 9e423ae..ce0142a 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -6,9 +6,9 @@ use std::sync::Arc; #[cfg(feature = "python")] use crate::py::rs_type_wrapper; -use crate::Result; +use crate::{leb128, Result}; -use super::{CodeItem, DexContainer, DexFile, Instruction}; +use super::{CatchHandlerData, CodeItem, DexContainer, DexFile, Instruction, TryItem, TypeIndex}; #[cfg(feature = "python")] use super::{PyDexCodeItem, PyDexInstruction}; @@ -39,6 +39,29 @@ impl<'a> CodeItemAccessor<'a> { 
self.insns } + #[inline] + pub fn get_tries_off(&self) -> Option { + if self.tries_size() == 0 { + return None; // + } + + let offset = (self.code_off() as usize) + + std::mem::size_of::() + + self.insns_size_in_code_units() as usize; + // must be 4-byte aligned + let offset = (offset + 3) & !3; + Some(offset) + } + + #[inline] + pub fn get_catch_handler_data_off(&self) -> usize { + let tries_off = self.code_off() as usize + + std::mem::size_of::() + + self.insns_size_in_code_units() as usize; + + tries_off + (self.tries_size() as usize * std::mem::size_of::()) + } + #[inline] pub fn has_code(&self) -> bool { !self.insns.is_empty() @@ -244,3 +267,69 @@ impl<'a> Iterator for DexInstructionIterator<'a> { } } } + +// ---------------------------------------------------------------------------- +// EncodedCatchHandler Iterator +// ---------------------------------------------------------------------------- + +pub struct EncodedCatchHandlerIterator<'a> { + data: &'a [u8], + offset: usize, + catch_all: bool, + remaining: i32, +} + +impl<'a> EncodedCatchHandlerIterator<'a> { + pub fn new(data: &'a [u8]) -> Result { + let mut pos = 0; + let remaining = leb128::decode_sleb128(&data, &mut pos)?; + Ok(Self { + data, + offset: pos, + catch_all: remaining <= 0, + remaining: if remaining <= 0 { + remaining + } else { + -remaining + }, + }) + } +} + +impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { + type Item = CatchHandlerData; + + fn next(&mut self) -> Option { + if self.remaining == -1 { + return None; + } + + let mut handler = CatchHandlerData::default(); + if self.remaining > 0 { + match leb128::decode_leb128_off::(&self.data, &mut self.offset) { + Ok(v) => handler.type_idx = v as TypeIndex, + Err(_) => return None, + }; + match leb128::decode_leb128_off::(&self.data, &mut self.offset) { + Ok(v) => handler.address = v, + Err(_) => return None, + } + self.remaining -= 1; + return Some(handler); + } + + if self.catch_all { + handler.is_catch_all = true; + 
handler.type_idx = TypeIndex::MAX; + match leb128::decode_leb128_off::(&self.data, &mut self.offset) { + Ok(v) => handler.address = v, + Err(_) => return None, + } + self.catch_all = false; + return Some(handler); + } + + self.remaining = -1; + None + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 8273e9f..b8414ab 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -557,14 +557,15 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { //------------------------------------------------------------------------------ // TryItem //------------------------------------------------------------------------------ - pub fn get_try_item(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { - let offset = (ca.code_off() as usize) - + std::mem::size_of::() - + ca.insns_size_in_code_units() as usize; - // must be 4-byte aligned - let offset = (offset + 3) & !3; - check_lt_result!(offset, self.file_size(), TryItem); - self.get_try_items_raw(offset as u32, ca.tries_size() as u16) + pub fn get_try_items(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { + // skip heavy work if there are no try items + match ca.get_tries_off() { + None => return Ok(&[]), + Some(tries_off) => { + check_lt_result!(tries_off, self.file_size(), TryItem); + self.get_try_items_raw(tries_off as u32, ca.tries_size() as u16) + } + } } #[inline] @@ -573,6 +574,41 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.non_null_array_data_ptr(tries_off, tries_size as usize) } + //------------------------------------------------------------------------------ + // EncodedCatchHandler + //------------------------------------------------------------------------------ + #[inline] + pub fn get_catch_handler_data( + &self, + ca: &CodeItemAccessor<'_>, + offset: usize, + ) -> Result<&'a [u8]> { + let data_offset = ca.get_catch_handler_data_off(); + check_lt_result!(data_offset + offset, self.file_size(), CatchHandlerData); + + // TODO: handle values greater than u16 since u16::MAX is 
maximum offset + Ok(&self.mmap[data_offset + offset..]) + } + + #[inline] + pub fn iter_catch_handlers_at( + &self, + ca: &CodeItemAccessor<'_>, + offset: usize, + ) -> Result> { + let data = self.get_catch_handler_data(ca, offset)?; + EncodedCatchHandlerIterator::new(&data) + } + + #[inline] + pub fn iter_catch_handlers( + &self, + ca: &CodeItemAccessor<'_>, + try_item: &TryItem, + ) -> Result> { + self.iter_catch_handlers_at(ca, try_item.handler_off as usize) + } + //------------------------------------------------------------------------------ // Annotations //------------------------------------------------------------------------------ diff --git a/src/file/structs.rs b/src/file/structs.rs index 15a653c..901baf9 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -360,6 +360,33 @@ rs_struct_fields!(PyDexTryItem, { },); // <<< end python export +// ---------------------------------------------------------------------------- +// CatchHandler Item +// ---------------------------------------------------------------------------- +#[derive(Debug, Clone, Default)] +pub struct CatchHandlerData { + pub type_idx: TypeIndex, + pub address: u32, + pub is_catch_all: bool, +} + +// >>> begin python export +#[cfg(feature = "python")] +rs_struct_wrapper!("CatchHandlerData", PyDexCatchHandlerData, CatchHandlerData); + +#[cfg(feature = "python")] +rs_struct_fields!(PyDexCatchHandlerData, { + (type_idx, TypeIndex), + (address, u32), +}, + +fn is_catch_all(&self) -> bool { + self.0.is_catch_all +} + +); +// <<< end python export + // -------------------------------------------------------------------- // AnnotationsDirectoryItem // -------------------------------------------------------------------- @@ -637,10 +664,11 @@ pub(crate) mod py_structs { #[pymodule_export] use super::{ PyDexAnnotationElement, PyDexAnnotationItem, PyDexAnnotationsDirectoryItem, - PyDexCallSiteIdItem, PyDexClassDef, PyDexCodeItem, PyDexEncodedAnnotation, - PyDexEncodedValue, 
PyDexFieldAnnotationsItem, PyDexFieldId, PyDexMethodAnnotationsItem, - PyDexMethodHandleItem, PyDexMethodId, PyDexParameterAnnotationsItem, PyDexProtoId, - PyDexStringId, PyDexTryItem, PyDexTypeId, PyDexTypeItem, + PyDexCallSiteIdItem, PyDexCatchHandlerData, PyDexClassDef, PyDexCodeItem, + PyDexEncodedAnnotation, PyDexEncodedValue, PyDexFieldAnnotationsItem, PyDexFieldId, + PyDexMethodAnnotationsItem, PyDexMethodHandleItem, PyDexMethodId, + PyDexParameterAnnotationsItem, PyDexProtoId, PyDexStringId, PyDexTryItem, PyDexTypeId, + PyDexTypeItem, }; #[pymodule_export] diff --git a/src/leb128.rs b/src/leb128.rs index e800f7d..35cc240 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -1,4 +1,5 @@ -use varint_simd; +use varint_simd; // encode/decode unsigned +use leb128fmt; // encode/decode signed use crate::Result; @@ -14,16 +15,6 @@ pub fn decode_leb128p1(data_in: &[u8]) -> Result<(i32, usize)> { Ok((result as i32 - 1, size)) } -#[inline(always)] -pub fn decode_leb128_adv( - data_in: &[u8], - ptr_pos: &mut usize, -) -> Result { - let (value, size) = decode_leb128(data_in)?; - *ptr_pos += size; - Ok(value) -} - #[inline(always)] pub fn decode_leb128_off( data_in: &[u8], @@ -41,6 +32,11 @@ pub fn decode_leb128p1_off(data_in: &[u8], ptr_pos: &mut usize) -> Result { Ok(value) } +#[inline(always)] +pub fn decode_sleb128(data_in: &[u8], ptr_pos: &mut usize) -> Result { + Ok(leb128fmt::decode_sint_slice::(data_in, ptr_pos)?) +} + // python exports #[cfg(feature = "python")] #[pyo3::pymodule(name = "leb128")] @@ -48,10 +44,15 @@ pub(crate) mod py_leb128 { use pyo3::PyResult; #[pyo3::pyfunction] - pub fn decode_leb128(data_in: &[u8]) -> PyResult<(u32, usize)> { + pub fn decode_uleb128(data_in: &[u8]) -> PyResult<(u32, usize)> { Ok(super::decode_leb128::(data_in)?) } + #[pyo3::pyfunction] + pub fn decode_sleb128(data_in: &[u8]) -> PyResult { + Ok(super::decode_sleb128(data_in, &mut 0)?) 
+ } + #[pyo3::pyfunction] pub fn decode_leb128p1(data_in: &[u8]) -> PyResult<(i32, usize)> { Ok(super::decode_leb128p1(data_in)?) diff --git a/src/py.rs b/src/py.rs index 2818305..44aff67 100644 --- a/src/py.rs +++ b/src/py.rs @@ -3,12 +3,12 @@ pub(crate) mod file; macro_rules! rs_type_wrapper { ($src_type:ty, $py_type:ident, $rs_type:ident, name: $name:literal, module: $module:literal) => { #[cfg(feature = "python")] - pub struct $rs_type($src_type); + pub struct $rs_type(pub(crate) $src_type); #[cfg(feature = "python")] #[pyo3::pyclass(name = $name, module = $module)] pub struct $py_type { - inner: Arc<$rs_type>, + pub(crate) inner: Arc<$rs_type>, } #[cfg(feature = "python")] @@ -54,6 +54,13 @@ macro_rules! rs_struct_wrapper { $py_type(Arc::new(value.clone())) } } + + #[cfg(feature = "python")] + impl From<$rust_type> for $py_type { + fn from(value: $rust_type) -> Self { + $py_type(Arc::new(value)) + } + } }; } diff --git a/src/py/file.rs b/src/py/file.rs index e9a8215..9ce01a9 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use pyo3::{exceptions::PyValueError, Py, PyResult, Python}; -use crate::file::PyCodeItemAccessor; use crate::file::{ verifier::VerifyPreset, DexFile, DexLocation, FieldIndex, ProtoIndex, PyDexClassDef, PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, PyDexTypeId, PyDexTypeItem, PyFileDexContainer, PyInMemoryDexContainer, StringIndex, TypeIndex, }; +use crate::file::{PyCodeItemAccessor, PyDexCatchHandlerData, PyDexTryItem}; use crate::file::class_accessor::PyClassAccessor; @@ -94,15 +94,15 @@ macro_rules! dex_action_impl { } } }}; - ($this:ident, $method:ident?, $arg:expr, $py:ident) => {{ + ($this:ident, $method:ident($($args:tt)*)?, $py:ident) => {{ match &$this.inner.as_ref() { RsDexFile::InMemory { dex, container } => { dex_container_check!(container, $py, $method); - dex.$method($arg)? + dex.$method($($args)*)? 
} RsDexFile::File { dex, container } => { dex_container_check!(container, $py, $method); - dex.$method($arg)? + dex.$method($($args)*)? } } }}; @@ -181,7 +181,7 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult { - Ok(dex_action_impl!(self, get_string_id?, index, py).into()) + Ok(dex_action_impl!(self, get_string_id(index)?, py).into()) } pub fn get_string_id_opt<'py>( @@ -189,7 +189,7 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_string_id_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_string_id_opt(index)?, py).map(Into::into)) } pub fn num_string_ids<'py>(&self, py: Python<'py>) -> PyResult { @@ -200,7 +200,7 @@ impl PyDexFileImpl { // Type Ids // ---------------------------------------------------------------------------- pub fn get_type_id<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, get_type_id?, index, py).into()) + Ok(dex_action_impl!(self, get_type_id(index)?, py).into()) } pub fn get_type_id_opt<'py>( @@ -208,7 +208,7 @@ impl PyDexFileImpl { py: Python<'py>, index: TypeIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_type_id_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_type_id_opt(index)?, py).map(Into::into)) } pub fn num_type_ids<'py>(&self, py: Python<'py>) -> PyResult { @@ -218,34 +218,30 @@ impl PyDexFileImpl { pub fn get_type_desc<'py>( &self, py: Python<'py>, - py_type_id: Py, + type_id: Py, ) -> PyResult { - let type_id = &py_type_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_type_desc_utf16?, type_id, py)) + let rs_type_id = &type_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_type_desc_utf16(rs_type_id)?, py)) } pub fn get_type_desc_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, get_type_desc_utf16_at?, index, py)) + Ok(dex_action_impl!(self, get_type_desc_utf16_at(index)?, py)) } pub fn 
pretty_type_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { Ok(dex_action_impl!(self, pretty_type_at, index, py)) } - pub fn pretty_type<'py>( - &self, - py: Python<'py>, - py_type_id: Py, - ) -> PyResult { - let type_id = &py_type_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, pretty_type, type_id, py)) + pub fn pretty_type<'py>(&self, py: Python<'py>, type_id: Py) -> PyResult { + let rs_type_id = &type_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, pretty_type, rs_type_id, py)) } // ---------------------------------------------------------------------------- // Field Ids // ---------------------------------------------------------------------------- pub fn get_field_id<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { - Ok(dex_action_impl!(self, get_field_id?, index, py).into()) + Ok(dex_action_impl!(self, get_field_id(index)?, py).into()) } pub fn get_field_id_opt<'py>( @@ -253,7 +249,7 @@ impl PyDexFileImpl { py: Python<'py>, index: FieldIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_field_id_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_field_id_opt(index)?, py).map(Into::into)) } pub fn num_field_ids<'py>(&self, py: Python<'py>) -> PyResult { @@ -263,21 +259,21 @@ impl PyDexFileImpl { pub fn get_field_name<'py>( &self, py: Python<'py>, - py_field_id: Py, + field_id: Py, ) -> PyResult { - let field_id = &py_field_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_field_name?, field_id, py)) + let rs_field_id = &field_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_field_name(rs_field_id)?, py)) } pub fn get_field_name_at<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { - Ok(dex_action_impl!(self, get_field_name_at?, index, py)) + Ok(dex_action_impl!(self, get_field_name_at(index)?, py)) } // ---------------------------------------------------------------------------- // Proto Ids // ---------------------------------------------------------------------------- pub fn 
get_proto_id<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { - Ok(dex_action_impl!(self, get_proto_id?, index, py).into()) + Ok(dex_action_impl!(self, get_proto_id(index)?, py).into()) } pub fn get_proto_id_opt<'py>( @@ -285,31 +281,27 @@ impl PyDexFileImpl { py: Python<'py>, index: ProtoIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_proto_id_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_proto_id_opt(index)?, py).map(Into::into)) } pub fn num_proto_ids<'py>(&self, py: Python<'py>) -> PyResult { Ok(dex_action_impl!(self, num_proto_ids, py)) } - pub fn get_shorty<'py>( - &self, - py: Python<'py>, - py_proto_id: Py, - ) -> PyResult { - let proto_id = &py_proto_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_shorty?, proto_id, py)) + pub fn get_shorty<'py>(&self, py: Python<'py>, proto_id: Py) -> PyResult { + let rs_proto_id = &proto_id.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_shorty(rs_proto_id)?, py)) } pub fn get_shorty_at<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { - Ok(dex_action_impl!(self, get_shorty_at?, index, py)) + Ok(dex_action_impl!(self, get_shorty_at(index)?, py)) } // ---------------------------------------------------------------------------- // method ids // ---------------------------------------------------------------------------- pub fn get_method_id<'py>(&self, py: Python<'py>, index: u32) -> PyResult { - Ok(dex_action_impl!(self, get_method_id?, index, py).into()) + Ok(dex_action_impl!(self, get_method_id(index)?, py).into()) } pub fn get_method_id_opt<'py>( @@ -317,7 +309,7 @@ impl PyDexFileImpl { py: Python<'py>, index: u32, ) -> PyResult> { - Ok(dex_action_impl!(self, get_method_id_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_method_id_opt(index)?, py).map(Into::into)) } pub fn num_method_ids<'py>(&self, py: Python<'py>) -> PyResult { @@ -328,7 +320,7 @@ impl PyDexFileImpl { // ClassDefs 
//------------------------------------------------------------------------------ pub fn get_class_def<'py>(&self, py: Python<'py>, index: u32) -> PyResult { - Ok(dex_action_impl!(self, get_class_def?, index, py).into()) + Ok(dex_action_impl!(self, get_class_def(index)?, py).into()) } pub fn get_class_def_opt<'py>( @@ -336,7 +328,7 @@ impl PyDexFileImpl { py: Python<'py>, index: u32, ) -> PyResult> { - Ok(dex_action_impl!(self, get_class_def_opt?, index, py).map(Into::into)) + Ok(dex_action_impl!(self, get_class_def_opt(index)?, py).map(Into::into)) } pub fn num_class_defs<'py>(&self, py: Python<'py>) -> PyResult { @@ -346,20 +338,26 @@ impl PyDexFileImpl { pub fn get_class_desc<'py>( &self, py: Python<'py>, - py_class_def: Py, + class_def: Py, ) -> PyResult { - let class_def = &py_class_def.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_class_desc_utf16?, class_def, py)) + let rs_class_def = &class_def.try_borrow(py)?.0; + Ok(dex_action_impl!( + self, + get_class_desc_utf16(rs_class_def)?, + py + )) } pub fn get_interfaces_list<'py>( &self, py: Python<'py>, - py_class_def: Py, + class_def: Py, ) -> PyResult>> { - let class_def = &py_class_def.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_interfaces_list?, class_def, py) - .map(|x| x.iter().map(Into::into).collect())) + let rs_class_def = &class_def.try_borrow(py)?.0; + Ok( + dex_action_impl!(self, get_interfaces_list(rs_class_def)?, py) + .map(|x| x.iter().map(Into::into).collect()), + ) } // ---------------------------------------------------------------------------- @@ -368,10 +366,10 @@ impl PyDexFileImpl { pub fn get_class_accessor<'py>( &self, py: Python<'py>, - py_class_def: Py, + class_def: Py, ) -> PyResult> { - let class_def = &py_class_def.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_class_accessor?, class_def, py).map(Into::into)) + let rs_class_def = &class_def.try_borrow(py)?.0; + Ok(dex_action_impl!(self, get_class_accessor(rs_class_def)?, py).map(Into::into)) } // 
---------------------------------------------------------------------------- @@ -382,14 +380,69 @@ impl PyDexFileImpl { py: Python<'py>, code_offset: u32, ) -> PyResult { - Ok(dex_action_impl!(self, get_code_item_accessor?, code_offset, py).into()) + Ok(dex_action_impl!(self, get_code_item_accessor(code_offset)?, py).into()) + } + + //------------------------------------------------------------------------------ + // TryItem + //------------------------------------------------------------------------------ + pub fn get_try_items<'py>( + &self, + py: Python<'py>, + ca: Py, + ) -> PyResult> { + let code_item_accessor = &ca.try_borrow(py)?.inner.0; + Ok( + dex_action_impl!(self, get_try_items(code_item_accessor)?, py) + .into_iter() + .map(Into::into) + .collect::>(), + ) + } + + //------------------------------------------------------------------------------ + // Encoded Catch Handlers + //------------------------------------------------------------------------------ + pub fn get_catch_handlers<'py>( + &self, + py: Python<'py>, + ca: Py, + try_item: Py, + ) -> PyResult> { + let code_item_accessor = &ca.try_borrow(py)?.inner.0; + let rs_try_item = &try_item.try_borrow(py)?.0; + Ok(dex_action_impl!( + self, + iter_catch_handlers(code_item_accessor, rs_try_item)?, + py + ) + .into_iter() + .map(Into::into) + .collect::>()) + } + + pub fn get_catch_handlers_at<'py>( + &self, + py: Python<'py>, + ca: Py, + offset: u32, + ) -> PyResult> { + let code_item_accessor = &ca.try_borrow(py)?.inner.0; + Ok(dex_action_impl!( + self, + iter_catch_handlers_at(code_item_accessor, offset as usize)?, + py + ) + .into_iter() + .map(Into::into) + .collect::>()) } // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- pub fn get_utf16_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!(self, get_utf16_str_at?, index, py)) + 
Ok(dex_action_impl!(self, get_utf16_str_at(index)?, py)) } pub fn get_utf16<'py>( @@ -398,7 +451,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_utf16_str?, &string_id, py)) + Ok(dex_action_impl!(self, get_utf16_str(string_id)?, py)) } pub fn get_utf16_opt_at<'py>( @@ -406,11 +459,11 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_utf16_str_opt_at?, index, py)) + Ok(dex_action_impl!(self, get_utf16_str_opt_at(index)?, py)) } pub fn get_utf16_lossy_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!(self, get_utf16_str_lossy_at?, index, py)) + Ok(dex_action_impl!(self, get_utf16_str_lossy_at(index)?, py)) } pub fn get_utf16_lossy<'py>( @@ -419,7 +472,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_utf16_str_lossy?, &string_id, py)) + Ok(dex_action_impl!(self, get_utf16_str_lossy(string_id)?, py)) } pub fn get_string_data<'py>( @@ -428,7 +481,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult<(u32, &'py [u8])> { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_string_data?, &string_id, py)) + Ok(dex_action_impl!(self, get_string_data(string_id)?, py)) } // unsafe string API From f7cfe5306968c0529a4fab08d3b61afc667cc536 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 23 Feb 2025 08:50:11 +0100 Subject: [PATCH 37/46] Fix CatchHandlerData offset calculation --- This should finish #4 --- src/file/code_item_accessors.rs | 85 +++++++++++++++++++++------------ src/file/mod.rs | 29 ++++++----- src/py/file.rs | 28 +++++++---- 3 files changed, 90 insertions(+), 52 deletions(-) diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index ce0142a..ac943b3 100644 --- 
a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -45,21 +45,36 @@ impl<'a> CodeItemAccessor<'a> { return None; // } - let offset = (self.code_off() as usize) - + std::mem::size_of::() - + self.insns_size_in_code_units() as usize; + let offset = self.insns_size_in_bytes() as usize; // must be 4-byte aligned - let offset = (offset + 3) & !3; - Some(offset) + let padding = if self.insns.len() % 2 == 1 { 2 } else { 0 }; + Some(offset + padding) } #[inline] - pub fn get_catch_handler_data_off(&self) -> usize { - let tries_off = self.code_off() as usize - + std::mem::size_of::() - + self.insns_size_in_code_units() as usize; + pub fn get_tries_abs_off(&self) -> Option { + match self.get_tries_off() { + None => None, + Some(tries_off) => Some(tries_off + self.insns_off() as usize), + } + } + + #[inline] + pub fn get_catch_handler_data_off(&self) -> Option { + if let Some(tries_off) = self.get_tries_off() { + let offset = tries_off + self.tries_size() as usize * std::mem::size_of::(); + Some(offset) + } else { + None + } + } - tries_off + (self.tries_size() as usize * std::mem::size_of::()) + #[inline] + pub fn get_catch_handler_data_abs_off(&self) -> Option { + match self.get_catch_handler_data_off() { + None => None, + Some(data_off) => Some(data_off + self.insns_off() as usize), + } } #[inline] @@ -87,10 +102,14 @@ impl<'a> CodeItemAccessor<'a> { }) } - pub fn code_off(&self) -> u32 { + pub fn insns_off(&self) -> u32 { self.code_off } + pub fn code_item_off(&self) -> u32 { + self.code_off - std::mem::size_of::() as u32 + } + pub fn code_item(&self) -> &'a CodeItem { self.code_item } @@ -172,7 +191,7 @@ impl PyCodeItemAccessor { #[getter] pub fn code_off(&self) -> u32 { - self.inner.0.code_off() + self.inner.0.insns_off() } #[getter] @@ -275,7 +294,7 @@ impl<'a> Iterator for DexInstructionIterator<'a> { pub struct EncodedCatchHandlerIterator<'a> { data: &'a [u8], offset: usize, - catch_all: bool, + has_catch_all: bool, remaining: i32, } @@ 
-283,17 +302,32 @@ impl<'a> EncodedCatchHandlerIterator<'a> { pub fn new(data: &'a [u8]) -> Result { let mut pos = 0; let remaining = leb128::decode_sleb128(&data, &mut pos)?; + println!("remaining: {}", remaining); Ok(Self { data, offset: pos, - catch_all: remaining <= 0, + has_catch_all: remaining <= 0, + // If remaining is non-positive, then it is the negative of + // the number of catch types, and the catches are followed by a + // catch-all handler. remaining: if remaining <= 0 { - remaining - } else { -remaining + } else { + remaining }, }) } + + fn leb128(&mut self) -> u32 { + match leb128::decode_leb128_off::(&self.data, &mut self.offset) { + Ok(v) => v, + // TODO: + Err(_) => panic!( + "EncodedCatchHandlerIterator::leb128 decode failed at offset {}", + self.offset + ), + } + } } impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { @@ -306,26 +340,17 @@ impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { let mut handler = CatchHandlerData::default(); if self.remaining > 0 { - match leb128::decode_leb128_off::(&self.data, &mut self.offset) { - Ok(v) => handler.type_idx = v as TypeIndex, - Err(_) => return None, - }; - match leb128::decode_leb128_off::(&self.data, &mut self.offset) { - Ok(v) => handler.address = v, - Err(_) => return None, - } + handler.type_idx = self.leb128() as TypeIndex; + handler.address = self.leb128(); self.remaining -= 1; return Some(handler); } - if self.catch_all { + if self.has_catch_all { handler.is_catch_all = true; handler.type_idx = TypeIndex::MAX; - match leb128::decode_leb128_off::(&self.data, &mut self.offset) { - Ok(v) => handler.address = v, - Err(_) => return None, - } - self.catch_all = false; + handler.address = self.leb128(); + self.has_catch_all = false; return Some(handler); } diff --git a/src/file/mod.rs b/src/file/mod.rs index b8414ab..7090704 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -559,10 +559,9 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { 
//------------------------------------------------------------------------------ pub fn get_try_items(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { // skip heavy work if there are no try items - match ca.get_tries_off() { + match ca.get_tries_abs_off() { None => return Ok(&[]), Some(tries_off) => { - check_lt_result!(tries_off, self.file_size(), TryItem); self.get_try_items_raw(tries_off as u32, ca.tries_size() as u16) } } @@ -582,12 +581,16 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { &self, ca: &CodeItemAccessor<'_>, offset: usize, - ) -> Result<&'a [u8]> { - let data_offset = ca.get_catch_handler_data_off(); - check_lt_result!(data_offset + offset, self.file_size(), CatchHandlerData); - - // TODO: handle values greater than u16 since u16::MAX is maximum offset - Ok(&self.mmap[data_offset + offset..]) + ) -> Result> { + match ca.get_catch_handler_data_abs_off() { + None => Ok(None), + Some(data_offset) => { + check_lt_result!(data_offset + offset, self.file_size(), CatchHandlerData); + + // TODO: handle values greater than u16 since u16::MAX is maximum offset + Ok(Some(&self.mmap[data_offset + offset..])) + } + } } #[inline] @@ -595,9 +598,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { &self, ca: &CodeItemAccessor<'_>, offset: usize, - ) -> Result> { - let data = self.get_catch_handler_data(ca, offset)?; - EncodedCatchHandlerIterator::new(&data) + ) -> Result>> { + match self.get_catch_handler_data(ca, offset)? 
{ + None => Ok(None), + Some(data) => Ok(Some(EncodedCatchHandlerIterator::new(&data)?)), + } } #[inline] @@ -605,7 +610,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { &self, ca: &CodeItemAccessor<'_>, try_item: &TryItem, - ) -> Result> { + ) -> Result>> { self.iter_catch_handlers_at(ca, try_item.handler_off as usize) } diff --git a/src/py/file.rs b/src/py/file.rs index 9ce01a9..5174c33 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -411,14 +411,18 @@ impl PyDexFileImpl { ) -> PyResult> { let code_item_accessor = &ca.try_borrow(py)?.inner.0; let rs_try_item = &try_item.try_borrow(py)?.0; - Ok(dex_action_impl!( + let iterator = dex_action_impl!( self, iter_catch_handlers(code_item_accessor, rs_try_item)?, py - ) - .into_iter() - .map(Into::into) - .collect::>()) + ); + match iterator { + None => Ok(vec![]), + Some(iterator) => Ok(iterator + .into_iter() + .map(Into::into) + .collect::>()), + } } pub fn get_catch_handlers_at<'py>( @@ -428,14 +432,18 @@ impl PyDexFileImpl { offset: u32, ) -> PyResult> { let code_item_accessor = &ca.try_borrow(py)?.inner.0; - Ok(dex_action_impl!( + let iterator = dex_action_impl!( self, iter_catch_handlers_at(code_item_accessor, offset as usize)?, py - ) - .into_iter() - .map(Into::into) - .collect::>()) + ); + match iterator { + None => Ok(vec![]), + Some(iterator) => Ok(iterator + .into_iter() + .map(Into::into) + .collect::>()), + } } // ---------------------------------------------------------------------------- From d53a005de472e5ccf3f7da3a3193b59972a8284b Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 23 Feb 2025 13:20:58 +0100 Subject: [PATCH 38/46] Updated Rust API for CatchHandlerData parser --- python/dexrs/_internal/file.pyi | 3 - src/file/code_item_accessors.rs | 114 +++++++++++++++++--------------- src/file/header.rs | 2 +- src/file/mod.rs | 9 ++- src/py/file.rs | 21 ------ 5 files changed, 67 insertions(+), 82 deletions(-) diff --git 
a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index 17f7f83..f5b887b 100644 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -87,6 +87,3 @@ class DexFile: def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... def get_try_items(self, ca: CodeItemAccessor) -> List[TryItem]: ... def get_catch_handlers(self, ca: CodeItemAccessor, try_item: TryItem) -> List[CatchHandlerData]: ... - def get_catch_handlers_at( - self, ca: CodeItemAccessor, offset: int - ) -> List[CatchHandlerData]: ... diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index ac943b3..eea1b6a 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -21,9 +21,47 @@ pub struct CodeItemAccessor<'a> { code_off: u32, code_item: &'a CodeItem, insns: &'a [u16], + // these values are cached to reduce the number of calculations + tries_off: Option, + catch_handlers_off: Option, } impl<'a> CodeItemAccessor<'a> { + #[inline(always)] + pub fn from_code_item( + dex: &DexFile<'a, C>, + code_item: &'a CodeItem, + code_off: u32, + ) -> Result> + where + C: DexContainer<'a>, + { + let insns = match code_off { + 0 => &[], + _ => dex.get_insns_raw(code_off, code_item.insns_size)?, + }; + + // end of insns must be 4-byte aligned + let tries_off = insns.len() * 2 + if insns.len() % 2 == 1 { 2 } else { 0 }; + let tries_size = code_item.tries_size as usize * std::mem::size_of::(); + Ok(CodeItemAccessor { + code_off, + code_item, + insns, + tries_off: if code_item.tries_size > 0 { + Some(tries_off as u32) + } else { + None + }, + catch_handlers_off: if code_item.tries_size > 0 { + // Start of catch handlers will be at the end of all TryItems + Some((tries_off + tries_size) as u32) + } else { + None + }, + }) + } + #[inline] pub fn insns_size_in_code_units(&self) -> u32 { self.insns.len() as u32 @@ -40,40 +78,28 @@ impl<'a> CodeItemAccessor<'a> { } #[inline] - pub fn get_tries_off(&self) -> Option { - if 
self.tries_size() == 0 { - return None; // - } - - let offset = self.insns_size_in_bytes() as usize; - // must be 4-byte aligned - let padding = if self.insns.len() % 2 == 1 { 2 } else { 0 }; - Some(offset + padding) + pub fn get_tries_off(&self) -> Option { + self.tries_off } #[inline] - pub fn get_tries_abs_off(&self) -> Option { + pub fn get_tries_abs_off(&self) -> Option { match self.get_tries_off() { None => None, - Some(tries_off) => Some(tries_off + self.insns_off() as usize), + Some(tries_off) => Some(tries_off + self.insns_off()), } } #[inline] - pub fn get_catch_handler_data_off(&self) -> Option { - if let Some(tries_off) = self.get_tries_off() { - let offset = tries_off + self.tries_size() as usize * std::mem::size_of::(); - Some(offset) - } else { - None - } + pub fn get_catch_handler_data_off(&self) -> Option { + self.catch_handlers_off } #[inline] - pub fn get_catch_handler_data_abs_off(&self) -> Option { + pub fn get_catch_handler_data_abs_off(&self) -> Option { match self.get_catch_handler_data_off() { None => None, - Some(data_off) => Some(data_off + self.insns_off() as usize), + Some(data_off) => Some(data_off + self.insns_off()), } } @@ -82,26 +108,6 @@ impl<'a> CodeItemAccessor<'a> { !self.insns.is_empty() } - #[inline(always)] - pub fn from_code_item( - dex: &DexFile<'a, C>, - code_item: &'a CodeItem, - code_off: u32, - ) -> Result> - where - C: DexContainer<'a>, - { - let insns = match code_off { - 0 => &[], - _ => dex.get_insns_raw(code_off, code_item.insns_size)?, - }; - Ok(CodeItemAccessor { - code_off, - code_item, - insns, - }) - } - pub fn insns_off(&self) -> u32 { self.code_off } @@ -191,6 +197,7 @@ impl PyCodeItemAccessor { #[getter] pub fn code_off(&self) -> u32 { + // TODO: add docs self.inner.0.insns_off() } @@ -318,15 +325,9 @@ impl<'a> EncodedCatchHandlerIterator<'a> { }) } - fn leb128(&mut self) -> u32 { - match leb128::decode_leb128_off::(&self.data, &mut self.offset) { - Ok(v) => v, - // TODO: - Err(_) => panic!( - 
"EncodedCatchHandlerIterator::leb128 decode failed at offset {}", - self.offset - ), - } + #[inline(always)] + fn leb128(&mut self) -> Result { + leb128::decode_leb128_off::(&self.data, &mut self.offset) } } @@ -340,8 +341,14 @@ impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { let mut handler = CatchHandlerData::default(); if self.remaining > 0 { - handler.type_idx = self.leb128() as TypeIndex; - handler.address = self.leb128(); + match self.leb128() { + Ok(v) => handler.type_idx = v as TypeIndex, + Err(_) => return None, + } + match self.leb128() { + Ok(v) => handler.address = v, + Err(_) => return None, + } self.remaining -= 1; return Some(handler); } @@ -349,7 +356,10 @@ impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { if self.has_catch_all { handler.is_catch_all = true; handler.type_idx = TypeIndex::MAX; - handler.address = self.leb128(); + match self.leb128() { + Ok(v) => handler.address = v, + Err(e) => return None, + } self.has_catch_all = false; return Some(handler); } diff --git a/src/file/header.rs b/src/file/header.rs index abcf44f..58f508b 100644 --- a/src/file/header.rs +++ b/src/file/header.rs @@ -2,6 +2,7 @@ use crate::py::{rs_struct_fields, rs_struct_wrapper}; #[cfg(feature = "python")] use std::sync::Arc; + #[repr(C)] #[derive(Debug, Clone)] pub struct Header { @@ -118,7 +119,6 @@ pub struct HeaderV41 { pub header_off: u32, // offset of this dex's header in the container. 
} - // >>> begin python export #[cfg(feature = "python")] rs_struct_wrapper!("Header", PyDexHeader, Header); diff --git a/src/file/mod.rs b/src/file/mod.rs index 7090704..d2579c4 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -561,9 +561,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // skip heavy work if there are no try items match ca.get_tries_abs_off() { None => return Ok(&[]), - Some(tries_off) => { - self.get_try_items_raw(tries_off as u32, ca.tries_size() as u16) - } + Some(tries_off) => self.get_try_items_raw(tries_off as u32, ca.tries_size() as u16), } } @@ -585,10 +583,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { match ca.get_catch_handler_data_abs_off() { None => Ok(None), Some(data_offset) => { - check_lt_result!(data_offset + offset, self.file_size(), CatchHandlerData); + let offset = data_offset as usize + offset; + check_lt_result!(offset, self.file_size(), CatchHandlerData); // TODO: handle values greater than u16 since u16::MAX is maximum offset - Ok(Some(&self.mmap[data_offset + offset..])) + Ok(Some(&self.mmap[offset..])) } } } diff --git a/src/py/file.rs b/src/py/file.rs index 5174c33..0b8ff20 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -425,27 +425,6 @@ impl PyDexFileImpl { } } - pub fn get_catch_handlers_at<'py>( - &self, - py: Python<'py>, - ca: Py, - offset: u32, - ) -> PyResult> { - let code_item_accessor = &ca.try_borrow(py)?.inner.0; - let iterator = dex_action_impl!( - self, - iter_catch_handlers_at(code_item_accessor, offset as usize)?, - py - ); - match iterator { - None => Ok(vec![]), - Some(iterator) => Ok(iterator - .into_iter() - .map(Into::into) - .collect::>()), - } - } - // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- From bcbbe3cdbaac8333a6df8ad21d0fbb1424a7f0c9 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 1 Mar 2025 
08:23:20 +0100 Subject: [PATCH 39/46] Add annotation API --- python/dexrs/_internal/annotation.pyi | 15 ++++ python/dexrs/_internal/file.pyi | 7 ++ src/file/annotations.rs | 108 ++++++++++++++++++++------ src/file/code_item_accessors.rs | 2 +- src/file/mod.rs | 21 +++-- src/py/file.rs | 38 ++++++++- 6 files changed, 159 insertions(+), 32 deletions(-) create mode 100644 python/dexrs/_internal/annotation.pyi diff --git a/python/dexrs/_internal/annotation.pyi b/python/dexrs/_internal/annotation.pyi new file mode 100644 index 0000000..1eb707b --- /dev/null +++ b/python/dexrs/_internal/annotation.pyi @@ -0,0 +1,15 @@ +from typing import List + +from .structs import ( + FieldAnnotationsItem, + MethodAnnotationsItem, + ParameterAnnotationsItem, +) + +AnnotationSetItem = List[int] + +class ClassAnnotationAccessor: + def get_class_annotation_set(self) -> AnnotationSetItem: ... + def get_field_annotations_items(self) -> List[FieldAnnotationsItem]: ... + def get_method_annotations_items(self) -> List[MethodAnnotationsItem]: ... + def get_parameter_annotations_items(self) -> List[ParameterAnnotationsItem]: ... diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index f5b887b..86334b5 100644 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -12,9 +12,11 @@ from .structs import ( TypeItem, CatchHandlerData, TryItem, + AnnotationItem ) from .class_accessor import ClassAccessor from .code import CodeItemAccessor +from .annotation import AnnotationSetItem, ClassAnnotationAccessor class VerifyPreset: ALL: VerifyPreset @@ -87,3 +89,8 @@ class DexFile: def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... def get_try_items(self, ca: CodeItemAccessor) -> List[TryItem]: ... def get_catch_handlers(self, ca: CodeItemAccessor, try_item: TryItem) -> List[CatchHandlerData]: ... + + # annotations + def get_annotation_set(self, offset: int) -> AnnotationSetItem: ... 
+ def get_annotation(self, offset: int) -> AnnotationItem: ... + def get_class_annotation_accessor(self, class_def: ClassDef) -> ClassAnnotationAccessor: ... \ No newline at end of file diff --git a/src/file/annotations.rs b/src/file/annotations.rs index 857a862..4fccd8b 100644 --- a/src/file/annotations.rs +++ b/src/file/annotations.rs @@ -1,7 +1,19 @@ +#[cfg(feature = "python")] +use std::sync::Arc; + +#[cfg(feature = "python")] +use crate::{ + file::{ + PyDexFieldAnnotationsItem, PyDexMethodAnnotationsItem, + PyDexParameterAnnotationsItem, + }, + py::rs_type_wrapper, +}; + use crate::{dex_err, error::DexError, leb128::decode_leb128, Result}; use super::{ - AnnotationElement, AnnotationItem, AnnotationSetItem, AnnotationsDirectoryItem, ClassDef, + AnnotationElement, AnnotationItem, AnnotationSetItem, AnnotationsDirectoryItem, DexContainer, DexFile, EncodedAnnotation, EncodedArray, EncodedValue, FieldAnnotationsItem, MethodAnnotationsItem, ParameterAnnotationsItem, }; @@ -10,8 +22,6 @@ use super::{ // ClassAnnotationsAccessor //------------------------------------------------------------------------------ pub struct ClassAnnotationsAccessor<'a> { - class_def: &'a ClassDef, - field_annotations: &'a [FieldAnnotationsItem], method_annotations: &'a [MethodAnnotationsItem], parameter_annotations: &'a [ParameterAnnotationsItem], @@ -20,10 +30,10 @@ pub struct ClassAnnotationsAccessor<'a> { impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn get_class_annotation_accessor( - &'a self, - class_def: &'a ClassDef, + &self, + annotations_off: u32, ) -> Result> { - ClassAnnotationsAccessor::new(self, class_def) + ClassAnnotationsAccessor::new(self, annotations_off) } } @@ -37,15 +47,15 @@ macro_rules! 
read_annotations { } impl<'a> ClassAnnotationsAccessor<'a> { - pub fn new(dex: &'a DexFile<'a, C>, class_def: &'a ClassDef) -> Result + pub fn new(dex: &DexFile<'a, C>, annotations_off: u32) -> Result where C: DexContainer<'a>, { - match dex.data_ptr::(class_def.annotations_off)? { - None => Ok(ClassAnnotationsAccessor::new_empty(class_def)), + match dex.data_ptr::(annotations_off)? { + None => Ok(ClassAnnotationsAccessor::new_empty()), Some(item) => { - let mut start_offset = class_def.annotations_off as usize - + std::mem::size_of::(); + let mut start_offset = + annotations_off as usize + std::mem::size_of::(); let field_annotations = read_annotations!(dex, start_offset, item.fields_size, FieldAnnotationsItem); @@ -66,7 +76,6 @@ impl<'a> ClassAnnotationsAccessor<'a> { let class_annotations = dex.get_annotation_set(item.class_annotations_off)?; Ok(Self { - class_def, field_annotations, method_annotations, parameter_annotations, @@ -76,9 +85,8 @@ impl<'a> ClassAnnotationsAccessor<'a> { } } - pub fn new_empty(class_def: &'a ClassDef) -> Self { + pub fn new_empty() -> Self { Self { - class_def, field_annotations: &[], method_annotations: &[], parameter_annotations: &[], @@ -87,31 +95,75 @@ impl<'a> ClassAnnotationsAccessor<'a> { } #[inline] - pub fn get_class_def(&self) -> &'a ClassDef { - self.class_def - } - - #[inline] - pub fn get_field_ann(&self) -> &'a [FieldAnnotationsItem] { + pub fn get_field_annotations_items(&self) -> &'a [FieldAnnotationsItem] { self.field_annotations } #[inline] - pub fn get_method_ann(&self) -> &'a [MethodAnnotationsItem] { + pub fn get_method_annotations_items(&self) -> &'a [MethodAnnotationsItem] { self.method_annotations } #[inline] - pub fn get_parameter_ann(&self) -> &'a [ParameterAnnotationsItem] { + pub fn get_parameter_annotations_items(&self) -> &'a [ParameterAnnotationsItem] { self.parameter_annotations } #[inline] - pub fn get_class_ann(&self) -> AnnotationSetItem<'a> { + pub fn get_class_annotation_set(&self) -> 
AnnotationSetItem<'a> { self.class_annotations } } +// >>> begin python export +#[cfg(feature = "python")] +rs_type_wrapper!( + ClassAnnotationsAccessor<'static>, + PyDexClassAnnotationsAccessor, + RsClassAnnotationsAccessor, + name: "ClassAnnotationsAccessor", + module: "dexrs._internal.annotation" +); + +#[cfg(feature = "python")] +#[pyo3::pymethods] +impl PyDexClassAnnotationsAccessor { + pub fn get_class_annotation_set(&self) -> AnnotationSetItem<'_> { + self.inner.0.get_class_annotation_set() + } + + pub fn get_field_annotations_items(&self) -> Vec { + self.inner + .0 + .get_field_annotations_items() + .iter() + .map(Into::into) + .collect() + } + + pub fn get_method_annotations_items(&self) -> Vec { + self.inner + .0 + .get_method_annotations_items() + .iter() + .map(Into::into) + .collect() + } + + pub fn get_parameter_annotations_items(&self) -> Vec { + self.inner + .0 + .get_parameter_annotations_items() + .iter() + .map(Into::into) + .collect() + } +} +// <<< end python export + +//------------------------------------------------------------------------------ +// EncodedValue +//------------------------------------------------------------------------------ // Encoded values require special handling and they can't be parsed using // zero-copy. 
#[repr(u8)] @@ -425,3 +477,13 @@ impl From for EncodedValueType { } } } + +// >>> begin python module export +#[cfg(feature = "python")] +#[pyo3::pymodule(name = "annotation")] +pub(crate) mod py_annotations { + #[pymodule_export] + use super::PyDexClassAnnotationsAccessor; +} + +// <<< end python module export diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index eea1b6a..dd2f06a 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -358,7 +358,7 @@ impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { handler.type_idx = TypeIndex::MAX; match self.leb128() { Ok(v) => handler.address = v, - Err(e) => return None, + Err(_) => return None, } self.has_catch_all = false; return Some(handler); diff --git a/src/file/mod.rs b/src/file/mod.rs index d2579c4..68947d9 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -469,11 +469,6 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { EncodedValue::new(&self.mmap[off as usize..]) } - pub fn get_annotation(&self, off: u32) -> Result { - check_lt_result!(off, self.file_size(), Annotation); - AnnotationItem::from_raw_parts(&self.mmap[off as usize..]) - } - //------------------------------------------------------------------------------ // Method Ids //------------------------------------------------------------------------------ @@ -617,9 +612,8 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { // Annotations //------------------------------------------------------------------------------ // see implementation in annotations.rs for accessor - pub fn get_annotation_set(&'a self, off: u32) -> Result> { + pub fn get_annotation_set(&self, off: u32) -> Result> { // this will not panic if offset is zero - check_lt_result!(off, self.file_size(), AnnotationSetItem); match self.data_ptr::(off)? 
{ None => Ok(&[]), Some(size) => { @@ -654,6 +648,16 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { self.get_annotation_set(anno_item.annotations_off) } + #[inline] + pub fn get_annotation(&self, annotation_off: u32) -> Result + { + check_lt_result!(annotation_off, self.file_size(), Annotation); + AnnotationItem::from_raw_parts(&self.mmap[annotation_off as usize..]) + } + + //------------------------------------------------------------------------------ + // internal helpers + //------------------------------------------------------------------------------ #[inline] fn offset_of(&self, buf: &[U], o: &T) -> Result { let start = buf.as_ptr() as usize; @@ -741,6 +745,9 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } } + //------------------------------------------------------------------------------ + // Initialization + //------------------------------------------------------------------------------ fn init(&self) -> Result<()> { let container_size = self.file_size(); if container_size < std::mem::size_of::
() { diff --git a/src/py/file.rs b/src/py/file.rs index 0b8ff20..cba3caf 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -7,7 +7,10 @@ use crate::file::{ PyDexFieldId, PyDexHeader, PyDexMethodId, PyDexProtoId, PyDexStringId, PyDexTypeId, PyDexTypeItem, PyFileDexContainer, PyInMemoryDexContainer, StringIndex, TypeIndex, }; -use crate::file::{PyCodeItemAccessor, PyDexCatchHandlerData, PyDexTryItem}; +use crate::file::{ + AnnotationSetItem, PyCodeItemAccessor, PyDexAnnotationItem, PyDexCatchHandlerData, + PyDexClassAnnotationsAccessor, PyDexTryItem, +}; use crate::file::class_accessor::PyClassAccessor; @@ -425,6 +428,39 @@ impl PyDexFileImpl { } } + //------------------------------------------------------------------------------ + // Annotations + //------------------------------------------------------------------------------ + pub fn get_annotation_set<'py>( + &self, + py: Python<'py>, + offset: u32, + ) -> PyResult> { + Ok(dex_action_impl!(self, get_annotation_set(offset)?, py)) + } + + pub fn get_annotation<'py>( + &self, + py: Python<'py>, + offset: u32, + ) -> PyResult { + Ok(dex_action_impl!(self, get_annotation(offset)?, py).into()) + } + + pub fn get_class_annotation_accessor<'py>( + &self, + py: Python<'py>, + class_def: Py, + ) -> PyResult { + let rs_class_def = &class_def.try_borrow(py)?.0; + Ok(dex_action_impl!( + self, + get_class_annotation_accessor(rs_class_def.annotations_off)?, + py + ) + .into()) + } + // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- From a7e72a35c98967d3dc920648bae767e5e603d28d Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 1 Mar 2025 08:43:05 +0100 Subject: [PATCH 40/46] Rust API code cleamup --- src/file/annotations.rs | 18 ++-- src/file/class_accessor.rs | 49 ++++------ src/file/code_item_accessors.rs | 18 ++-- src/file/container.rs | 6 +- 
src/file/debug.rs | 44 +++++---- src/file/dump.rs | 8 +- src/file/instruction.rs | 158 +++++++++++++++----------------- src/file/mod.rs | 36 ++++---- src/file/structs.rs | 10 +- src/file/verifier.rs | 2 +- src/utf.rs | 13 ++- 11 files changed, 170 insertions(+), 192 deletions(-) diff --git a/src/file/annotations.rs b/src/file/annotations.rs index 4fccd8b..a8d7acc 100644 --- a/src/file/annotations.rs +++ b/src/file/annotations.rs @@ -270,7 +270,7 @@ fn check_size( let size = std::mem::size_of::(); if value_arg as usize + 1 >= size { return dex_err!(BadEncodedValueSize { - value_type: value_type, + value_type, size: value_arg as usize, max: size }); @@ -278,7 +278,7 @@ fn check_size( if offset + width >= value.len() { return dex_err!(InvalidEncodedValue { - value_type: value_type, + value_type, offset: offset + width, size: value.len() }); @@ -371,8 +371,8 @@ impl EncodedValue { } let header_byte = value[*offset]; - let value_type = header_byte & 0x1F_u8 as u8; - let value_arg = ((header_byte & 0xE0) >> 5) as u8; + let value_type = header_byte & 0x1F_u8; + let value_arg = (header_byte & 0xE0) >> 5; if !EncodedValueType::is_valid(value_type) { return dex_err!(BadEncodedValueType, value_type); } @@ -437,18 +437,12 @@ impl EncodedValue { impl EncodedValueType { #[inline] pub fn is_valid(value_type: u8) -> bool { - match value_type { - 0x00 | 0x02..=0x04 | 0x06 | 0x10 | 0x11 | 0x15..=0x1F => true, - _ => false, - } + matches!(value_type, 0x00 | 0x02..=0x04 | 0x06 | 0x10 | 0x11 | 0x15..=0x1F) } #[inline] pub fn is_primitive(value_type: u8) -> bool { - match value_type { - 0x00 | 0x02..=0x06 | 0x10 | 0x11 => true, - _ => false, - } + matches!(value_type, 0x00 | 0x02..=0x06 | 0x10 | 0x11) } } diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 1b2cd98..431f90a 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -25,7 +25,7 @@ pub trait ClassItemBase: Copy + Clone + Default { // 
---------------------------------------------------------------------------- // Method // ---------------------------------------------------------------------------- -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Default)] pub struct Method { pub index: u32, pub access_flags: u32, @@ -33,7 +33,7 @@ pub struct Method { pub is_static_or_direct: bool, } -impl<'a> Method { +impl Method { #[inline] pub fn get_direct_invoke_type(&self) -> InvokeType { if self.access_flags & ACC_STATIC != 0 { @@ -56,10 +56,10 @@ impl<'a> Method { } } -impl<'a> ClassItemBase for Method { +impl ClassItemBase for Method { fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { let target = self.index as usize; - let value = decode_leb128_off::(&data, pos)?; + let value = decode_leb128_off::(data, pos)?; if target + value as usize > u32::MAX as usize { return dex_err!(BadEncodedIndex { index: self.index, @@ -68,8 +68,8 @@ impl<'a> ClassItemBase for Method { }); } self.index += value; - self.access_flags = decode_leb128_off::(&data, pos)?; - self.code_offset = decode_leb128_off::(&data, pos)?; + self.access_flags = decode_leb128_off::(data, pos)?; + self.code_offset = decode_leb128_off::(data, pos)?; Ok(()) } @@ -78,17 +78,6 @@ impl<'a> ClassItemBase for Method { } } -impl Default for Method { - fn default() -> Self { - Self { - index: 0, - access_flags: 0, - code_offset: 0, - is_static_or_direct: false, - } - } -} - // >>> begin python export #[cfg(feature = "python")] rs_type_wrapper!( @@ -132,10 +121,10 @@ pub struct Field { pub is_static: bool, } -impl<'a> ClassItemBase for Field { +impl ClassItemBase for Field { fn read(&mut self, data: &'_ [u8], pos: &mut usize) -> Result<()> { let target = self.index as usize; - let value = decode_leb128_off::(&data, pos)?; + let value = decode_leb128_off::(data, pos)?; if target + value as usize > u32::MAX as usize { return dex_err!(BadEncodedIndex { index: self.index, @@ -144,7 +133,7 @@ impl<'a> ClassItemBase for Field { }); } self.index 
+= value; - self.access_flags = decode_leb128_off::(&data, pos)?; + self.access_flags = decode_leb128_off::(data, pos)?; Ok(()) } @@ -247,10 +236,10 @@ impl<'a> ClassAccessor<'a> { num_instance_fields: 0, static_fields_off: 0, }; - accessor.num_static_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; - accessor.num_instance_fields = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; - accessor.num_direct_methods = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; - accessor.num_virtual_methods = decode_leb128_off(&class_data, &mut accessor.ptr_pos)?; + accessor.num_static_fields = decode_leb128_off(class_data, &mut accessor.ptr_pos)?; + accessor.num_instance_fields = decode_leb128_off(class_data, &mut accessor.ptr_pos)?; + accessor.num_direct_methods = decode_leb128_off(class_data, &mut accessor.ptr_pos)?; + accessor.num_virtual_methods = decode_leb128_off(class_data, &mut accessor.ptr_pos)?; accessor.static_fields_off = accessor.ptr_pos as u32; Ok(accessor) } @@ -376,7 +365,7 @@ impl<'a> ClassAccessor<'a> { // switch to instance fields Ok(DataIterator::new( self.class_data, - offset as usize, + offset, self.num_direct_methods as usize, self.num_methods(), )) @@ -405,8 +394,8 @@ impl<'a> ClassAccessor<'a> { F: Fn(&T) -> Result<()>, { for _ in 0..count { - iter.read(&self.class_data, offset)?; - visitor(&iter)?; + iter.read(self.class_data, offset)?; + visitor(iter)?; } Ok(()) } @@ -528,7 +517,7 @@ impl<'a, T: ClassItemBase> DataIterator<'a, T> { } } -impl<'a, T: ClassItemBase> Iterator for DataIterator<'a, T> { +impl Iterator for DataIterator<'_, T> { type Item = T; fn next(&mut self) -> Option { @@ -536,7 +525,7 @@ impl<'a, T: ClassItemBase> Iterator for DataIterator<'a, T> { if self.pos == self.partition_pos { self.value.next_section(); } - match self.value.read(&self.class_data, &mut self.off) { + match self.value.read(self.class_data, &mut self.off) { Ok(()) => {} Err(_) => { self.pos = self.end_pos; @@ -547,7 +536,7 @@ impl<'a, T: 
ClassItemBase> Iterator for DataIterator<'a, T> { self.pos += 1; return Some(self.value); } - return None; + None } } diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index dd2f06a..16eed7a 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -84,10 +84,8 @@ impl<'a> CodeItemAccessor<'a> { #[inline] pub fn get_tries_abs_off(&self) -> Option { - match self.get_tries_off() { - None => None, - Some(tries_off) => Some(tries_off + self.insns_off()), - } + self.get_tries_off() + .map(|tries_off| tries_off + self.insns_off()) } #[inline] @@ -97,10 +95,8 @@ impl<'a> CodeItemAccessor<'a> { #[inline] pub fn get_catch_handler_data_abs_off(&self) -> Option { - match self.get_catch_handler_data_off() { - None => None, - Some(data_off) => Some(data_off + self.insns_off()), - } + self.get_catch_handler_data_off() + .map(|data_off| data_off + self.insns_off()) } #[inline] @@ -308,7 +304,7 @@ pub struct EncodedCatchHandlerIterator<'a> { impl<'a> EncodedCatchHandlerIterator<'a> { pub fn new(data: &'a [u8]) -> Result { let mut pos = 0; - let remaining = leb128::decode_sleb128(&data, &mut pos)?; + let remaining = leb128::decode_sleb128(data, &mut pos)?; println!("remaining: {}", remaining); Ok(Self { data, @@ -327,11 +323,11 @@ impl<'a> EncodedCatchHandlerIterator<'a> { #[inline(always)] fn leb128(&mut self) -> Result { - leb128::decode_leb128_off::(&self.data, &mut self.offset) + leb128::decode_leb128_off::(self.data, &mut self.offset) } } -impl<'a> Iterator for EncodedCatchHandlerIterator<'a> { +impl Iterator for EncodedCatchHandlerIterator<'_> { type Item = CatchHandlerData; fn next(&mut self) -> Option { diff --git a/src/file/container.rs b/src/file/container.rs index 8729b60..5ae7247 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -64,13 +64,13 @@ impl<'a> InMemoryDexContainer<'a> { impl<'a> Deref for InMemoryDexContainer<'a> { type Target = [u8]; fn deref(&self) -> &'a Self::Target { - &self.0 + 
self.0 } } impl<'a> AsRef<[u8]> for InMemoryDexContainer<'a> { fn as_ref(&self) -> &'a [u8] { - &self.0 + self.0 } } @@ -180,7 +180,7 @@ impl DexFileContainer { self } - pub fn open<'a>(&'a self) -> Result> { + pub fn open(&self) -> Result> { MmapDexFile::open_file(self) } diff --git a/src/file/debug.rs b/src/file/debug.rs index cbab960..3dac282 100644 --- a/src/file/debug.rs +++ b/src/file/debug.rs @@ -24,7 +24,7 @@ pub mod code { pub const DBG_SET_FILE: u8 = 0x09; pub const DBG_FIRST_SPECIAL: u8 = 0x0a; - pub const DBG_LINE_BASE: u8 = (-4 as i8) as u8; + pub const DBG_LINE_BASE: u8 = -4_i8 as u8; pub const DBG_LINE_RANGE: u8 = 15; } @@ -48,6 +48,12 @@ impl PositionInfo { } } +impl Default for PositionInfo { + fn default() -> Self { + Self::new() + } +} + pub struct CodeItemDebugInfoAccessor<'a> { ptr: &'a [u8], } @@ -74,11 +80,11 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { where F: Fn(u32), { - let line = decode_leb128_off(&self.ptr, offset)?; - let size = decode_leb128_off::(&self.ptr, offset)?; + let line = decode_leb128_off(self.ptr, offset)?; + let size = decode_leb128_off::(self.ptr, offset)?; for _ in 0..size { - let index = decode_leb128p1_off(&self.ptr, offset)?; + let index = decode_leb128p1_off(self.ptr, offset)?; visitor(index as u32); } Ok(line) @@ -100,29 +106,29 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { code::DBG_END_SEQUENCE => break, // This will cause overflow code::DBG_ADVANCE_PC => { - entry.address += decode_leb128_off::(&self.ptr, &mut offset)? + entry.address += decode_leb128_off::(self.ptr, &mut offset)? } code::DBG_ADVANCE_LINE => { - entry.line += decode_leb128_off::(&self.ptr, &mut offset)? + entry.line += decode_leb128_off::(self.ptr, &mut offset)? 
} code::DBG_START_LOCAL => { - decode_leb128_off::(&self.ptr, &mut offset)?; // reg - decode_leb128p1_off(&self.ptr, &mut offset)?; // name - decode_leb128p1_off(&self.ptr, &mut offset)?; // descriptor + decode_leb128_off::(self.ptr, &mut offset)?; // reg + decode_leb128p1_off(self.ptr, &mut offset)?; // name + decode_leb128p1_off(self.ptr, &mut offset)?; // descriptor } code::DBG_START_LOCAL_EXTENDED => { - decode_leb128_off::(&self.ptr, &mut offset)?; // reg - decode_leb128p1_off(&self.ptr, &mut offset)?; // name - decode_leb128p1_off(&self.ptr, &mut offset)?; // descriptor - decode_leb128p1_off(&self.ptr, &mut offset)?; // signature + decode_leb128_off::(self.ptr, &mut offset)?; // reg + decode_leb128p1_off(self.ptr, &mut offset)?; // name + decode_leb128p1_off(self.ptr, &mut offset)?; // descriptor + decode_leb128p1_off(self.ptr, &mut offset)?; // signature } code::DBG_END_LOCAL | code::DBG_RESTART_LOCAL => { - decode_leb128_off::(&self.ptr, &mut offset)?; // reg + decode_leb128_off::(self.ptr, &mut offset)?; // reg } code::DBG_SET_PROLOGUE_END => entry.prologue_end = true, code::DBG_SET_EPILOGUE_BEGIN => entry.epilogue_begin = true, code::DBG_SET_FILE => { - let file = decode_leb128p1_off(&self.ptr, &mut offset)?; // file + let file = decode_leb128p1_off(self.ptr, &mut offset)?; // file entry.file = SourceFile::Other(file as u32); } _ => { @@ -154,8 +160,8 @@ impl<'dex> DebugInfoParameterNamesIterator<'dex> { pub fn new(ptr: &'dex [u8], offset: usize) -> Result { let mut pos = offset; // skipping line number - decode_leb128_off::(&ptr, &mut pos)?; - let size = decode_leb128_off::(&ptr, &mut pos)? as usize; + decode_leb128_off::(ptr, &mut pos)?; + let size = decode_leb128_off::(ptr, &mut pos)? 
as usize; Ok(Self { ptr, offset: pos, @@ -165,7 +171,7 @@ impl<'dex> DebugInfoParameterNamesIterator<'dex> { } } -impl<'a> Iterator for DebugInfoParameterNamesIterator<'a> { +impl Iterator for DebugInfoParameterNamesIterator<'_> { type Item = u32; fn next(&mut self) -> Option { @@ -173,7 +179,7 @@ impl<'a> Iterator for DebugInfoParameterNamesIterator<'a> { return None; } self.idx += 1; - match decode_leb128p1_off(&self.ptr, &mut self.offset) { + match decode_leb128p1_off(self.ptr, &mut self.offset) { Ok(v) => Some(v as u32), Err(_) => None, } diff --git a/src/file/dump.rs b/src/file/dump.rs index e785210..f547b2c 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -48,11 +48,11 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { let mut result = String::new(); if opts == prettify::Field::WithType { result.push_str(&self.pretty_type_opt_at(field_id.type_idx)?); - result.push_str(" "); + result.push(' '); } result.push_str(&self.pretty_type_opt_at(field_id.class_idx)?); - result.push_str("."); + result.push('.'); result.push_str(&self.get_utf16_str_lossy_at(field_id.name_idx)?); Ok(result) @@ -89,7 +89,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn pretty_utf16_at(&self, idx: u32) -> String { match self.get_string_id(idx) { - Ok(str_data) => self.pretty_utf16(&str_data), + Ok(str_data) => self.pretty_utf16(str_data), Err(_) => format!("<>", idx), } } @@ -159,7 +159,7 @@ impl<'a> Instruction<'a> { } Ok(match self.format() { - &Format::k10x => format!("{opcode}"), + &Format::k10x => opcode.to_string(), Format::k12x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), Format::k11n => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), Format::k11x => format!("{opcode} v{}", vreg::A(self)?), diff --git a/src/file/instruction.rs b/src/file/instruction.rs index 16e7ffa..f33b8c2 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -28,11 +28,11 @@ impl<'a> Instruction<'a> { #[inline] pub fn relative_at(&self, offset: 
usize) -> Result> { if offset + 1 >= self.0.len() { - return dex_err!(BadInstructionOffset { + dex_err!(BadInstructionOffset { opcode: self.name(), - offset: offset, + offset, size: self.0.len() - }); + }) } else { Ok(Instruction::at(&self.0[offset..])) } @@ -43,7 +43,7 @@ impl<'a> Instruction<'a> { if offset >= self.0.len() { return dex_err!(BadInstruction { opcode: self.name(), - offset: offset, + offset, size: self.0.len(), target_type: "u16" }); @@ -56,7 +56,7 @@ impl<'a> Instruction<'a> { if offset >= self.0.len() { return dex_err!(BadInstruction { opcode: self.name(), - offset: offset, + offset, size: self.0.len(), target_type: "u32" }); @@ -439,7 +439,7 @@ define_flags!( impl<'a> Instruction<'a> { #[inline(always)] const fn format_desc(&self) -> &'static InstructionDescriptor { - &Instruction::INSN_DESCRIPTORS[(self.0[0] as u8 & 0xFF) as usize] + &Instruction::INSN_DESCRIPTORS[(self.0[0] as u8) as usize] } #[inline(always)] @@ -454,7 +454,7 @@ impl<'a> Instruction<'a> { #[inline(always)] pub const fn name(&self) -> &'static str { - &self.format_desc().name + self.format_desc().name } pub fn next(&self) -> Option> { @@ -462,7 +462,7 @@ impl<'a> Instruction<'a> { if self.0.len() <= self.size_in_code_units() + 2 { return None; } - return Some(Instruction::at(&self.0[self.size_in_code_units()..])); + Some(Instruction::at(&self.0[self.size_in_code_units()..])) } #[inline(always)] @@ -484,8 +484,7 @@ impl<'a> Instruction<'a> { signatures::ArrayDataSignature => { let element_size = self.fetch16(1)? as usize; let length = self.fetch32(2)? as usize; - // The plus 1 is to round up for odd size and width. 
- 4 + (element_size * length + 1) / 2 + 4 + (element_size * length).div_ceil(2) } _ => 1, }) @@ -772,35 +771,35 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_a(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { + matches!( + &inst.format_desc().format, Format::k10t - | Format::k10x - | Format::k11n - | Format::k11x - | Format::k12x - | Format::k20t - | Format::k21c - | Format::k21h - | Format::k21s - | Format::k21t - | Format::k22b - | Format::k22c - | Format::k22s - | Format::k22t - | Format::k22x - | Format::k23x - | Format::k30t - | Format::k31c - | Format::k31i - | Format::k31t - | Format::k32x - | Format::k35c - | Format::k3rc - | Format::k45cc - | Format::k4rcc - | Format::k51l => true, - _ => false, - } + | Format::k10x + | Format::k11n + | Format::k11x + | Format::k12x + | Format::k20t + | Format::k21c + | Format::k21h + | Format::k21s + | Format::k21t + | Format::k22b + | Format::k22c + | Format::k22s + | Format::k22t + | Format::k22x + | Format::k23x + | Format::k30t + | Format::k31c + | Format::k31i + | Format::k31t + | Format::k32x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc + | Format::k51l + ) } #[inline] @@ -847,30 +846,30 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_b(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { + matches!( + &inst.format_desc().format, Format::k11n - | Format::k12x - | Format::k21c - | Format::k21h - | Format::k21s - | Format::k21t - | Format::k22b - | Format::k22c - | Format::k22s - | Format::k22t - | Format::k22x - | Format::k23x - | Format::k31c - | Format::k31i - | Format::k31t - | Format::k32x - | Format::k35c - | Format::k3rc - | Format::k45cc - | Format::k4rcc - | Format::k51l => true, - _ => false, - } + | Format::k12x + | Format::k21c + | Format::k21h + | Format::k21s + | Format::k21t + | Format::k22b + | 
Format::k22c + | Format::k22s + | Format::k22t + | Format::k22x + | Format::k23x + | Format::k31c + | Format::k31i + | Format::k31t + | Format::k32x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc + | Format::k51l + ) } pub fn has_wide_b(inst: &Instruction<'_>) -> bool { @@ -922,18 +921,18 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_c(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { + matches!( + &inst.format_desc().format, Format::k22b - | Format::k22c - | Format::k22s - | Format::k22t - | Format::k23x - | Format::k35c - | Format::k3rc - | Format::k45cc - | Format::k4rcc => true, - _ => false, - } + | Format::k22c + | Format::k22s + | Format::k22t + | Format::k23x + | Format::k35c + | Format::k3rc + | Format::k45cc + | Format::k4rcc + ) } #[inline] @@ -961,10 +960,7 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_h(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { - Format::k45cc | Format::k4rcc => true, - _ => false, - } + matches!(&inst.format_desc().format, Format::k45cc | Format::k4rcc) } #[inline] @@ -985,10 +981,7 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_var_args(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { - Format::k35c | Format::k45cc => true, - _ => false, - } + matches!(&inst.format_desc().format, Format::k35c | Format::k45cc) } #[inline] @@ -1029,10 +1022,7 @@ pub mod vreg { //------------------------------------------------------------------------------ #[inline] pub fn has_args_range(inst: &Instruction<'_>) -> bool { - match &inst.format_desc().format { - Format::k3rc | Format::k4rcc => true, - _ => false, - } + matches!(&inst.format_desc().format, Format::k3rc | Format::k4rcc) } pub fn args_range(inst: &Instruction<'_>) -> Result> { diff 
--git a/src/file/mod.rs b/src/file/mod.rs index 68947d9..272912a 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -1,3 +1,5 @@ +use std::fmt::Display; + use memmap2::{Mmap, MmapMut}; use plain::Plain; @@ -48,11 +50,11 @@ impl From<&'static str> for DexLocation { } } -impl ToString for DexLocation { - fn to_string(&self) -> String { +impl Display for DexLocation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - DexLocation::InMemory => "[in-memory]".to_string(), - DexLocation::Path(path) => path.to_string(), + DexLocation::InMemory => write!(f, "[in-memory]"), + DexLocation::Path(path) => write!(f, "{}", path), } } } @@ -150,10 +152,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } let data = &base[offset as usize..]; - match T::slice_from_bytes_len(data, len as usize) { - Ok(slice) => slice, - Err(_) => &[], - } + T::slice_from_bytes_len(data, len as usize).unwrap_or_default() } pub fn from_raw_parts(base: &'a C, location: DexLocation) -> Result> { @@ -161,7 +160,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { return dex_err!(TruncatedFile); } - let header = match Header::from_bytes(&base) { + let header = match Header::from_bytes(base) { Ok(header) => header, // REVISIT: we already checked the header Err(_) => return dex_err!(TruncatedFile), @@ -241,7 +240,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { #[inline(always)] pub fn get_header(&self) -> &'a Header { - &self.header + self.header } // ------------------------------------------------------------------------------ @@ -555,8 +554,8 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn get_try_items(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { // skip heavy work if there are no try items match ca.get_tries_abs_off() { - None => return Ok(&[]), - Some(tries_off) => self.get_try_items_raw(tries_off as u32, ca.tries_size() as u16), + None => Ok(&[]), + Some(tries_off) => self.get_try_items_raw(tries_off, ca.tries_size()), } } @@ -595,7 
+594,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { ) -> Result>> { match self.get_catch_handler_data(ca, offset)? { None => Ok(None), - Some(data) => Ok(Some(EncodedCatchHandlerIterator::new(&data)?)), + Some(data) => Ok(Some(EncodedCatchHandlerIterator::new(data)?)), } } @@ -649,8 +648,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } #[inline] - pub fn get_annotation(&self, annotation_off: u32) -> Result - { + pub fn get_annotation(&self, annotation_off: u32) -> Result { check_lt_result!(annotation_off, self.file_size(), Annotation); AnnotationItem::from_raw_parts(&self.mmap[annotation_off as usize..]) } @@ -705,7 +703,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { ); } match T::from_bytes(&self.mmap[offset as usize..]) { - Ok(v) => Ok(&v), + Ok(v) => Ok(v), Err(plain::Error::TooShort) => { dex_err!(DexLayoutError, self, offset, std::any::type_name::(), 0) } @@ -733,7 +731,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { ); } match T::slice_from_bytes_len(&self.mmap[offset as usize..], len) { - Ok(v) => Ok(&v), + Ok(v) => Ok(v), Err(plain::Error::TooShort) => dex_err!( DexLayoutError, self, @@ -812,7 +810,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { let size = self.file_size(); let end = (self.header.map_off as usize) + std::mem::size_of::(); - end as usize > size || !plain::is_aligned::(&self.mmap[0..end as usize]) + end > size || !plain::is_aligned::(&self.mmap[0..end]) } fn init_sections_from_maplist(&mut self) { @@ -823,7 +821,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { let map_list_size_off = self.header.map_off; let map_list_off = (self.header.map_off as usize) + std::mem::size_of::(); - if map_list_off >= self.file_size() as usize { + if map_list_off >= self.file_size() { // bad offset return; } diff --git a/src/file/structs.rs b/src/file/structs.rs index 901baf9..16d3a30 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -32,7 +32,7 @@ rs_struct_wrapper!("StringId", PyDexStringId, StringId); 
rs_struct_fields!(PyDexStringId, { (string_data_off, StringIndex), },); -/// <<< end python export +// <<< end python export // -------------------------------------------------------------------- // TypeId @@ -54,7 +54,7 @@ rs_struct_wrapper!("TypeId", PyDexTypeId, TypeId); rs_struct_fields!(PyDexTypeId, { (descriptor_idx, StringIndex), },); -/// <<< end python export +// <<< end python export // -------------------------------------------------------------------- // FieldId @@ -80,7 +80,7 @@ rs_struct_fields!(PyDexFieldId, { (type_idx, TypeIndex), (name_idx, StringIndex), },); -/// <<< end python export +// <<< end python export // -------------------------------------------------------------------- // ProtoId @@ -107,7 +107,7 @@ rs_struct_fields!(PyDexProtoId, { (return_type_idx, TypeIndex), (parameters_off, u32), },); -/// <<< end python export +// <<< end python export // -------------------------------------------------------------------- // MethodId @@ -304,7 +304,7 @@ impl<'a> HiddenapiClassData<'a> { } } -unsafe impl<'a> plain::Plain for HiddenapiClassData<'a> {} +unsafe impl plain::Plain for HiddenapiClassData<'_> {} // -------------------------------------------------------------------- // CodeItem diff --git a/src/file/verifier.rs b/src/file/verifier.rs index 23ca4ba..458f267 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -187,7 +187,7 @@ where if (file_size - offset as usize) < size as usize { return dex_err!(BadSection { offset: offset + size, - size: file_size as usize, + size: file_size, section: label }); } diff --git a/src/utf.rs b/src/utf.rs index dd66135..b7a296c 100644 --- a/src/utf.rs +++ b/src/utf.rs @@ -91,8 +91,7 @@ fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { let mut surrogate_pair: u32 = 0x00; surrogate_pair |= ((code_point >> 10) + 0xd7c0) & 0xFFFF; - surrogate_pair |= ((code_point & 0x03FF) + 0xdc80) << 16; - return surrogate_pair; + surrogate_pair | ((code_point & 0x03FF) + 0xdc80) << 16 } 
#[inline(always)] @@ -223,6 +222,12 @@ pub struct Options { pub replace_bad_surrogates: bool, } +impl Default for Options { + fn default() -> Self { + Options::new() + } +} + impl Options { pub fn new() -> Options { Options { @@ -244,7 +249,7 @@ impl Options { fn convert_utf16_to_mutf8(utf16_in: &[u16], options: &Options, mut append: Append) where - Append: FnMut(u8) -> (), + Append: FnMut(u8), { let mut in_idx = 0; while in_idx < utf16_in.len() { @@ -262,7 +267,7 @@ where && in_idx + 1 != utf16_in.len() && !is_trail(utf16_in[in_idx + 1])) { - append('?' as u8); + append(b'?'); } else { let code_point = get_supplementary(ch, utf16_in[in_idx + 1]); in_idx += 1; From a4a3cd54a0658b7c97d884e343e7f45434e5910b Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:21:32 +0200 Subject: [PATCH 41/46] feat: dex rewrite + CLI support --- Cargo.toml | 20 +- benches/edit.rs | 71 ++ pyproject.toml | 27 +- src/bin/dexrs/cli.rs | 360 ++++++++ src/bin/dexrs/commands/class.rs | 135 +++ src/bin/dexrs/commands/classes.rs | 57 ++ src/bin/dexrs/commands/disasm.rs | 152 ++++ src/bin/dexrs/commands/edit.rs | 221 +++++ src/bin/dexrs/commands/fields.rs | 49 ++ src/bin/dexrs/commands/info.rs | 87 ++ src/bin/dexrs/commands/inspect.rs | 561 ++++++++++++ src/bin/dexrs/commands/map.rs | 46 + src/bin/dexrs/commands/methods.rs | 50 ++ src/bin/dexrs/commands/mod.rs | 42 + src/bin/dexrs/commands/patch.rs | 46 + src/bin/dexrs/commands/strings.rs | 43 + src/bin/dexrs/commands/types.rs | 32 + src/bin/dexrs/commands/vdex.rs | 199 +++++ src/bin/dexrs/highlight.rs | 89 ++ src/bin/dexrs/main.rs | 47 + src/bin/dexrs/output.rs | 109 +++ src/bin/dexrs/tui/app.rs | 977 +++++++++++++++++++++ src/bin/dexrs/tui/events.rs | 226 +++++ src/bin/dexrs/tui/mod.rs | 3 + src/bin/dexrs/tui/ui.rs | 703 +++++++++++++++ src/error.rs | 58 +- src/file/builder.rs | 1323 +++++++++++++++++++++++++++++ src/file/class_accessor.rs | 8 +- src/file/code_item_accessors.rs 
| 138 ++- src/file/compact_dex.rs | 120 +++ src/file/container.rs | 41 +- src/file/debug.rs | 187 +++- src/file/dex_file.rs | 991 +++++++++++++++++++++ src/file/dump.rs | 1072 ++++++++++++++++------- src/file/editor.rs | 854 +++++++++++++++++++ src/file/instruction.rs | 7 +- src/file/ir.rs | 528 ++++++++++++ src/file/mod.rs | 844 +----------------- src/file/patch.rs | 268 ++++++ src/file/signature.rs | 47 + src/file/structs.rs | 22 - src/file/type_lookup_table.rs | 42 + src/file/verifier.rs | 57 +- src/file/writer.rs | 1113 ++++++++++++++++++++++++ src/leb128.rs | 23 - src/lib.rs | 27 +- src/primitive.rs | 151 ++++ src/py.rs | 10 + src/py/builder.rs | 439 ++++++++++ src/py/class_accessor.rs | 5 + src/py/container.rs | 5 + src/py/editor.rs | 152 ++++ src/py/error.rs | 28 + src/py/file.rs | 373 +++++--- src/py/leb128.rs | 19 + src/py/primitive.rs | 105 +++ src/py/structs.rs | 63 ++ src/py/type_lookup_table.rs | 35 + src/py/utf.rs | 33 + src/utf.rs | 48 +- src/vdex/mod.rs | 695 +++++++++++++++ 61 files changed, 12782 insertions(+), 1501 deletions(-) create mode 100644 benches/edit.rs create mode 100644 src/bin/dexrs/cli.rs create mode 100644 src/bin/dexrs/commands/class.rs create mode 100644 src/bin/dexrs/commands/classes.rs create mode 100644 src/bin/dexrs/commands/disasm.rs create mode 100644 src/bin/dexrs/commands/edit.rs create mode 100644 src/bin/dexrs/commands/fields.rs create mode 100644 src/bin/dexrs/commands/info.rs create mode 100644 src/bin/dexrs/commands/inspect.rs create mode 100644 src/bin/dexrs/commands/map.rs create mode 100644 src/bin/dexrs/commands/methods.rs create mode 100644 src/bin/dexrs/commands/mod.rs create mode 100644 src/bin/dexrs/commands/patch.rs create mode 100644 src/bin/dexrs/commands/strings.rs create mode 100644 src/bin/dexrs/commands/types.rs create mode 100644 src/bin/dexrs/commands/vdex.rs create mode 100644 src/bin/dexrs/highlight.rs create mode 100644 src/bin/dexrs/main.rs create mode 100644 src/bin/dexrs/output.rs create mode 
100644 src/bin/dexrs/tui/app.rs create mode 100644 src/bin/dexrs/tui/events.rs create mode 100644 src/bin/dexrs/tui/mod.rs create mode 100644 src/bin/dexrs/tui/ui.rs create mode 100644 src/file/builder.rs create mode 100644 src/file/compact_dex.rs create mode 100644 src/file/dex_file.rs create mode 100644 src/file/editor.rs create mode 100644 src/file/ir.rs create mode 100644 src/file/patch.rs create mode 100644 src/file/signature.rs create mode 100644 src/file/type_lookup_table.rs create mode 100644 src/file/writer.rs create mode 100644 src/primitive.rs create mode 100644 src/py/builder.rs create mode 100644 src/py/class_accessor.rs create mode 100644 src/py/container.rs create mode 100644 src/py/editor.rs create mode 100644 src/py/error.rs create mode 100644 src/py/leb128.rs create mode 100644 src/py/primitive.rs create mode 100644 src/py/structs.rs create mode 100644 src/py/type_lookup_table.rs create mode 100644 src/py/utf.rs create mode 100644 src/vdex/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 07b38f9..ebba7e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,45 @@ [package] name = "dexrs" -version = "0.1.0" +version = "1.0.0" edition = "2021" [dependencies] adler32 = "1.2.0" +anyhow = "1" +clap = { version = "4", features = ["derive"] } +comfy-table = "7" +crossterm = "0.28" +ratatui = { version = "0.29", optional = true } leb128fmt = "0.1.0" memmap2 = "0.9.5" -openssl = "0.10.64" plain = "0.2.3" pyo3 = { version = "0.23.4", optional = true, features = ["extension-module"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" thiserror = "2.0.11" varint-simd = "0.4.1" [features] default = [] python = ["pyo3"] +vdex = [] +tui = ["ratatui"] [lib] name = "dexrs" crate-type = ["cdylib", "rlib"] +[[bin]] +name = "dexrs" +path = "src/bin/dexrs/main.rs" + [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } [[bench]] name = "parse" harness = false + +[[bench]] +name = "edit" +harness = false diff --git 
a/benches/edit.rs b/benches/edit.rs new file mode 100644 index 0000000..40e4728 --- /dev/null +++ b/benches/edit.rs @@ -0,0 +1,71 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use dexrs::file::{patch::update_checksum, DexEditor, DexFile, DexLocation}; + +const PRIME: &[u8] = include_bytes!("../tests/prime/prime.dex"); + +fn bench_editor_from_bytes(c: &mut Criterion) { + c.bench_function("editor_from_bytes", |b| { + b.iter(|| black_box(DexEditor::from_bytes(PRIME.to_vec()).unwrap())) + }); +} + +fn bench_set_class_flags(c: &mut Criterion) { + c.bench_function("set_class_flags_and_build", |b| { + b.iter(|| { + let mut ed = DexEditor::from_bytes(PRIME.to_vec()).unwrap(); + ed.set_class_access_flags(black_box("Lprime/prime;"), black_box(0x0011u32)).unwrap(); + black_box(ed.build().unwrap()); + }) + }); +} + +fn bench_rename_same_length(c: &mut Criterion) { + c.bench_function("rename_class_same_length", |b| { + b.iter(|| { + let mut ed = DexEditor::from_bytes(PRIME.to_vec()).unwrap(); + ed.rename_class(black_box("Lprime/prime;"), black_box("Lprime/other;")).unwrap(); + black_box(ed.build().unwrap()); + }) + }); +} + +fn bench_rename_different_length(c: &mut Criterion) { + c.bench_function("rename_class_different_length", |b| { + b.iter(|| { + let mut ed = DexEditor::from_bytes(PRIME.to_vec()).unwrap(); + ed.rename_class(black_box("Lprime/prime;"), black_box("Lprime/renamed;")).unwrap(); + black_box(ed.build().unwrap()); + }) + }); +} + +fn bench_update_checksum(c: &mut Criterion) { + c.bench_function("update_checksum", |b| { + let mut buf = PRIME.to_vec(); + b.iter(|| update_checksum(black_box(&mut buf))) + }); +} + +fn bench_full_pipeline(c: &mut Criterion) { + c.bench_function("full_edit_pipeline", |b| { + b.iter(|| { + let mut ed = DexEditor::from_bytes(PRIME.to_vec()).unwrap(); + ed.set_class_access_flags("Lprime/prime;", 0x0011).unwrap(); + ed.rename_class("Lprime/prime;", "Lprime/renamed;").unwrap(); + let bytes = 
ed.build().unwrap(); + let dex = DexFile::from_raw_parts(black_box(&bytes), DexLocation::InMemory).unwrap(); + black_box(dex.num_class_defs()); + }) + }); +} + +criterion_group!( + benches, + bench_editor_from_bytes, + bench_set_class_flags, + bench_rename_same_length, + bench_rename_different_length, + bench_update_checksum, + bench_full_pipeline, +); +criterion_main!(benches); diff --git a/pyproject.toml b/pyproject.toml index eed46a7..9397161 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "dexrs" -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", @@ -12,10 +12,33 @@ classifiers = [ ] dynamic = ["version"] +[dependency-groups] +dev = [ + "maturin>=1.8,<2.0", + "pytest>=8", + "mypy>=1.10", + "ruff>=0.9", +] [tool.maturin] # allows us to use `cargo install --features python` directly features = ["pyo3/extension-module", "dexrs/python"] python-packages = ["dexrs"] python-source = "python" -module-name = "dexrs._internal" \ No newline at end of file +module-name = "dexrs._internal" + +[tool.pytest.ini_options] +testpaths = ["python/tests"] + +[tool.mypy] +python_version = "3.10" +strict = true +mypy_path = "python" +packages = ["dexrs"] + +[tool.ruff] +src = ["python"] +target-version = "py310" + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] \ No newline at end of file diff --git a/src/bin/dexrs/cli.rs b/src/bin/dexrs/cli.rs new file mode 100644 index 0000000..4511e4f --- /dev/null +++ b/src/bin/dexrs/cli.rs @@ -0,0 +1,360 @@ +use clap::{Args, Parser, Subcommand}; + +#[derive(Parser)] +#[command( + name = "dexrs", + about = "Inspect and analyse Android DEX files", + version +)] +pub struct Cli { + #[command(subcommand)] + pub command: Command, +} + +/// Flags shared by every subcommand that reads a DEX file. 
+#[derive(Args, Clone)] +pub struct DexArgs { + /// Path to the DEX file + pub file: std::path::PathBuf, + + /// Emit JSON instead of human-readable output + #[arg(long, global = false)] + pub json: bool, + + /// Disable ANSI colour in output + #[arg(long, global = false)] + pub no_color: bool, + + /// Skip DEX file verification + #[arg(long, global = false)] + pub no_verify: bool, +} + +#[derive(Subcommand)] +pub enum Command { + /// Show file header: magic, version, checksums, section counts + Info(InfoArgs), + /// Show the DEX section map (type, offset, size, count) + Map(MapArgs), + /// List all classes (optionally filtered) + Classes(ClassesArgs), + /// Show full details of a single class + Class(ClassArgs), + /// List all methods (optionally filtered by class) + Methods(MethodsArgs), + /// List all fields (optionally filtered by class) + Fields(FieldsArgs), + /// Disassemble a method to Dalvik bytecode + Disasm(DisasmArgs), + /// Dump all strings from the string pool + Strings(StringsArgs), + /// List all type descriptors + Types(TypesArgs), + /// Patch access flags on a class definition in-place (updates checksum) + Patch(PatchArgs), + /// Structural DEX edits written to an output file + Edit(EditArgs), + /// Launch the interactive TUI inspector + #[cfg(feature = "tui")] + Inspect(InspectArgs), + /// Inspect and extract VDEX files (EXPERIMENTAL) + #[cfg(feature = "vdex")] + Vdex(VdexArgs), +} + +#[derive(Args)] +pub struct InfoArgs { + #[command(flatten)] + pub dex: DexArgs, +} + +#[derive(Args)] +pub struct MapArgs { + #[command(flatten)] + pub dex: DexArgs, +} + +#[derive(Args)] +pub struct ClassesArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Filter classes by substring or glob pattern (e.g. "com.example.*") + #[arg(long, short = 'f')] + pub filter: Option, +} + +#[derive(Args)] +pub struct ClassArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Fully-qualified class name or descriptor (e.g. 
"com.example.Main" or "Lcom/example/Main;") + pub class: String, +} + +#[derive(Args)] +pub struct MethodsArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Only show methods belonging to this class + #[arg(long, short = 'c')] + pub class: Option, +} + +#[derive(Args)] +pub struct FieldsArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Only show fields belonging to this class + #[arg(long, short = 'c')] + pub class: Option, +} + +#[derive(Args)] +pub struct DisasmArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Method to disassemble: "com.example.Foo#methodName" or descriptor form + pub method: String, +} + +#[derive(Args)] +pub struct StringsArgs { + #[command(flatten)] + pub dex: DexArgs, + /// Only show strings containing this substring + #[arg(long, short = 'f')] + pub filter: Option, +} + +#[derive(Args)] +pub struct TypesArgs { + #[command(flatten)] + pub dex: DexArgs, +} + +#[cfg(feature = "tui")] +#[derive(Args)] +pub struct InspectArgs { + /// Path to the DEX file + pub file: std::path::PathBuf, + /// Skip DEX file verification + #[arg(long)] + pub no_verify: bool, + /// Write edited DEX to this path (enables in-TUI editing with [e] and [f]) + #[arg(long, short = 'o')] + pub output: Option, +} + +// --- patch ------------------------------------------------------------------- + +#[derive(Args)] +pub struct PatchArgs { + #[command(subcommand)] + pub command: PatchCommand, +} + +#[derive(Subcommand)] +pub enum PatchCommand { + /// Patch access flags on a class definition + Flags(PatchFlagsArgs), + /// Overwrite a single instruction word (u16) in a code item + Insn(PatchInsnArgs), +} + +#[derive(Args)] +pub struct PatchFlagsArgs { + /// Path to the DEX file (modified in-place) + pub file: std::path::PathBuf, + /// Fully-qualified class name or descriptor + #[arg(long, short = 'c')] + pub class: String, + /// New access flags value (decimal or 0x-prefixed hex) + #[arg(long)] + pub flags: String, +} + +#[derive(Args)] +pub struct PatchInsnArgs { 
+ /// Path to the DEX file (modified in-place) + pub file: std::path::PathBuf, + /// Byte offset of the code item in the file (decimal or 0x-prefixed hex) + #[arg(long)] + pub code_offset: String, + /// Code-unit PC within the code item (decimal) + #[arg(long)] + pub pc: u32, + /// Replacement instruction word (decimal or 0x-prefixed hex) + #[arg(long)] + pub word: String, +} + +// --- edit -------------------------------------------------------------------- + +#[derive(Args)] +pub struct EditArgs { + #[command(subcommand)] + pub command: EditCommand, +} + +#[derive(Subcommand)] +pub enum EditCommand { + /// Rename a class (updates string pool and all cross-references) + RenameClass(EditRenameClassArgs), + /// Set access flags on a class + SetFlags(EditSetFlagsArgs), + /// Set access flags on a method + SetMethodFlags(EditSetMethodFlagsArgs), + /// Remove hidden API restriction flags + ClearHiddenapi(EditClearHiddenapiArgs), + /// Build a new DEX by assembling classes from a smali-like text description + BuildDex(EditBuildDexArgs), +} + +#[derive(Args)] +pub struct EditRenameClassArgs { + pub file: std::path::PathBuf, + pub old_name: String, + pub new_name: String, + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, +} + +#[derive(Args)] +pub struct EditSetFlagsArgs { + pub file: std::path::PathBuf, + #[arg(long, short = 'c')] + pub class: String, + /// New access flags (decimal or 0x-prefixed hex) + #[arg(long)] + pub flags: String, + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, +} + +#[derive(Args)] +pub struct EditSetMethodFlagsArgs { + pub file: std::path::PathBuf, + /// Class descriptor or dotted name + #[arg(long, short = 'c')] + pub class: String, + /// Method name (without signature) + #[arg(long, short = 'm')] + pub method: String, + /// New access flags (decimal or 0x-prefixed hex) + #[arg(long)] + pub flags: String, + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, +} + +#[derive(Args)] +pub struct 
EditClearHiddenapiArgs { + pub file: std::path::PathBuf, + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, +} + +// --- edit build-dex ----------------------------------------------------------- + +/// Build a new DEX file from a plain-text class/method description. +/// +/// The input file format (one directive per line): +/// +/// ```text +/// .class Lcom/example/Hello; public +/// .super Ljava/lang/Object; +/// .method main ([Ljava/lang/String;)V public static +/// .registers 3 1 2 +/// sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream; +/// const-string v1, "Hello!" +/// invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V +/// return-void +/// .end method +/// .end class +/// ``` +#[derive(Args)] +pub struct EditBuildDexArgs { + /// Path to the plain-text class description (use `-` for stdin) + pub input: String, + /// Output DEX file path + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, + /// DEX version to target (default: 35) + #[arg(long, default_value = "35")] + pub dex_version: u32, +} + +// --- vdex --------------------------------------------------------------------- + +#[cfg(feature = "vdex")] +#[derive(Args)] +pub struct VdexArgs { + #[command(subcommand)] + pub command: VdexCommand, +} + +#[cfg(feature = "vdex")] +#[derive(Subcommand)] +pub enum VdexCommand { + /// Show the VDEX file header, sections, and embedded DEX checksums + Info(VdexInfoArgs), + /// List all embedded DEX files with index, checksum, and size + List(VdexListArgs), + /// Extract an embedded DEX file to disk + Extract(VdexExtractArgs), + /// Launch the interactive TUI inspector on an embedded DEX + #[cfg(feature = "tui")] + Inspect(VdexInspectArgs), +} + +#[cfg(feature = "vdex")] +#[derive(Args)] +pub struct VdexInfoArgs { + /// Path to the VDEX file + pub file: std::path::PathBuf, + /// Emit JSON instead of human-readable output + #[arg(long)] + pub json: bool, + /// Disable ANSI colour in output + #[arg(long)] + pub 
no_color: bool, +} + +#[cfg(feature = "vdex")] +#[derive(Args)] +pub struct VdexListArgs { + /// Path to the VDEX file + pub file: std::path::PathBuf, + /// Emit JSON instead of human-readable output + #[arg(long)] + pub json: bool, + /// Disable ANSI colour in output + #[arg(long)] + pub no_color: bool, +} + +#[cfg(feature = "vdex")] +#[derive(Args)] +pub struct VdexExtractArgs { + /// Path to the VDEX file + pub file: std::path::PathBuf, + /// Zero-based index of the DEX file to extract + #[arg(long, short = 'i', default_value = "0")] + pub index: u32, + /// Output path for the extracted DEX file + #[arg(long, short = 'o')] + pub output: std::path::PathBuf, +} + +#[cfg(all(feature = "vdex", feature = "tui"))] +#[derive(Args)] +pub struct VdexInspectArgs { + /// Path to the VDEX file + pub file: std::path::PathBuf, + /// Zero-based index of the embedded DEX to inspect (default: 0, or pick interactively) + #[arg(long, short = 'i')] + pub index: Option, + /// Write edited DEX to this path (enables in-TUI editing) + #[arg(long, short = 'o')] + pub output: Option, +} diff --git a/src/bin/dexrs/commands/class.rs b/src/bin/dexrs/commands/class.rs new file mode 100644 index 0000000..7d0fed5 --- /dev/null +++ b/src/bin/dexrs/commands/class.rs @@ -0,0 +1,135 @@ +use anyhow::Result; +use serde_json::json; + +use dexrs::file::{dump::prettify, ClassAccessor}; + +use crate::{ + cli::ClassArgs, + commands::with_dex, + output::{format_flags, pretty_type, to_descriptor, Printer}, +}; + +pub fn run(args: &ClassArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let target_desc = to_descriptor(&args.class); + + with_dex(&args.dex, |dex| { + // Find class by descriptor + let class_def = { + let mut found = None; + for idx in 0..dex.num_class_defs() { + let cd = dex.get_class_def(idx)?; + let desc = dex.get_class_desc_utf16_lossy(cd)?; + if desc == target_desc + || dexrs::desc_names::pretty_desc(&desc) == args.class + { + found = Some(cd); + break; + } + 
} + match found { + Some(c) => c, + None => { + p.error(&format!("class '{}' not found", args.class)); + return Ok(()); + } + } + }; + + let desc = dex.get_class_desc_utf16_lossy(class_def)?; + let class_name = pretty_type(&desc); + let flags = format_flags(class_def.access_flags); + + let superclass = if class_def.superclass_idx != u16::MAX { + dex.get_type_desc_utf16_lossy_at(class_def.superclass_idx) + .map(|s| pretty_type(&s)) + .unwrap_or_default() + } else { + String::new() + }; + + let interfaces: Vec = if let Ok(Some(list)) = + dex.get_interfaces_list(class_def) + { + list.iter() + .map(|t| { + dex.get_type_desc_utf16_lossy_at(t.type_idx) + .map(|s| pretty_type(&s)) + .unwrap_or_else(|_| format!("type@{}", t.type_idx)) + }) + .collect() + } else { + vec![] + }; + + let accessor: Option> = dex.get_class_accessor(class_def)?; + + let mut methods: Vec<(String, String, u32)> = vec![]; + let mut fields: Vec<(String, String, String)> = vec![]; + + if let Some(acc) = &accessor { + for m in acc.get_methods()? 
{ + let name = dex.pretty_method_at(m.index, prettify::Method::WithSig); + let kind = if m.is_static_or_direct { "direct" } else { "virtual" }; + methods.push((name, kind.to_string(), m.code_offset)); + } + for f in acc.get_fields() { + let name = dex.pretty_field_at(f.index, prettify::Field::WithType); + let kind = if f.is_static { "static" } else { "instance" }; + fields.push((name, kind.to_string(), format_flags(f.access_flags))); + } + } + + if p.json { + println!( + "{}", + json!({ + "class": class_name, + "descriptor": desc, + "flags": flags, + "superclass": superclass, + "interfaces": interfaces, + "methods": methods.iter().map(|(n, k, off)| json!({ + "name": n, + "kind": k, + "code_offset": off, + })).collect::>(), + "fields": fields.iter().map(|(n, k, f)| json!({ + "name": n, + "kind": k, + "flags": f, + })).collect::>(), + }) + ); + return Ok(()); + } + + p.section("Class"); + p.kv("Name:", &class_name); + p.kv("Descriptor:", &desc); + p.kv("Access flags:", &flags); + if !superclass.is_empty() { + p.kv("Superclass:", &superclass); + } + if !interfaces.is_empty() { + p.kv("Interfaces:", &interfaces.join(", ")); + } + + p.section(&format!("Methods ({})", methods.len())); + for (name, kind, _) in &methods { + p.item(&format!("[{kind}] {name}")); + } + + p.section(&format!("Fields ({})", fields.len())); + for (name, kind, flags) in &fields { + let flag_str = if flags.is_empty() { + String::new() + } else { + format!(" [{flags}]") + }; + p.item(&format!("[{kind}] {name}{flag_str}")); + } + + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/classes.rs b/src/bin/dexrs/commands/classes.rs new file mode 100644 index 0000000..045abb2 --- /dev/null +++ b/src/bin/dexrs/commands/classes.rs @@ -0,0 +1,57 @@ +use anyhow::Result; +use serde_json::json; + +use crate::{cli::ClassesArgs, commands::with_dex, output::Printer}; + +pub fn run(args: &ClassesArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let filter = 
args.filter.as_deref().map(str::to_lowercase); + + with_dex(&args.dex, |dex| { + let count = dex.num_class_defs(); + let mut rows: Vec> = Vec::new(); + + for idx in 0..count { + let cd = dex.get_class_def(idx)?; + let desc = dex.get_class_desc_utf16_lossy(cd)?; + let pretty = dexrs::desc_names::pretty_desc(&desc); + + if let Some(ref f) = filter { + if !pretty.to_lowercase().contains(f.as_str()) + && !desc.to_lowercase().contains(f.as_str()) + { + continue; + } + } + + let flags = cd.access_flags; + let superclass = if cd.superclass_idx != u16::MAX { + dex.get_type_desc_utf16_lossy_at(cd.superclass_idx) + .map(|s| dexrs::desc_names::pretty_desc(&s)) + .unwrap_or_default() + } else { + String::new() + }; + + rows.push(vec![ + pretty, + crate::output::format_flags(flags), + superclass, + ]); + } + + if p.json { + let entries: Vec<_> = rows + .iter() + .map(|r| json!({ "class": r[0], "flags": r[1], "superclass": r[2] })) + .collect(); + println!("{}", json!({ "classes": entries, "total": rows.len() })); + return Ok(()); + } + + let total = rows.len(); + p.table(&["Class", "Access Flags", "Superclass"], rows); + println!("\n {total} class(es)"); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/disasm.rs b/src/bin/dexrs/commands/disasm.rs new file mode 100644 index 0000000..47c296c --- /dev/null +++ b/src/bin/dexrs/commands/disasm.rs @@ -0,0 +1,152 @@ +use anyhow::Result; +use serde_json::json; + +use dexrs::file::dump::prettify; + +use crossterm::style::Stylize; + +use crate::{ + cli::DisasmArgs, + commands::with_dex, + highlight, + output::{format_flags, to_descriptor, Printer}, +}; + +/// Parse "com.example.Foo#bar" or "com.example.Foo#bar(int, String)" into +/// (descriptor, method_name_prefix). 
+fn parse_method_spec(spec: &str) -> (String, String) { + if let Some(pos) = spec.rfind('#') { + let class_part = &spec[..pos]; + let method_part = &spec[pos + 1..]; + (to_descriptor(class_part), method_part.to_string()) + } else { + // No '#', treat entire thing as a method name substring + (String::new(), spec.to_string()) + } +} + +pub fn run(args: &DisasmArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let (class_desc, method_name_prefix) = parse_method_spec(&args.method); + + with_dex(&args.dex, |dex| { + for idx in 0..dex.num_class_defs() { + let cd = dex.get_class_def(idx)?; + + if !class_desc.is_empty() { + let desc = dex.get_class_desc_utf16_lossy(cd)?; + if desc != class_desc { + continue; + } + } + + let accessor = dex.get_class_accessor(cd)?; + let acc = match accessor { + Some(a) => a, + None => continue, + }; + + for m in acc.get_methods()? { + let method_id = dex.get_method_id(m.index)?; + let name = dex.get_str_lossy_at(method_id.name_idx)?; + if !name.contains(&method_name_prefix) { + continue; + } + + let full_name = dex.pretty_method_at(m.index, prettify::Method::WithSig); + let flags = format_flags(m.access_flags); + let code_off = m.code_offset; + + disasm_method( + dex, + &full_name, + &flags, + code_off, + &p, + )?; + } + } + Ok(()) + }) +} + +fn disasm_method<'a, C>( + dex: &dexrs::file::DexFile<'a, C>, + full_name: &str, + flags: &str, + code_off: u32, + p: &Printer, +) -> Result<()> +where + C: dexrs::file::DexContainer<'a>, +{ + if p.json { + let insns = collect_disasm(dex, code_off)?; + println!( + "{}", + json!({ + "method": full_name, + "flags": flags, + "code_offset": code_off, + "instructions": insns, + }) + ); + return Ok(()); + } + + p.section(&format!(".method {flags} {full_name}")); + + if code_off == 0 { + p.item("(abstract / native — no code)"); + return Ok(()); + } + + let ca = dex.get_code_item_accessor(code_off)?; + p.item(&format!( + " registers: {} ins: {} outs: {} tries: {}", + 
ca.registers_size(), + ca.ins_size(), + ca.outs_size(), + ca.tries_size(), + )); + println!(); + + let mut pc: u32 = 0; + for insn in ca { + let styled = insn.to_styled(Some(dex)).unwrap_or_else(|_| vec![dexrs::file::dump::Span { + text: "".to_string(), + hl: dexrs::file::dump::Highlight::Plain, + }]); + let colored = highlight::to_cli_string(&styled, p.color); + if p.color { + println!(" {} {colored}", format!("{pc:04x}").dim()); + } else { + println!(" {pc:04x} {colored}"); + } + pc += insn.size_in_code_units() as u32; + } + + println!(".end method\n"); + Ok(()) +} + +fn collect_disasm<'a, C>( + dex: &dexrs::file::DexFile<'a, C>, + code_off: u32, +) -> Result> +where + C: dexrs::file::DexContainer<'a>, +{ + if code_off == 0 { + return Ok(vec![]); + } + let ca = dex.get_code_item_accessor(code_off)?; + let mut insns = Vec::new(); + let mut pc: u32 = 0; + for insn in ca { + let text = insn.to_string(Some(dex)).unwrap_or_else(|_| "".to_string()); + insns.push(json!({ "pc": pc, "text": text })); + pc += insn.size_in_code_units() as u32; + } + Ok(insns) +} diff --git a/src/bin/dexrs/commands/edit.rs b/src/bin/dexrs/commands/edit.rs new file mode 100644 index 0000000..3b698e3 --- /dev/null +++ b/src/bin/dexrs/commands/edit.rs @@ -0,0 +1,221 @@ +use anyhow::{Context, Result}; +use std::io::{self, BufRead}; + +use crate::cli::{ + EditBuildDexArgs, EditClearHiddenapiArgs, EditRenameClassArgs, EditSetFlagsArgs, + EditSetMethodFlagsArgs, +}; + +fn parse_int(s: &str) -> Result { + if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) { + Ok(u32::from_str_radix(hex, 16).context("invalid hex value")?) + } else { + Ok(s.parse::().context("invalid decimal value")?) 
+ } +} + +pub fn run_rename_class(args: &EditRenameClassArgs) -> Result<()> { + let mut editor = dexrs::file::DexEditor::from_file(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + editor + .rename_class(&args.old_name, &args.new_name) + .context("rename_class")?; + editor + .write_to(&args.output) + .with_context(|| format!("cannot write '{}'", args.output.display()))?; + eprintln!("written: {}", args.output.display()); + Ok(()) +} + +pub fn run_set_flags(args: &EditSetFlagsArgs) -> Result<()> { + let flags = parse_int(&args.flags).context("--flags")?; + let mut editor = dexrs::file::DexEditor::from_file(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + editor + .set_class_access_flags(&args.class, flags) + .context("set_class_access_flags")?; + editor + .write_to(&args.output) + .with_context(|| format!("cannot write '{}'", args.output.display()))?; + eprintln!("written: {}", args.output.display()); + Ok(()) +} + +pub fn run_set_method_flags(args: &EditSetMethodFlagsArgs) -> Result<()> { + let flags = parse_int(&args.flags).context("--flags")?; + let mut editor = dexrs::file::DexEditor::from_file(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + editor + .set_method_access_flags(&args.class, &args.method, flags) + .context("set_method_access_flags")?; + editor + .write_to(&args.output) + .with_context(|| format!("cannot write '{}'", args.output.display()))?; + eprintln!("written: {}", args.output.display()); + Ok(()) +} + +pub fn run_clear_hiddenapi(args: &EditClearHiddenapiArgs) -> Result<()> { + let mut editor = dexrs::file::DexEditor::from_file(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + editor.clear_hiddenapi_flags().context("clear_hiddenapi_flags")?; + editor + .write_to(&args.output) + .with_context(|| format!("cannot write '{}'", args.output.display()))?; + eprintln!("written: {}", args.output.display()); + 
Ok(()) +} + +// --- build-dex ---------------------------------------------------------------- + +/// Assemble a new DEX file from a plain-text class/method description. +/// +/// **Input format** (lines starting with `#` are comments): +/// +/// ```text +/// .class Lcom/example/Hello; public +/// .super Ljava/lang/Object; +/// .source Hello.java +/// +/// .method main ([Ljava/lang/String;)V public static +/// .registers 3 1 2 +/// sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream; +/// const-string v1, "Hello!" +/// invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V +/// return-void +/// .end method +/// +/// .end class +/// ``` +pub fn run_build_dex(args: &EditBuildDexArgs) -> Result<()> { + let lines: Vec = if args.input == "-" { + io::stdin().lock().lines().collect::>().context("reading stdin")? + } else { + let f = std::fs::File::open(&args.input) + .with_context(|| format!("cannot open '{}'", args.input))?; + io::BufReader::new(f).lines().collect::>().context("reading input")? 
+ }; + + let ir = parse_build_input(&lines, args.dex_version).context("parsing build input")?; + let bytes = dexrs::file::DexWriter::write(ir).context("assembling DEX")?; + std::fs::write(&args.output, &bytes) + .with_context(|| format!("cannot write '{}'", args.output.display()))?; + eprintln!("written: {} ({} bytes)", args.output.display(), bytes.len()); + Ok(()) +} + +fn parse_build_input(lines: &[String], version: u32) -> Result { + use dexrs::file::{builder::CodeBuilder, ir::{ClassDef, MethodDef, ProtoKey}, DexIr}; + + let mut ir = DexIr::new(version); + let mut class_stack: Vec = Vec::new(); + + struct MethodCtx { + name: String, + proto: ProtoKey, + access: u32, + code: Option, + is_direct: bool, + } + let mut method_ctx: Option = None; + + for (lineno, raw) in lines.iter().enumerate() { + let line = raw.trim(); + let line = if let Some(pos) = line.find('#') { &line[..pos] } else { line }.trim(); + if line.is_empty() { + continue; + } + + if let Some(rest) = line.strip_prefix(".class ") { + let parts: Vec<&str> = rest.split_whitespace().collect(); + let desc = parts.first().copied().context(".class needs descriptor")?; + let access = parse_access_flags(&parts[1..]); + class_stack.push(ClassDef::new(desc).access(access)); + + } else if let Some(rest) = line.strip_prefix(".super ") { + class_stack.last_mut().context(".super outside .class")?.superclass = + Some(rest.trim().to_string()); + + } else if let Some(rest) = line.strip_prefix(".source ") { + class_stack.last_mut().context(".source outside .class")?.source_file = + Some(rest.trim().to_string()); + + } else if let Some(rest) = line.strip_prefix(".implements ") { + class_stack.last_mut().context(".implements outside .class")?.interfaces + .push(rest.trim().to_string()); + + } else if line == ".end class" { + ir.add_class(class_stack.pop().context(".end class without .class")?); + + } else if let Some(rest) = line.strip_prefix(".method ") { + anyhow::ensure!(method_ctx.is_none(), "line {}: nested 
.method", lineno + 1); + let parts: Vec<&str> = rest.split_whitespace().collect(); + anyhow::ensure!(parts.len() >= 2, "line {}: .method needs name and descriptor", lineno + 1); + let name = parts[0]; + let proto = ProtoKey::from_descriptor(parts[1]) + .ok_or_else(|| anyhow::anyhow!("line {}: invalid descriptor {:?}", lineno + 1, parts[1]))?; + let access = parse_access_flags(&parts[2..]); + let is_direct = name.starts_with('<') + || access & 0x0008 != 0 + || access & 0x0002 != 0; + method_ctx = Some(MethodCtx { name: name.to_string(), proto, access, code: None, is_direct }); + + } else if let Some(rest) = line.strip_prefix(".registers ") { + let ctx = method_ctx.as_mut().context(".registers outside .method")?; + let nums: Vec = rest + .split_whitespace() + .map(|s| s.parse::().context("invalid number")) + .collect::>()?; + anyhow::ensure!(nums.len() == 3, "line {}: .registers needs exactly 3 values", lineno + 1); + ctx.code = Some(CodeBuilder::new(nums[0], nums[1], nums[2])); + + } else if line == ".end method" { + let ctx = method_ctx.take().context(".end method without .method")?; + let code = ctx.code.map(|cb| cb.build()).transpose() + .with_context(|| format!("assembling method {:?}", ctx.name))?; + let mut method = MethodDef::new(ctx.name, ctx.proto).access(ctx.access); + if let Some(c) = code { method.code = Some(c); } + let cls = class_stack.last_mut().context(".end method outside .class")?; + if ctx.is_direct { cls.direct_methods.push(method); } else { cls.virtual_methods.push(method); } + + } else if let Some(ctx) = method_ctx.as_mut() { + let cb = ctx.code.get_or_insert_with(|| CodeBuilder::new(0, 0, 0)); + if let Some(lbl) = line.strip_prefix(':') { + cb.label(lbl); + } else { + cb.emit(line).with_context(|| format!("line {}: {:?}", lineno + 1, line))?; + } + + } else { + anyhow::bail!("line {}: unexpected directive {:?}", lineno + 1, line); + } + } + + anyhow::ensure!(class_stack.is_empty(), "unclosed .class block"); + Ok(ir) +} + +fn 
parse_access_flags(tokens: &[&str]) -> u32 { + tokens.iter().fold(0u32, |acc, t| { + acc | match *t { + "public" => 0x0001, + "private" => 0x0002, + "protected" => 0x0004, + "static" => 0x0008, + "final" => 0x0010, + "synchronized" => 0x0020, + "abstract" => 0x0400, + "interface" => 0x0200, + "native" => 0x0100, + "constructor" => 0x10000, + _ => { + if let Some(hex) = t.strip_prefix("0x").or_else(|| t.strip_prefix("0X")) { + u32::from_str_radix(hex, 16).unwrap_or(0) + } else { + t.parse().unwrap_or(0) + } + } + } + }) +} + diff --git a/src/bin/dexrs/commands/fields.rs b/src/bin/dexrs/commands/fields.rs new file mode 100644 index 0000000..86f91f7 --- /dev/null +++ b/src/bin/dexrs/commands/fields.rs @@ -0,0 +1,49 @@ +use anyhow::Result; +use serde_json::json; + +use dexrs::file::dump::prettify; + +use crate::{cli::FieldsArgs, commands::with_dex, output::{format_flags, to_descriptor, Printer}}; + +pub fn run(args: &FieldsArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let class_filter = args.class.as_deref().map(to_descriptor); + + with_dex(&args.dex, |dex| { + let mut rows: Vec> = Vec::new(); + + for idx in 0..dex.num_class_defs() { + let cd = dex.get_class_def(idx)?; + + if let Some(ref filter_desc) = class_filter { + let desc = dex.get_class_desc_utf16_lossy(cd)?; + if &desc != filter_desc { + continue; + } + } + + let accessor = dex.get_class_accessor(cd)?; + if let Some(acc) = accessor { + for f in acc.get_fields() { + let name = dex.pretty_field_at(f.index, prettify::Field::WithType); + let kind = if f.is_static { "static" } else { "instance" }; + rows.push(vec![name, kind.to_string(), format_flags(f.access_flags)]); + } + } + } + + if p.json { + let entries: Vec<_> = rows + .iter() + .map(|r| json!({ "field": r[0], "kind": r[1], "flags": r[2] })) + .collect(); + println!("{}", json!({ "fields": entries, "total": rows.len() })); + return Ok(()); + } + + let total = rows.len(); + p.table(&["Field", "Kind", "Access Flags"], 
rows); + println!("\n {total} field(s)"); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/info.rs b/src/bin/dexrs/commands/info.rs new file mode 100644 index 0000000..9832a29 --- /dev/null +++ b/src/bin/dexrs/commands/info.rs @@ -0,0 +1,87 @@ +use anyhow::Result; +use serde_json::json; + +use crate::{ + cli::InfoArgs, + commands::with_dex, + output::{format_flags, Printer}, +}; + +pub fn run(args: &InfoArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + with_dex(&args.dex, |dex| { + let h = dex.get_header(); + let magic = h.get_magic(); + let sig = h.get_signature(); + let sig_hex: String = sig.iter().map(|b| format!("{b:02x}")).collect::>().join(""); + + let format = if dex.is_compact_dex() { "Compact DEX (cdex)" } else { "Standard DEX" }; + let version = h.get_version(); + let location = dex.get_location(); + + if p.json { + println!( + "{}", + json!({ + "format": format, + "version": version, + "location": location.to_string(), + "magic": format!("{}", String::from_utf8_lossy(magic)), + "checksum": h.checksum, + "signature": sig_hex, + "file_size": h.file_size, + "header_size": h.header_size, + "endian_tag": format!("{:#010x}", h.endian_tag), + "map_off": h.map_off, + "strings": h.string_ids_size, + "types": h.type_ids_size, + "protos": h.proto_ids_size, + "fields": h.field_ids_size, + "methods": h.method_ids_size, + "classes": h.class_defs_size, + "data_size": h.data_size, + "data_off": h.data_off, + }) + ); + return Ok(()); + } + + p.section("File"); + p.kv("Format:", format); + p.kv("Version:", &version.to_string()); + p.kv("Location:", &location.to_string()); + p.kv("File size:", &format!("{} bytes", h.file_size)); + + p.section("Integrity"); + p.kv("Checksum:", &format!("{:#010x}", h.checksum)); + p.kv("SHA-1:", &sig_hex); + + p.section("Header"); + p.kv("Header size:", &format!("{} bytes", h.header_size)); + p.kv("Endian tag:", &format!("{:#010x}", h.endian_tag)); + p.kv("Map offset:", &format!("{:#010x}", h.map_off)); + 
p.kv("Data:", &format!("{} bytes @ {:#x}", h.data_size, h.data_off)); + p.kv("Link:", &format!("{} bytes @ {:#x}", h.link_size, h.link_off)); + + p.section("Counts"); + p.kv("Strings:", &h.string_ids_size.to_string()); + p.kv("Types:", &h.type_ids_size.to_string()); + p.kv("Protos:", &h.proto_ids_size.to_string()); + p.kv("Fields:", &h.field_ids_size.to_string()); + p.kv("Methods:", &h.method_ids_size.to_string()); + p.kv("Classes:", &h.class_defs_size.to_string()); + + p.section("Method handles / call sites"); + p.kv("Method handles:", &dex.num_method_handles().to_string()); + p.kv("Call site IDs:", &dex.num_call_site_ids().to_string()); + + // Access flags of the first class (just as a sample check) + if h.class_defs_size > 0 { + if let Ok(cd) = dex.get_class_def(0) { + let _flags = format_flags(cd.access_flags); + } + } + + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/inspect.rs b/src/bin/dexrs/commands/inspect.rs new file mode 100644 index 0000000..8feb7a3 --- /dev/null +++ b/src/bin/dexrs/commands/inspect.rs @@ -0,0 +1,561 @@ +use std::fs::File; +#[cfg(feature = "vdex")] +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::Result; +#[cfg(feature = "vdex")] +use anyhow::bail; +use crossterm::{ + event::{self, KeyCode, KeyModifiers}, + execute, + terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen}, +}; +use dexrs::file::{dump::prettify, verifier::VerifyPreset, DexContainer, DexFile, DexFileContainer, DexLocation}; +#[cfg(feature = "vdex")] +use dexrs::vdex::VdexFileContainer; +use ratatui::{backend::CrosstermBackend, Terminal}; +#[cfg(feature = "vdex")] +use ratatui::{ + layout::{Constraint, Layout}, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, ListState, Paragraph}, + Frame, +}; + +use crate::{ + cli::InspectArgs, + tui::{ + app::{App, ClassEntry, MemberEntry, 
MemberKind}, + events::{handle_events_with_quit, Action}, + ui::{draw, draw_loading}, + }, +}; + +pub fn run(args: &InspectArgs) -> Result<()> { + let file = File::open(&args.file) + .map_err(|e| anyhow::anyhow!("cannot open '{}': {e}", args.file.display()))?; + let container = DexFileContainer::new(&file).verify(!args.no_verify); + let dex = container.open()?; + + let total_classes = dex.num_class_defs() as usize; + let file_name = args.file.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("(unknown)") + .to_string(); + + // Enter TUI mode early so we can show a loading screen. + enable_raw_mode()?; + let mut stdout = std::io::stdout(); + execute!(stdout, EnterAlternateScreen)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + // Shared atomic progress counter updated by the build thread. + let progress = Arc::new(AtomicUsize::new(0)); + let progress_clone = Arc::clone(&progress); + + // Cancel flag: set by the loading UI loop when the user presses Esc / Ctrl+C. + let cancel = Arc::new(AtomicBool::new(false)); + let cancel_build = Arc::clone(&cancel); + + // Run build_app_state in a scoped thread so it can borrow `dex`. + let build_result: anyhow::Result, String)>> = + std::thread::scope(|s| { + let handle = s.spawn(|| { + build_app_state(&dex, &cancel_build, |current, _total| { + progress_clone.store(current, Ordering::Relaxed); + }) + }); + + // Render loading frames; poll for Esc / Ctrl+C to cancel. + let mut tick: u64 = 0; + loop { + let current = progress.load(Ordering::Relaxed); + terminal + .draw(|f| draw_loading(f, &file_name, current, total_classes, tick))?; + + // Drain all pending key events. + while event::poll(Duration::ZERO)? { + if let event::Event::Key(key) = event::read()? 
{ + match key.code { + KeyCode::Esc | KeyCode::Char('q') => { + cancel.store(true, Ordering::Relaxed); + } + KeyCode::Char('c') + if key.modifiers.contains(KeyModifiers::CONTROL) => + { + cancel.store(true, Ordering::Relaxed); + } + _ => {} + } + } + } + + if handle.is_finished() { + break; + } + + std::thread::sleep(Duration::from_millis(40)); + tick = tick.wrapping_add(1); + } + + handle + .join() + .map_err(|_| anyhow::anyhow!("build thread panicked"))? + }); + + // Drop the mmap handles — all data is now owned by classes/file_info. + drop(dex); + drop(container); + drop(file); + + match build_result { + Err(e) => { + let _ = disable_raw_mode(); + let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen); + Err(e) + } + Ok(None) => { + // User cancelled loading — restore terminal and exit cleanly. + disable_raw_mode()?; + execute!(terminal.backend_mut(), LeaveAlternateScreen)?; + Ok(()) + } + Ok(Some((classes, file_info))) => { + // Re-read bytes for lazy disassembly (OS page cache makes this fast). 
+ let raw_bytes = std::fs::read(&args.file) + .map_err(|e| anyhow::anyhow!("cannot read '{}': {e}", args.file.display())) + .inspect_err(|_| { + let _ = disable_raw_mode(); + let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen); + })?; + let app = App::new(classes, file_info, raw_bytes); + let app = app.with_editable(args.output.clone()); + run_tui_with_terminal(app, terminal) + } + } +} + +pub fn build_app_state<'a, C: DexContainer<'a>>( + dex: &DexFile<'a, C>, + cancel: &AtomicBool, + progress: impl Fn(usize, usize), +) -> Result, String)>> { + let h = dex.get_header(); + let file_info = format!( + "Format: {}\nVersion: {}\nFile: {} bytes\nClasses: {}\nMethods: {}\nStrings: {}", + if dex.is_compact_dex() { "Compact DEX" } else { "Standard DEX" }, + h.get_version(), + h.file_size, + h.class_defs_size, + h.method_ids_size, + h.string_ids_size, + ); + + let total = dex.num_class_defs() as usize; + let mut classes = Vec::with_capacity(total); + + for idx in 0..dex.num_class_defs() { + // Check for user-initiated cancel before each class. 
+ if cancel.load(Ordering::Relaxed) { + return Ok(None); + } + progress(idx as usize, total); + let cd = match dex.get_class_def(idx) { + Ok(c) => c, + Err(_) => continue, + }; + + let descriptor = dex.get_class_desc_utf16_lossy(cd).unwrap_or_default(); + let pretty_name = dexrs::desc_names::pretty_desc(&descriptor); + + let (package, simple_name) = split_class_name(&pretty_name); + + let superclass_desc = if cd.superclass_idx != u16::MAX { + dex.get_type_desc_utf16_lossy_at(cd.superclass_idx).unwrap_or_default() + } else { + String::new() + }; + let superclass = if superclass_desc.is_empty() { + String::new() + } else { + dexrs::desc_names::pretty_desc(&superclass_desc) + }; + + let mut methods: Vec = Vec::new(); + let mut fields: Vec = Vec::new(); + + if let Ok(Some(acc)) = dex.get_class_accessor(cd) { + if let Ok(all_methods) = acc.get_methods() { + for m in all_methods { + let name = dex.pretty_method_at(m.index, prettify::Method::WithSig); + let kind = if m.is_static_or_direct { + MemberKind::DirectMethod + } else { + MemberKind::VirtualMethod + }; + let (raw_name, proto_or_type_desc) = dex + .get_method_id(m.index) + .ok() + .and_then(|mid| { + let raw = dex.get_str_lossy_at(mid.name_idx).ok()?; + let proto = dex.get_proto_id(mid.proto_idx).ok()?; + let ret = dex.get_type_desc_utf16_lossy_at(proto.return_type_idx).ok()?; + let mut desc = String::from("("); + if let Ok(Some(params)) = dex.get_type_list(proto.parameters_off) { + for tp in params { + desc.push_str(&dex.get_type_desc_utf16_lossy_at(tp.type_idx).unwrap_or_default()); + } + } + desc.push(')'); + desc.push_str(&ret); + Some((raw, desc)) + }) + .unwrap_or_else(|| (name.clone(), "()V".to_string())); + // Only read the code_item header (4 × u16) — no instruction scanning. 
+ let code_info = build_code_info(dex, m.code_offset); + methods.push(MemberEntry { + name, + raw_name, + proto_or_type_desc, + kind, + access_flags: m.access_flags, + code_offset: m.code_offset, + code_info, + }); + } + } + + for f in acc.get_fields() { + let name = dex.pretty_field_at(f.index, prettify::Field::WithType); + let kind = if f.is_static { + MemberKind::StaticField + } else { + MemberKind::InstanceField + }; + let (raw_name, proto_or_type_desc) = dex + .get_field_id(f.index) + .ok() + .and_then(|fid| { + let raw = dex.get_str_lossy(dex.get_string_id(fid.name_idx).ok()?).ok()?; + let ftype = dex.get_type_desc_utf16_lossy_at(fid.type_idx).ok()?; + Some((raw, ftype)) + }) + .unwrap_or_else(|| (name.clone(), "Ljava/lang/Object;".to_string())); + fields.push(MemberEntry { + name, + raw_name, + proto_or_type_desc, + kind, + access_flags: f.access_flags, + code_offset: 0, + code_info: None, + }); + } + } + + classes.push(ClassEntry { + descriptor, + pretty_name, + package, + simple_name, + access_flags: cd.access_flags, + superclass, + superclass_desc, + methods, + fields, + }); + } + + Ok(Some((classes, file_info))) +} + +/// Fast code-item header read: registers, ins, outs, tries — zero instruction scanning. +fn build_code_info<'a, C: DexContainer<'a>>(dex: &DexFile<'a, C>, code_off: u32) -> Option { + if code_off == 0 { return None; } + let ca = dex.get_code_item_accessor(code_off).ok()?; + Some(format!( + "registers: {} ins: {} outs: {} tries: {}", + ca.registers_size(), + ca.ins_size(), + ca.outs_size(), + ca.tries_size(), + )) +} + +fn split_class_name(pretty: &str) -> (String, String) { + if let Some(pos) = pretty.rfind('.') { + (pretty[..pos].to_string(), pretty[pos + 1..].to_string()) + } else { + (String::new(), pretty.to_string()) + } +} + +/// Re-parse raw DEX bytes into `(classes, file_info)` for live refresh after edits. 
+pub fn build_app_state_from_bytes(bytes: &[u8]) -> anyhow::Result<(Vec, String)> { + let cancel = AtomicBool::new(false); + let dex = DexFile::open(&bytes, DexLocation::InMemory, VerifyPreset::None)?; + build_app_state(&dex, &cancel, |_, _| {})? + .ok_or_else(|| anyhow::anyhow!("cancelled")) +} + +fn run_tui_with_terminal( + mut app: App, + mut terminal: Terminal>, +) -> Result<()> { + let result = run_loop(&mut terminal, &mut app); + + disable_raw_mode()?; + execute!(terminal.backend_mut(), LeaveAlternateScreen)?; + terminal.show_cursor()?; + + result +} + +fn run_loop( + terminal: &mut Terminal>, + app: &mut App, +) -> Result<()> { + loop { + terminal.draw(|f| draw(f, app))?; + + match handle_events_with_quit(app)? { + Action::Quit => break, + Action::Continue => {} + } + } + Ok(()) +} + +// -- VDEX inspect -------------------------------------------------------------- + +/// Open a VDEX file and launch the TUI inspector on one of its embedded DEX +/// files. When `dex_index` is `None` and the VDEX contains more than one DEX, +/// a small interactive picker is shown first. +#[cfg(feature = "vdex")] +pub fn run_vdex_inspect( + path: &std::path::Path, + dex_index: Option, + output: Option, +) -> Result<()> { + let file = + File::open(path).map_err(|e| anyhow::anyhow!("cannot open '{}': {e}", path.display()))?; + let container = VdexFileContainer::new(&file); + let vdex = container.open()?; + + let n = vdex.num_dex_files(); + if n == 0 { + bail!("VDEX file contains no embedded DEX files"); + } + if !vdex.has_dex_section() { + bail!("VDEX file does not contain a DEX file section"); + } + + // Determine which DEX index to open (or pick interactively). 
+ let chosen: u32 = match dex_index { + Some(i) => { + if i >= n { + bail!("DEX index {i} is out of range (0..{n})"); + } + i + } + None if n == 1 => 0, + None => { + // Build a label list of "DEX[i] checksum=0x… size=… bytes" + let labels: Vec = (0..n) + .map(|i| { + let cs = vdex.dex_checksum_at(i).unwrap_or(0); + let sz = vdex.get_dex_file_data(i).map(|d| d.len()).unwrap_or(0); + format!("DEX[{i}] checksum={cs:#010x} size={sz} bytes") + }) + .collect(); + + pick_dex_interactively(&labels)? + } + }; + + // Extract the raw DEX bytes — must be copied because the mmap/container + // lifetime ends when this function returns and App owns its bytes. + let dex_bytes = vdex.get_dex_file_data(chosen)?.to_vec(); + drop(vdex); + drop(container); + drop(file); + + // Re-use the normal DEX inspect flow with the in-memory bytes. + run_with_bytes(dex_bytes, output) +} + +/// Show an interactive VDEX DEX-file picker and return the chosen index. +#[cfg(feature = "vdex")] +fn pick_dex_interactively(labels: &[String]) -> Result { + enable_raw_mode()?; + let mut stdout = std::io::stdout(); + execute!(stdout, EnterAlternateScreen)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let mut selected: usize = 0; + let mut list_state = ListState::default(); + list_state.select(Some(0)); + + let result = loop { + terminal.draw(|f| draw_vdex_picker(f, labels, &mut list_state))?; + + if event::poll(Duration::from_millis(50))? { + if let event::Event::Key(key) = event::read()? 
{ + match key.code { + KeyCode::Up | KeyCode::Char('k') => { + if selected > 0 { + selected -= 1; + list_state.select(Some(selected)); + } + } + KeyCode::Down | KeyCode::Char('j') => { + if selected + 1 < labels.len() { + selected += 1; + list_state.select(Some(selected)); + } + } + KeyCode::Enter => break Ok(selected as u32), + KeyCode::Esc + | KeyCode::Char('q') + | KeyCode::Char('c') + if key.modifiers.contains(KeyModifiers::CONTROL) => + { + break Err(anyhow::anyhow!("cancelled")) + } + KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => { + break Err(anyhow::anyhow!("cancelled")) + } + _ => {} + } + } + } + }; + + disable_raw_mode()?; + execute!(terminal.backend_mut(), LeaveAlternateScreen)?; + result +} + +#[cfg(feature = "vdex")] +fn draw_vdex_picker(f: &mut Frame, labels: &[String], state: &mut ListState) { + let area = f.area(); + let [header_area, list_area, footer_area] = Layout::vertical([ + Constraint::Length(3), + Constraint::Min(1), + Constraint::Length(1), + ]) + .areas(area); + + let title = Paragraph::new(Line::from(vec![ + Span::styled("VDEX ", Style::default().fg(Color::Rgb(190, 160, 90))), + Span::styled( + "Select an embedded DEX file to inspect", + Style::default().fg(Color::Rgb(200, 200, 200)), + ), + ])) + .block(Block::default().borders(Borders::ALL)); + f.render_widget(title, header_area); + + let items: Vec = labels + .iter() + .map(|l| ListItem::new(l.as_str())) + .collect(); + let list = List::new(items) + .block(Block::default().borders(Borders::ALL).title("DEX Files")) + .highlight_style( + Style::default() + .bg(Color::Rgb(42, 48, 58)) + .add_modifier(Modifier::BOLD), + ) + .highlight_symbol("▶ "); + f.render_stateful_widget(list, list_area, state); + + let hint = Paragraph::new( + " ↑/↓ navigate Enter select q/Esc cancel", + ) + .style(Style::default().fg(Color::Rgb(100, 110, 120))); + f.render_widget(hint, footer_area); +} + +/// Run the full TUI inspector using a pre-loaded byte buffer instead of a file. 
+#[cfg(feature = "vdex")] +fn run_with_bytes(dex_bytes: Vec, output: Option) -> Result<()> { + let dex = DexFile::open(&dex_bytes, DexLocation::InMemory, VerifyPreset::None)?; + let total_classes = dex.num_class_defs() as usize; + let file_name = "(embedded DEX)".to_string(); + + enable_raw_mode()?; + let mut stdout = std::io::stdout(); + execute!(stdout, EnterAlternateScreen)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let progress = Arc::new(AtomicUsize::new(0)); + let progress_clone = Arc::clone(&progress); + let cancel = Arc::new(AtomicBool::new(false)); + let cancel_build = Arc::clone(&cancel); + + let build_result: anyhow::Result, String)>> = + std::thread::scope(|s| { + let handle = s.spawn(|| { + build_app_state(&dex, &cancel_build, |current, _total| { + progress_clone.store(current, Ordering::Relaxed); + }) + }); + + let mut tick: u64 = 0; + loop { + let current = progress.load(Ordering::Relaxed); + terminal.draw(|f| draw_loading(f, &file_name, current, total_classes, tick))?; + + while event::poll(Duration::ZERO)? { + if let event::Event::Key(key) = event::read()? { + match key.code { + KeyCode::Esc | KeyCode::Char('q') => { + cancel.store(true, Ordering::Relaxed); + } + KeyCode::Char('c') + if key.modifiers.contains(KeyModifiers::CONTROL) => + { + cancel.store(true, Ordering::Relaxed); + } + _ => {} + } + } + } + + if handle.is_finished() { + break; + } + std::thread::sleep(Duration::from_millis(40)); + tick = tick.wrapping_add(1); + } + + handle.join().map_err(|_| anyhow::anyhow!("build thread panicked"))? 
+ }); + + drop(dex); + + match build_result { + Err(e) => { + let _ = disable_raw_mode(); + let _ = execute!(terminal.backend_mut(), LeaveAlternateScreen); + Err(e) + } + Ok(None) => { + disable_raw_mode()?; + execute!(terminal.backend_mut(), LeaveAlternateScreen)?; + Ok(()) + } + Ok(Some((classes, file_info))) => { + let app = App::new(classes, file_info, dex_bytes).with_editable(output); + run_tui_with_terminal(app, terminal) + } + } +} diff --git a/src/bin/dexrs/commands/map.rs b/src/bin/dexrs/commands/map.rs new file mode 100644 index 0000000..b7eb40d --- /dev/null +++ b/src/bin/dexrs/commands/map.rs @@ -0,0 +1,46 @@ +use anyhow::Result; +use serde_json::json; + +use crate::{cli::MapArgs, commands::with_dex, output::Printer}; + +pub fn run(args: &MapArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + with_dex(&args.dex, |dex| { + let items = match dex.get_map_list() { + Some(m) => m, + None => { + p.error("map list is not available in this DEX file"); + return Ok(()); + } + }; + + if p.json { + let entries: Vec<_> = items + .iter() + .map(|it| { + json!({ + "type": format!("{:?}", it.type_), + "offset": it.off, + "count": it.size, + }) + }) + .collect(); + println!("{}", json!({ "sections": entries })); + return Ok(()); + } + + let rows: Vec> = items + .iter() + .map(|it| { + vec![ + format!("{:?}", it.type_), + format!("{:#010x}", it.off), + it.size.to_string(), + ] + }) + .collect(); + + p.table(&["Section", "Offset", "Count"], rows); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/methods.rs b/src/bin/dexrs/commands/methods.rs new file mode 100644 index 0000000..e953761 --- /dev/null +++ b/src/bin/dexrs/commands/methods.rs @@ -0,0 +1,50 @@ +use anyhow::Result; +use serde_json::json; + +use dexrs::file::dump::prettify; + +use crate::{cli::MethodsArgs, commands::with_dex, output::{to_descriptor, Printer}}; + +pub fn run(args: &MethodsArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let 
class_filter = args.class.as_deref().map(to_descriptor); + + with_dex(&args.dex, |dex| { + let mut rows: Vec> = Vec::new(); + + for idx in 0..dex.num_class_defs() { + let cd = dex.get_class_def(idx)?; + + if let Some(ref filter_desc) = class_filter { + let desc = dex.get_class_desc_utf16_lossy(cd)?; + if &desc != filter_desc { + continue; + } + } + + let accessor = dex.get_class_accessor(cd)?; + if let Some(acc) = accessor { + for m in acc.get_methods()? { + let name = dex.pretty_method_at(m.index, prettify::Method::WithSig); + let kind = if m.is_static_or_direct { "direct" } else { "virtual" }; + let has_code = if m.code_offset > 0 { "yes" } else { "no" }; + rows.push(vec![name, kind.to_string(), has_code.to_string()]); + } + } + } + + if p.json { + let entries: Vec<_> = rows + .iter() + .map(|r| json!({ "method": r[0], "kind": r[1], "has_code": r[2] == "yes" })) + .collect(); + println!("{}", json!({ "methods": entries, "total": rows.len() })); + return Ok(()); + } + + let total = rows.len(); + p.table(&["Method", "Kind", "Has Code"], rows); + println!("\n {total} method(s)"); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/mod.rs b/src/bin/dexrs/commands/mod.rs new file mode 100644 index 0000000..9dafe50 --- /dev/null +++ b/src/bin/dexrs/commands/mod.rs @@ -0,0 +1,42 @@ +pub mod class; +pub mod classes; +pub mod disasm; +pub mod edit; +pub mod fields; +pub mod info; +#[cfg(feature = "tui")] +pub mod inspect; +pub mod map; +pub mod methods; +pub mod patch; +pub mod strings; +pub mod types; +#[cfg(feature = "vdex")] +pub mod vdex; + +use std::fs::File; + +use dexrs::file::{DexFileContainer, MmapDexFile}; + +use crate::cli::DexArgs; + +/// Open a DEX file from CLI args, respecting the `--no-verify` flag. 
+#[allow(dead_code)] +pub fn open_dex(args: &DexArgs) -> anyhow::Result<(File, DexFileContainer)> { + let file = File::open(&args.file) + .map_err(|e| anyhow::anyhow!("cannot open '{}': {e}", args.file.display()))?; + let container = DexFileContainer::new(&file).verify(!args.no_verify); + Ok((file, container)) +} + +/// Helper that opens a dex file and calls a closure with the parsed file. +pub fn with_dex(args: &DexArgs, f: F) -> anyhow::Result +where + F: for<'a> FnOnce(&MmapDexFile<'a>) -> anyhow::Result, +{ + let file = File::open(&args.file) + .map_err(|e| anyhow::anyhow!("cannot open '{}': {e}", args.file.display()))?; + let container = DexFileContainer::new(&file).verify(!args.no_verify); + let dex = container.open()?; + f(&dex) +} diff --git a/src/bin/dexrs/commands/patch.rs b/src/bin/dexrs/commands/patch.rs new file mode 100644 index 0000000..ac1f547 --- /dev/null +++ b/src/bin/dexrs/commands/patch.rs @@ -0,0 +1,46 @@ +use anyhow::{Context, Result}; + +use crate::cli::{PatchFlagsArgs, PatchInsnArgs}; + +fn parse_int(s: &str) -> Result { + if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) { + u32::from_str_radix(hex, 16).context("invalid hex value") + } else { + s.parse::().context("invalid decimal value") + } +} + +/// `patch flags` — in-place: set class access flags, then update checksum. +/// +/// Uses `DexEditor` for class lookup; writes the result back to the same file. 
+pub fn run_flags(args: &PatchFlagsArgs) -> Result<()> { + let flags = parse_int(&args.flags).context("--flags")?; + + let mut editor = dexrs::file::DexEditor::from_file(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + editor + .set_class_access_flags(&args.class, flags) + .context("patch flags")?; + editor + .write_to(&args.file) + .with_context(|| format!("cannot write '{}'", args.file.display()))?; + + eprintln!("patched (in-place): {}", args.file.display()); + Ok(()) +} + +/// `patch insn` — in-place: overwrite one instruction word, then update checksum. +pub fn run_insn(args: &PatchInsnArgs) -> Result<()> { + let code_off = parse_int(&args.code_offset).context("--code-offset")?; + let word = parse_int(&args.word).context("--word")? as u16; + + let mut data = std::fs::read(&args.file) + .with_context(|| format!("cannot read '{}'", args.file.display()))?; + dexrs::file::patch_instruction_word(&mut data, code_off, args.pc, word)?; + dexrs::file::update_checksum(&mut data); + std::fs::write(&args.file, &data) + .with_context(|| format!("cannot write '{}'", args.file.display()))?; + + eprintln!("patched (in-place): {}", args.file.display()); + Ok(()) +} diff --git a/src/bin/dexrs/commands/strings.rs b/src/bin/dexrs/commands/strings.rs new file mode 100644 index 0000000..ead7185 --- /dev/null +++ b/src/bin/dexrs/commands/strings.rs @@ -0,0 +1,43 @@ +use anyhow::Result; +use serde_json::json; + +use crate::{cli::StringsArgs, commands::with_dex, output::Printer}; + +pub fn run(args: &StringsArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + let filter = args.filter.as_deref().map(str::to_lowercase); + + with_dex(&args.dex, |dex| { + let count = dex.num_string_ids(); + let mut results: Vec<(u32, String)> = Vec::new(); + + for idx in 0..count { + let s = dex.get_str_lossy_at(idx)?; + if let Some(ref f) = filter { + if !s.to_lowercase().contains(f.as_str()) { + continue; + } + } + results.push((idx, s)); 
+ } + + if p.json { + let entries: Vec<_> = results + .iter() + .map(|(i, s)| json!({ "index": i, "value": s })) + .collect(); + println!("{}", json!({ "strings": entries, "total": results.len() })); + return Ok(()); + } + + let rows: Vec> = results + .iter() + .map(|(i, s)| vec![i.to_string(), s.clone()]) + .collect(); + + let total = rows.len(); + p.table(&["Index", "String"], rows); + println!("\n {total} string(s)"); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/types.rs b/src/bin/dexrs/commands/types.rs new file mode 100644 index 0000000..bb17c2f --- /dev/null +++ b/src/bin/dexrs/commands/types.rs @@ -0,0 +1,32 @@ +use anyhow::Result; +use serde_json::json; + +use crate::{cli::TypesArgs, commands::with_dex, output::{pretty_type, Printer}}; + +pub fn run(args: &TypesArgs) -> Result<()> { + let p = Printer::new(args.dex.json, args.dex.no_color); + + with_dex(&args.dex, |dex| { + let count = dex.num_type_ids(); + let mut rows: Vec> = Vec::new(); + + for idx in 0..count { + let desc = dex.get_type_desc_utf16_lossy_at(idx as u16)?; + rows.push(vec![idx.to_string(), desc.clone(), pretty_type(&desc)]); + } + + if p.json { + let entries: Vec<_> = rows + .iter() + .map(|r| json!({ "index": r[0], "descriptor": r[1], "pretty": r[2] })) + .collect(); + println!("{}", json!({ "types": entries, "total": rows.len() })); + return Ok(()); + } + + let total = rows.len(); + p.table(&["Index", "Descriptor", "Pretty"], rows); + println!("\n {total} type(s)"); + Ok(()) + }) +} diff --git a/src/bin/dexrs/commands/vdex.rs b/src/bin/dexrs/commands/vdex.rs new file mode 100644 index 0000000..7e27bd5 --- /dev/null +++ b/src/bin/dexrs/commands/vdex.rs @@ -0,0 +1,199 @@ +//! CLI handlers for VDEX file operations. +//! +//! Subcommands: +//! - `vdex info` — show header, sections and checksums +//! - `vdex list` — tabular list of all embedded DEX files +//! - `vdex extract` — write a single embedded DEX to disk +//! 
- `vdex inspect` — launch the TUI inspector on an embedded DEX + +use std::fs::File; + +use anyhow::{bail, Context, Result}; +use serde_json::json; + +use dexrs::vdex::{VdexFileContainer, VdexSection}; + +use crate::{ + cli::{VdexExtractArgs, VdexInfoArgs, VdexListArgs}, + output::Printer, +}; +#[cfg(feature = "tui")] +use crate::cli::VdexInspectArgs; +#[cfg(feature = "tui")] +use crate::commands::inspect::run_vdex_inspect; + +// -- info ---------------------------------------------------------------------- + +pub fn run_info(args: &VdexInfoArgs) -> Result<()> { + let file = File::open(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + let container = VdexFileContainer::new(&file); + let vdex = container.open()?; + + let p = Printer::new(args.json, args.no_color); + let h = vdex.file_header(); + let version = String::from_utf8_lossy(&h.vdex_version) + .trim_end_matches('\0') + .to_string(); + + let checksums: Vec = vdex + .dex_checksums() + .iter() + .map(|c| format!("{c:#010x}")) + .collect(); + + if p.json { + let sections: Vec<_> = (0..vdex.num_sections()) + .filter_map(|i| { + let kind = match i { + 0 => VdexSection::Checksum, + 1 => VdexSection::DexFile, + 2 => VdexSection::VerifierDeps, + 3 => VdexSection::TypeLookupTable, + _ => return None, + }; + let sh = vdex.get_section_header(kind)?; + Some(json!({ + "kind": i, + "offset": sh.section_offset, + "size": sh.section_size, + })) + }) + .collect(); + + println!( + "{}", + json!({ + "magic": String::from_utf8_lossy(&h.magic), + "version": version, + "num_sections": h.number_of_sections, + "num_dex_files": vdex.num_dex_files(), + "has_dex_section": vdex.has_dex_section(), + "checksums": checksums, + "sections": sections, + }) + ); + return Ok(()); + } + + p.section("VDEX File"); + p.kv("Magic:", &String::from_utf8_lossy(&h.magic)); + p.kv("Version:", &version); + p.kv("Sections:", &h.number_of_sections.to_string()); + p.kv("Embedded DEX files:", 
&vdex.num_dex_files().to_string()); + p.kv("Has DEX section:", &vdex.has_dex_section().to_string()); + + p.section("Sections"); + for i in 0..vdex.num_sections() { + let kind = match i { + 0 => VdexSection::Checksum, + 1 => VdexSection::DexFile, + 2 => VdexSection::VerifierDeps, + 3 => VdexSection::TypeLookupTable, + _ => break, + }; + if let Some(sh) = vdex.get_section_header(kind) { + let name = format!("[{i}] {kind:?}"); + let val = if sh.section_size == 0 { + "(absent)".to_string() + } else { + format!("{} bytes @ {:#x}", sh.section_size, sh.section_offset) + }; + p.kv(&name, &val); + } + } + + if !checksums.is_empty() { + p.section("DEX Checksums"); + for (i, cs) in checksums.iter().enumerate() { + p.kv(&format!("[{i}]:"), cs); + } + } + + Ok(()) +} + +// -- list ---------------------------------------------------------------------- + +pub fn run_list(args: &VdexListArgs) -> Result<()> { + let file = File::open(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + let container = VdexFileContainer::new(&file); + let vdex = container.open()?; + + let p = Printer::new(args.json, args.no_color); + let n = vdex.num_dex_files(); + + if n == 0 { + if p.json { + println!("[]"); + } else { + eprintln!("No embedded DEX files."); + } + return Ok(()); + } + + if p.json { + let entries: Vec<_> = (0..n) + .map(|i| { + let checksum = vdex.dex_checksum_at(i).unwrap_or(0); + let size = vdex.get_dex_file_data(i).map(|d| d.len()).unwrap_or(0); + json!({ "index": i, "checksum": format!("{checksum:#010x}"), "size": size }) + }) + .collect(); + println!("{}", serde_json::to_string_pretty(&entries).unwrap()); + return Ok(()); + } + + p.section("Embedded DEX Files"); + for i in 0..n { + let checksum = vdex.dex_checksum_at(i).unwrap_or(0); + let size = match vdex.get_dex_file_data(i) { + Ok(d) => format!("{} bytes", d.len()), + Err(_) => "(unavailable)".to_string(), + }; + p.kv( + &format!("[{i}]:"), + &format!("checksum={:#010x} size={}", checksum, 
size), + ); + } + + Ok(()) +} + +// -- extract ------------------------------------------------------------------- + +pub fn run_extract(args: &VdexExtractArgs) -> Result<()> { + let file = File::open(&args.file) + .with_context(|| format!("cannot open '{}'", args.file.display()))?; + let container = VdexFileContainer::new(&file); + let vdex = container.open()?; + + let n = vdex.num_dex_files(); + if !vdex.has_dex_section() { + bail!("VDEX does not contain an embedded DEX file section"); + } + if args.index >= n { + bail!("DEX index {} is out of range (0..{})", args.index, n); + } + + let dex_bytes = vdex.get_dex_file_data(args.index)?; + std::fs::write(&args.output, dex_bytes).with_context(|| { + format!("cannot write '{}'", args.output.display()) + })?; + + eprintln!( + "Extracted DEX[{}] ({} bytes) -> {}", + args.index, + dex_bytes.len(), + args.output.display() + ); + Ok(()) +} + +// -- inspect ------------------------------------------------------------------- + +#[cfg(feature = "tui")] +pub fn run_inspect(args: &VdexInspectArgs) -> Result<()> { + run_vdex_inspect(&args.file, args.index, args.output.clone()) +} diff --git a/src/bin/dexrs/highlight.rs b/src/bin/dexrs/highlight.rs new file mode 100644 index 0000000..9866833 --- /dev/null +++ b/src/bin/dexrs/highlight.rs @@ -0,0 +1,89 @@ +//! Terminal color adapters for disassembly [`Highlight`] spans. +//! +//! This module is intentionally thin: all semantic knowledge about which parts +//! of an instruction carry which meaning lives in the library's +//! [`dexrs::file::dump`] module. Here we only translate [`Highlight`] tags to +//! the two presentation targets we support — ratatui (TUI) and crossterm (CLI). 
+ +use crossterm::style::Stylize; +#[cfg(feature = "tui")] +use dexrs::file::dump::StyledLine; +use dexrs::file::dump::{Highlight, Span}; +#[cfg(feature = "tui")] +use ratatui::{ + style::{Color, Style}, + text::{Line, Span as TuiSpan}, +}; + +// Muted, low-contrast palette — readable on both dark and light backgrounds. +// Opcode dusty gold #C8A96A +// Register soft sage #7BAF7B +// Immediate muted mauve #A07BA0 +// Offset slate blue-gray #7A9BAF +// StringLit warm sand #C8A07A +// Ref soft periwinkle #7A9BC8 +// Comment dim gray #666666 +#[cfg(feature = "tui")] +const C_OPCODE: Color = Color::Rgb(200, 169, 106); +#[cfg(feature = "tui")] +const C_REG: Color = Color::Rgb(123, 175, 123); +#[cfg(feature = "tui")] +const C_IMM: Color = Color::Rgb(160, 123, 160); +#[cfg(feature = "tui")] +const C_OFFSET: Color = Color::Rgb(122, 155, 175); +#[cfg(feature = "tui")] +const C_STR: Color = Color::Rgb(200, 160, 122); +#[cfg(feature = "tui")] +const C_REF: Color = Color::Rgb(122, 155, 200); +#[cfg(feature = "tui")] +const C_COMMENT: Color = Color::Rgb(102, 102, 102); + +#[cfg(feature = "tui")] +fn hl_style(hl: Highlight) -> Style { + match hl { + Highlight::Opcode => Style::default().fg(C_OPCODE), + Highlight::Register => Style::default().fg(C_REG), + Highlight::Immediate => Style::default().fg(C_IMM), + Highlight::Offset => Style::default().fg(C_OFFSET), + Highlight::StringLiteral => Style::default().fg(C_STR), + Highlight::Ref => Style::default().fg(C_REF), + Highlight::Comment => Style::default().fg(C_COMMENT), + Highlight::Plain => Style::default(), + } +} + +/// Convert a [`StyledLine`] to a ratatui [`Line`] with per-span styling. +#[cfg(feature = "tui")] +pub fn to_tui_line(styled: &StyledLine) -> Line<'static> { + Line::from( + styled + .iter() + .map(|s| TuiSpan::styled(s.text.clone(), hl_style(s.hl))) + .collect::>(), + ) +} + +/// Render a [`StyledLine`] to an ANSI-colored string (CLI). 
+/// +/// When `color` is `false` the spans are concatenated without escape codes. +pub fn to_cli_string(styled: &[Span], color: bool) -> String { + if !color { + return styled.iter().map(|s| s.text.as_str()).collect(); + } + let mut out = String::new(); + for s in styled { + // CLI uses the 8-color ANSI subset (wider terminal compat). + let colored = match s.hl { + Highlight::Opcode => format!("{}", s.text.as_str().yellow()), + Highlight::Register => format!("{}", s.text.as_str().green()), + Highlight::Immediate => format!("{}", s.text.as_str().magenta()), + Highlight::Offset => format!("{}", s.text.as_str().blue()), + Highlight::StringLiteral => format!("{}", s.text.as_str().dark_yellow()), + Highlight::Ref => format!("{}", s.text.as_str().cyan()), + Highlight::Comment => format!("{}", s.text.as_str().dark_grey()), + Highlight::Plain => s.text.clone(), + }; + out.push_str(&colored); + } + out +} diff --git a/src/bin/dexrs/main.rs b/src/bin/dexrs/main.rs new file mode 100644 index 0000000..68663bd --- /dev/null +++ b/src/bin/dexrs/main.rs @@ -0,0 +1,47 @@ +mod cli; +mod commands; +mod highlight; +mod output; +#[cfg(feature = "tui")] +mod tui; + +use anyhow::Result; +use clap::Parser; +use cli::{Cli, Command}; + +fn main() -> Result<()> { + let cli = Cli::parse(); + + match &cli.command { + Command::Info(args) => commands::info::run(args), + Command::Map(args) => commands::map::run(args), + Command::Classes(args) => commands::classes::run(args), + Command::Class(args) => commands::class::run(args), + Command::Methods(args) => commands::methods::run(args), + Command::Fields(args) => commands::fields::run(args), + Command::Disasm(args) => commands::disasm::run(args), + Command::Strings(args) => commands::strings::run(args), + Command::Types(args) => commands::types::run(args), + Command::Patch(args) => match &args.command { + cli::PatchCommand::Flags(a) => commands::patch::run_flags(a), + cli::PatchCommand::Insn(a) => commands::patch::run_insn(a), + }, + 
Command::Edit(args) => match &args.command { + cli::EditCommand::RenameClass(a) => commands::edit::run_rename_class(a), + cli::EditCommand::SetFlags(a) => commands::edit::run_set_flags(a), + cli::EditCommand::SetMethodFlags(a) => commands::edit::run_set_method_flags(a), + cli::EditCommand::ClearHiddenapi(a) => commands::edit::run_clear_hiddenapi(a), + cli::EditCommand::BuildDex(a) => commands::edit::run_build_dex(a), + }, + #[cfg(feature = "tui")] + Command::Inspect(args) => commands::inspect::run(args), + #[cfg(feature = "vdex")] + Command::Vdex(args) => match &args.command { + cli::VdexCommand::Info(a) => commands::vdex::run_info(a), + cli::VdexCommand::List(a) => commands::vdex::run_list(a), + cli::VdexCommand::Extract(a) => commands::vdex::run_extract(a), + #[cfg(feature = "tui")] + cli::VdexCommand::Inspect(a) => commands::vdex::run_inspect(a), + }, + } +} diff --git a/src/bin/dexrs/output.rs b/src/bin/dexrs/output.rs new file mode 100644 index 0000000..501a1eb --- /dev/null +++ b/src/bin/dexrs/output.rs @@ -0,0 +1,109 @@ +use comfy_table::{presets::UTF8_BORDERS_ONLY, Cell, Color, Table}; +use crossterm::style::Stylize; + +pub struct Printer { + pub json: bool, + pub color: bool, +} + +impl Printer { + pub fn new(json: bool, no_color: bool) -> Self { + Self { json, color: !no_color } + } + + pub fn table(&self, headers: &[&str], rows: Vec<Vec<String>>) { + let mut table = Table::new(); + table.load_preset(UTF8_BORDERS_ONLY); + + let header_cells: Vec<Cell> = headers + .iter() + .map(|h| { + if self.color { + Cell::new(h).fg(Color::Cyan) + } else { + Cell::new(h) + } + }) + .collect(); + table.set_header(header_cells); + + for row in rows { + table.add_row(row); + } + + println!("{table}"); + } + + pub fn section(&self, title: &str) { + if self.color { + println!("\n{}", title.bold().yellow()); + } else { + println!("\n{title}"); + } + } + + pub fn kv(&self, key: &str, value: &str) { + if self.color { + println!(" {} {value}", format!("{key:<20}").cyan()); + } else { + println!(" {key:<20}
{value}"); + } + } + + pub fn item(&self, value: &str) { + println!(" {value}"); + } + + #[allow(dead_code)] + pub fn info(&self, msg: &str) { + if self.color { + eprintln!("{}", msg.dim()); + } else { + eprintln!("{msg}"); + } + } + + pub fn error(&self, msg: &str) { + if self.color { + eprintln!("{} {msg}", "error:".red().bold()); + } else { + eprintln!("error: {msg}"); + } + } +} + +/// Format a DEX type descriptor into a human-readable Java-style name. +pub fn pretty_type(desc: &str) -> String { + dexrs::desc_names::pretty_desc(desc) +} + +/// Format access flags bitmask into a string like "public static final". +pub fn format_flags(flags: u32) -> String { + use dexrs::file::*; + let mut parts = Vec::new(); + if flags & ACC_PUBLIC != 0 { parts.push("public"); } + if flags & ACC_PRIVATE != 0 { parts.push("private"); } + if flags & ACC_PROTECTED != 0 { parts.push("protected"); } + if flags & ACC_STATIC != 0 { parts.push("static"); } + if flags & ACC_FINAL != 0 { parts.push("final"); } + if flags & ACC_SYNCHRONIZED != 0 { parts.push("synchronized"); } + if flags & ACC_NATIVE != 0 { parts.push("native"); } + if flags & ACC_ABSTRACT != 0 { parts.push("abstract"); } + if flags & ACC_STRICT != 0 { parts.push("strictfp"); } + if flags & ACC_INTERFACE != 0 { parts.push("interface"); } + if flags & ACC_ENUM != 0 { parts.push("enum"); } + if flags & ACC_ANNOTATION != 0 { parts.push("@interface"); } + if flags & ACC_SYNTHETIC != 0 { parts.push("synthetic"); } + if flags & ACC_CONSTRUCTOR != 0 { parts.push("constructor"); } + parts.join(" ") +} + +/// Normalise a user-supplied class name to a DEX descriptor. +/// Accepts "com.example.Foo", "Lcom/example/Foo;", or "com/example/Foo". 
+pub fn to_descriptor(name: &str) -> String { + if name.starts_with('L') && name.ends_with(';') { + return name.to_owned(); + } + let inner = name.replace('.', "/"); + format!("L{inner};") +} diff --git a/src/bin/dexrs/tui/app.rs b/src/bin/dexrs/tui/app.rs new file mode 100644 index 0000000..1a9ab76 --- /dev/null +++ b/src/bin/dexrs/tui/app.rs @@ -0,0 +1,977 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use dexrs::file::dump::StyledLine; + +// -- Data types carried from the parsed DEX ----------------------------------- + +/// A fully-owned snapshot of a DEX class, pre-computed at startup. +#[derive(Clone)] +pub struct ClassEntry { + pub descriptor: String, + pub pretty_name: String, + pub package: String, + pub simple_name: String, + pub access_flags: u32, + /// Pretty-printed superclass name for display. + pub superclass: String, + /// Raw DEX superclass descriptor (e.g. `"Ljava/lang/Object;"`). + pub superclass_desc: String, + pub methods: Vec, + pub fields: Vec, +} + +#[derive(Clone)] +pub struct MemberEntry { + /// Human-readable display name (with type/sig prefix). + pub name: String, + /// Raw method/field name without class prefix or type (e.g. `""`, `"counter"`). + pub raw_name: String, + /// For methods: full proto descriptor `"([Ljava/lang/String;)V"`. + /// For fields: field type descriptor `"I"`, `"Ljava/lang/Object;"`. + pub proto_or_type_desc: String, + pub kind: MemberKind, + pub access_flags: u32, + /// File offset of the `code_item` in the DEX file. 0 = abstract/native (no code). + /// Disassembly is computed on-demand from this offset and cached in `App::disasm_cache`. + pub code_offset: u32, + /// Code metadata summary (registers, ins, outs, tries). + /// Fast to read — just 4 u16 fields from the code_item header. + pub code_info: Option, +} + +impl MemberEntry { + /// Extract registers/ins/outs from the code_info string. 
+ /// Format: "registers: N ins: N outs: N tries: N" + pub fn parse_registers(&self) -> (u16, u16, u16) { + let Some(ref s) = self.code_info else { return (0, 0, 0) }; + fn extract(s: &str, key: &str) -> u16 { + s.split(key) + .nth(1) + .and_then(|rest| rest.split_whitespace().next()) + .and_then(|n| n.parse().ok()) + .unwrap_or(0) + } + (extract(s, "registers: "), extract(s, "ins: "), extract(s, "outs: ")) + } +} + +/// Cached disassembly for one method — computed lazily on first view. +pub struct DisasmEntry { + /// Styled spans for the code viewer (PC -> highlighted tokens). + pub styled: Vec<(u32, StyledLine)>, + /// Assembler-compatible lines for the inline editor / rebuild pipeline. + pub raw: Vec, +} + +/// Compute disassembly for a single `code_item` from raw DEX bytes. +/// Opens the DEX with `VerifyPreset::None` (pure pointer arithmetic, < 1 ms). +pub fn compute_disasm_from_bytes(bytes: &[u8], code_off: u32) -> DisasmEntry { + if code_off == 0 || bytes.is_empty() { + return DisasmEntry { styled: vec![], raw: vec![] }; + } + use dexrs::file::{DexFile, DexLocation, verifier::VerifyPreset}; + let dex = match DexFile::open(&bytes, DexLocation::InMemory, VerifyPreset::None) { + Ok(d) => d, + Err(_) => return DisasmEntry { styled: vec![], raw: vec![] }, + }; + let ca = match dex.get_code_item_accessor(code_off) { + Ok(c) => c, + Err(_) => return DisasmEntry { styled: vec![], raw: vec![] }, + }; + let mut styled = Vec::new(); + let mut raw = Vec::new(); + let mut pc: u32 = 0; + for insn in ca { + let s = insn.to_styled(Some(&dex)).unwrap_or_else(|_| vec![dexrs::file::dump::Span { + text: "".to_string(), + hl: dexrs::file::dump::Highlight::Plain, + }]); + let r = insn.to_assembler_text(&dex).unwrap_or_else(|_| "".to_string()); + styled.push((pc, s)); + raw.push(r); + pc += insn.size_in_code_units() as u32; + } + DisasmEntry { styled, raw } +} + +#[derive(Clone, PartialEq)] +pub enum MemberKind { + DirectMethod, + VirtualMethod, + StaticField, + InstanceField, +} 
+ +impl MemberKind { + pub fn label(&self) -> &'static str { + match self { + Self::DirectMethod | Self::VirtualMethod => "method", + Self::StaticField => "static field", + Self::InstanceField => "field", + } + } + pub fn is_method(&self) -> bool { + matches!(self, Self::DirectMethod | Self::VirtualMethod) + } +} + +// -- Tree --------------------------------------------------------------------- + +#[derive(Clone, Debug)] +pub enum TreeItem { + Package { + name: String, // "" = (no package) + expanded: bool, + }, + Class { + class_idx: usize, + expanded: bool, + }, + Member { + class_idx: usize, + member_idx: usize, // index into class.methods + class.fields + }, +} + +impl TreeItem { + #[allow(dead_code)] + pub fn indent(&self) -> u16 { + match self { + Self::Package { .. } => 0, + Self::Class { .. } => 1, + Self::Member { .. } => 2, + } + } + #[allow(dead_code)] + pub fn is_package(&self) -> bool { matches!(self, Self::Package { .. }) } + #[allow(dead_code)] + pub fn is_class(&self) -> bool { matches!(self, Self::Class { .. }) } + #[allow(dead_code)] + pub fn is_member(&self) -> bool { matches!(self, Self::Member { .. }) } +} + +// -- App mode ----------------------------------------------------------------- + +#[derive(Clone, PartialEq)] +pub enum AppMode { + /// Normal browsing + Browse, + /// Search bar open (filters tree) + Search, + /// In-TUI code editor for a method's instructions + CodeEdit, + /// Editing a single instruction line within CodeEdit + LineEdit, + /// Rename class modal + RenameModal, + /// Set access flags modal (class or member) + FlagsModal, +} + +/// Which panel has keyboard focus in Browse mode. +#[derive(Clone, PartialEq)] +pub enum Focus { + Tree, + Code, +} + +// -- Inline code edit state --------------------------------------------------- + +#[derive(Clone, Default)] +pub struct CodeEditState { + /// Plain instruction lines (no PC prefix, no register header). + pub lines: Vec, + /// Index of the highlighted line. 
+ pub cursor: usize, + /// Scroll offset. + pub scroll: u16, + /// Per-line compile errors (after a failed save attempt). + pub errors: HashMap, + /// The class descriptor owning this method. + pub class_desc: String, + /// Full method name as stored in MemberEntry.name. + pub method_name: String, + /// (registers, ins, outs) from original code_info. + pub registers: (u16, u16, u16), + /// Dirty flag — true after any modification. + pub dirty: bool, + /// Buffer for the currently-edited line (LineEdit sub-mode). + pub line_buf: String, + /// Whether `dd` prefix was typed (delete-line detection). + pub pending_d: bool, +} + +impl CodeEditState { + /// Number of visible lines. + #[allow(dead_code)] + pub fn len(&self) -> usize { self.lines.len() } + + /// Move cursor up. + pub fn move_up(&mut self) { + if self.cursor > 0 { self.cursor -= 1; } + self.clamp_scroll(); + self.pending_d = false; + } + + /// Move cursor down. + pub fn move_down(&mut self) { + if !self.lines.is_empty() && self.cursor + 1 < self.lines.len() { + self.cursor += 1; + } + self.clamp_scroll(); + self.pending_d = false; + } + + /// Insert a new (empty) line after the cursor. + pub fn append_line(&mut self) { + let pos = if self.lines.is_empty() { 0 } else { self.cursor + 1 }; + self.lines.insert(pos, String::new()); + self.cursor = pos; + self.dirty = true; + self.errors.clear(); + } + + /// Insert a new (empty) line before the cursor. + pub fn insert_line(&mut self) { + self.lines.insert(self.cursor, String::new()); + self.dirty = true; + self.errors.clear(); + } + + /// Delete the current line. + pub fn delete_line(&mut self) { + if !self.lines.is_empty() { + self.lines.remove(self.cursor); + if self.cursor > 0 && self.cursor >= self.lines.len() { + self.cursor = self.lines.len().saturating_sub(1); + } + self.dirty = true; + self.errors.clear(); + } + } + + /// Begin editing the current line: copy its text into line_buf. 
+ pub fn begin_line_edit(&mut self) { + let text = self.lines.get(self.cursor).cloned().unwrap_or_default(); + self.line_buf = text; + } + + /// Commit the edited line back. + pub fn commit_line_edit(&mut self) { + if let Some(line) = self.lines.get_mut(self.cursor) { + *line = self.line_buf.clone(); + } + self.line_buf.clear(); + self.dirty = true; + self.errors.clear(); + } + + /// Abandon the current line edit. + pub fn abort_line_edit(&mut self) { + self.line_buf.clear(); + } + + /// Keep scroll so cursor is always visible. + fn clamp_scroll(&mut self) { + // Will be properly clamped in ui.rs after we know visible height. + // Here we just ensure basic invariants. + if (self.cursor as u16) < self.scroll { + self.scroll = self.cursor as u16; + } + } + + /// Try to compile the current lines into a CodeDef. + /// Returns the compiled code or an error string. + pub fn compile(&self) -> Result { + use dexrs::file::builder::CodeBuilder; + let (regs, ins, outs) = self.registers; + let mut cb = CodeBuilder::new(regs, ins, outs); + for (i, line) in self.lines.iter().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() { continue; } + if let Some(lbl) = trimmed.strip_prefix(':') { + cb.label(lbl); + } else if let Err(e) = cb.emit(trimmed) { + return Err(format!("line {}: {e}", i + 1)); + } + } + cb.build().map_err(|e| e.to_string()) + } +} + +// -- Modal state --------------------------------------------------------------- + +#[derive(Clone, Default)] +pub struct ModalState { + pub buffer: String, + pub error: Option, +} + +// -- Main App ----------------------------------------------------------------- + +pub struct App { + // -- Source data ---------------------------------------------------------- + pub classes: Vec, + /// Raw DEX bytes — always populated; used for lazy disassembly and edit mode. + pub raw_bytes: Vec, + /// Path to write the modified DEX file. `Some` enables edit mode. 
+ pub output_path: Option, + pub file_info: String, + /// On-demand disassembly cache: `(class_idx, member_idx)` -> styled + raw lines. + pub disasm_cache: HashMap<(usize, usize), DisasmEntry>, + + // -- Tree state ----------------------------------------------------------- + /// Flat display list, rebuilt on expand/collapse or search. + pub tree: Vec, + /// Cursor index in `tree`. + pub tree_cursor: usize, + /// Scroll offset for the tree pane. + pub tree_scroll: usize, + /// Per-package expansion (key = package name, "" = no-package group). + pub pkg_expanded: HashMap, + /// Per-class expansion state (key = class_idx). + pub class_expanded: HashMap, + + // -- Code pane ------------------------------------------------------------ + pub code_scroll: u16, + pub code_total_lines: usize, + pub code_visible_height: u16, + + // -- Focus & mode --------------------------------------------------------- + pub focus: Focus, + pub mode: AppMode, + + // -- Inline code edit ----------------------------------------------------- + pub code_edit: CodeEditState, + + // -- Modal (rename / flags) ----------------------------------------------- + pub modal: ModalState, + + // -- Search --------------------------------------------------------------- + pub search: String, + + // -- Overlays ------------------------------------------------------------- + pub show_help: bool, + pub show_info: bool, +} + +impl App { + pub fn new(classes: Vec, file_info: String, raw_bytes: Vec) -> Self { + let n = classes.len(); + let mut app = App { + classes, + raw_bytes, + output_path: None, + file_info, + disasm_cache: HashMap::new(), + tree: Vec::new(), + tree_cursor: 0, + tree_scroll: 0, + pkg_expanded: HashMap::new(), + class_expanded: HashMap::new(), + code_scroll: 0, + code_total_lines: 0, + code_visible_height: 0, + focus: Focus::Tree, + mode: AppMode::Browse, + code_edit: CodeEditState::default(), + modal: ModalState::default(), + search: String::new(), + show_help: false, + show_info: false, + 
}; + // All packages start expanded; no classes expanded by default. + for (i, c) in app.classes.iter().enumerate() { + app.pkg_expanded.entry(c.package.clone()).or_insert(true); + app.class_expanded.insert(i, false); + } + // Expand first class so user sees something interesting immediately. + if n > 0 { + app.class_expanded.insert(0, true); + } + app.rebuild_tree(); + app + } + + pub fn with_editable(mut self, output: Option) -> Self { + self.output_path = output; + self + } + + /// Returns true when edit mode is active (output path configured). + pub fn is_editable(&self) -> bool { + self.output_path.is_some() + } + + /// Return the cached disassembly for `(class_idx, member_idx)`, computing it on first access. + pub fn get_or_compute_disasm(&mut self, ci: usize, mi: usize) -> &DisasmEntry { + let key = (ci, mi); + if !self.disasm_cache.contains_key(&key) { + let code_offset = { + let cls = &self.classes[ci]; + if mi < cls.methods.len() { + cls.methods[mi].code_offset + } else { + cls.fields[mi - cls.methods.len()].code_offset + } + }; + let entry = compute_disasm_from_bytes(&self.raw_bytes, code_offset); + self.disasm_cache.insert(key, entry); + } + self.disasm_cache.get(&key).unwrap() + } + + // -- Tree helpers --------------------------------------------------------- + + /// Rebuild the flat `tree` Vec from the current expansion state and search filter. + pub fn rebuild_tree(&mut self) { + self.tree.clear(); + let q = self.search.to_lowercase(); + + // Group classes by package. + let mut packages: Vec = self.pkg_expanded.keys().cloned().collect(); + packages.sort(); + // Put no-package group first. + if let Some(pos) = packages.iter().position(|p| p.is_empty()) { + packages.remove(pos); + packages.insert(0, String::new()); + } + + for pkg in &packages { + // Collect matching class indices for this package. 
+ let class_indices: Vec = self + .classes + .iter() + .enumerate() + .filter(|(_, c)| &c.package == pkg) + .filter(|(_, c)| { + q.is_empty() + || c.pretty_name.to_lowercase().contains(&q) + || c.descriptor.to_lowercase().contains(&q) + }) + .map(|(i, _)| i) + .collect(); + + if class_indices.is_empty() { + continue; + } + + let expanded = *self.pkg_expanded.get(pkg).unwrap_or(&true); + + // Only show package header when there are multiple packages or a non-empty name. + if !pkg.is_empty() { + self.tree.push(TreeItem::Package { + name: pkg.clone(), + expanded, + }); + } + + if expanded || pkg.is_empty() { + for class_idx in class_indices { + let cls_expanded = *self.class_expanded.get(&class_idx).unwrap_or(&false); + self.tree.push(TreeItem::Class { class_idx, expanded: cls_expanded }); + if cls_expanded { + let cls = &self.classes[class_idx]; + for mi in 0..(cls.methods.len() + cls.fields.len()) { + self.tree.push(TreeItem::Member { class_idx, member_idx: mi }); + } + } + } + } + } + + // Clamp cursor to new length. + if self.tree_cursor >= self.tree.len() && !self.tree.is_empty() { + self.tree_cursor = self.tree.len() - 1; + } + } + + /// Toggle expansion of the tree item under the cursor. + pub fn toggle_expand(&mut self) { + if let Some(item) = self.tree.get(self.tree_cursor).cloned() { + match item { + TreeItem::Package { name, expanded } => { + self.pkg_expanded.insert(name, !expanded); + self.rebuild_tree(); + } + TreeItem::Class { class_idx, expanded } => { + self.class_expanded.insert(class_idx, !expanded); + self.rebuild_tree(); + } + TreeItem::Member { .. } => { + // Enter member: move focus to code pane. + self.focus = Focus::Code; + self.code_scroll = 0; + } + } + } + } + + /// Collapse the item under the cursor (or go to its parent). 
+ pub fn collapse_or_parent(&mut self) { + if let Some(item) = self.tree.get(self.tree_cursor).cloned() { + match item { + TreeItem::Package { name, expanded: true } => { + self.pkg_expanded.insert(name, false); + self.rebuild_tree(); + } + TreeItem::Class { class_idx, expanded: true } => { + self.class_expanded.insert(class_idx, false); + self.rebuild_tree(); + } + TreeItem::Member { class_idx, .. } | TreeItem::Class { class_idx, .. } => { + // Navigate to the parent class/package. + let target_class = class_idx; + for (i, item) in self.tree.iter().enumerate() { + if let TreeItem::Class { class_idx, .. } = item { + if *class_idx == target_class { + self.tree_cursor = i; + self.clamp_tree_scroll(); + return; + } + } + } + } + _ => {} + } + } + } + + /// Move tree cursor up. + pub fn tree_up(&mut self) { + if self.tree_cursor > 0 { + self.tree_cursor -= 1; + self.clamp_tree_scroll(); + } + } + + /// Move tree cursor down. + pub fn tree_down(&mut self) { + if self.tree_cursor + 1 < self.tree.len() { + self.tree_cursor += 1; + self.clamp_tree_scroll(); + } + } + + fn clamp_tree_scroll(&mut self) { + // Basic clamping; fine-tuned in ui.rs once we know visible_height. + if self.tree_cursor < self.tree_scroll { + self.tree_scroll = self.tree_cursor; + } + } + + // -- Current selection helpers --------------------------------------------- + + pub fn current_item(&self) -> Option<&TreeItem> { + self.tree.get(self.tree_cursor) + } + + /// Returns the ClassEntry for the currently focused tree item. + pub fn current_class(&self) -> Option<&ClassEntry> { + match self.current_item()? { + TreeItem::Class { class_idx, .. } => self.classes.get(*class_idx), + TreeItem::Member { class_idx, .. } => self.classes.get(*class_idx), + TreeItem::Package { .. } => None, + } + } + + /// Returns (class_idx, member_idx) if cursor is on a Member item. + pub fn current_member_indices(&self) -> Option<(usize, usize)> { + match self.current_item()? 
{ + TreeItem::Member { class_idx, member_idx } => Some((*class_idx, *member_idx)), + _ => None, + } + } + + /// Returns the MemberEntry for the currently focused tree item (if a member). + pub fn current_member(&self) -> Option<&MemberEntry> { + let (ci, mi) = self.current_member_indices()?; + let cls = self.classes.get(ci)?; + if mi < cls.methods.len() { + cls.methods.get(mi) + } else { + cls.fields.get(mi - cls.methods.len()) + } + } + + /// Returns the class index for the cursor position. + pub fn current_class_idx(&self) -> Option { + match self.current_item()? { + TreeItem::Class { class_idx, .. } => Some(*class_idx), + TreeItem::Member { class_idx, .. } => Some(*class_idx), + _ => None, + } + } + + // -- Code pane helpers ----------------------------------------------------- + + pub fn scroll_code_up(&mut self) { + self.code_scroll = self.code_scroll.saturating_sub(1); + } + + pub fn scroll_code_down(&mut self) { + let max = (self.code_total_lines as u16).saturating_sub(self.code_visible_height); + if self.code_scroll < max { + self.code_scroll += 1; + } + } + + pub fn page_code_up(&mut self) { + self.code_scroll = self.code_scroll.saturating_sub(self.code_visible_height.saturating_sub(1)); + } + + pub fn page_code_down(&mut self) { + let max = (self.code_total_lines as u16).saturating_sub(self.code_visible_height); + self.code_scroll = (self.code_scroll + self.code_visible_height.saturating_sub(1)).min(max); + } + + // -- Search --------------------------------------------------------------- + + pub fn apply_search(&mut self) { + self.tree_cursor = 0; + self.tree_scroll = 0; + self.rebuild_tree(); + } + + pub fn clear_search(&mut self) { + self.search.clear(); + self.apply_search(); + } + + // -- Inline code edit ----------------------------------------------------- + + /// Enter CodeEdit mode for the currently selected method. 
+ pub fn begin_code_edit(&mut self) -> bool { + let (ci, mi) = match self.current_member_indices() { + Some(x) => x, + None => return false, + }; + { + let cls = &self.classes[ci]; + let member = if mi < cls.methods.len() { + &cls.methods[mi] + } else { + &cls.fields[mi - cls.methods.len()] + }; + if !member.kind.is_method() { return false; } + } + + // Compute raw disasm (may be cached already from the viewer). + let raw_lines = { + let entry = self.get_or_compute_disasm(ci, mi); + entry.raw.clone() + }; + + let cls = &self.classes[ci]; + let member = if mi < cls.methods.len() { &cls.methods[mi] } else { &cls.fields[mi - cls.methods.len()] }; + let registers = member.parse_registers(); + + self.code_edit = CodeEditState { + lines: raw_lines, + cursor: 0, + scroll: 0, + errors: HashMap::new(), + class_desc: cls.descriptor.clone(), + method_name: member.name.clone(), + registers, + dirty: false, + line_buf: String::new(), + pending_d: false, + }; + self.mode = AppMode::CodeEdit; + true + } + + /// Enter LineEdit for the line at code_edit.cursor. + pub fn begin_line_edit(&mut self) { + self.code_edit.begin_line_edit(); + self.mode = AppMode::LineEdit; + } + + /// Commit the current line edit. + pub fn commit_line_edit(&mut self) { + self.code_edit.commit_line_edit(); + self.mode = AppMode::CodeEdit; + } + + /// Abort line edit without changes. + pub fn abort_line_edit(&mut self) { + self.code_edit.abort_line_edit(); + self.mode = AppMode::CodeEdit; + } + + /// Cancel the whole code edit session. + pub fn cancel_code_edit(&mut self) { + self.code_edit = CodeEditState::default(); + self.mode = AppMode::Browse; + } + + /// Save the code edit: compile -> DexIr -> DexWriter -> update state. 
+ pub fn save_code_edit(&mut self) -> bool { + let code_def = match self.code_edit.compile() { + Ok(c) => c, + Err(e) => { + self.code_edit.errors.insert(self.code_edit.cursor, e); + return false; + } + }; + if !self.is_editable() { + self.code_edit.errors.insert(0, "Edit mode requires -o flag".into()); + return false; + } + + // Rebuild DexIr from current class data, replacing the target method. + let new_bytes = match rebuild_dex_with_new_code( + &self.raw_bytes, + &self.classes, + &self.code_edit.class_desc, + &self.code_edit.method_name, + code_def, + ) { + Ok(b) => b, + Err(e) => { self.code_edit.errors.insert(0, e); return false; } + }; + + // Write to output path if configured. + if let Some(ref path) = self.output_path.clone() { + if let Err(e) = std::fs::write(path, &new_bytes) { + self.code_edit.errors.insert(0, format!("Write failed: {e}")); + return false; + } + } + + // Refresh app state. + match crate::commands::inspect::build_app_state_from_bytes(&new_bytes) { + Ok((classes, file_info)) => { + let prev_cursor = self.tree_cursor; + self.classes = classes; + self.file_info = file_info; + self.raw_bytes = new_bytes; + self.disasm_cache.clear(); // offsets changed after rewrite + // Rebuild tree preserving expansion state. 
+ self.rebuild_tree(); + self.tree_cursor = prev_cursor.min(self.tree.len().saturating_sub(1)); + self.code_edit = CodeEditState::default(); + self.mode = AppMode::Browse; + true + } + Err(e) => { + self.raw_bytes = new_bytes; + self.disasm_cache.clear(); + self.code_edit.errors.insert(0, format!("Re-parse: {e}")); + false + } + } + } + + // -- Modal helpers --------------------------------------------------------- + + pub fn begin_rename_modal(&mut self) { + if self.current_class().is_none() { return; } + self.modal = ModalState { buffer: String::new(), error: None }; + self.mode = AppMode::RenameModal; + } + + pub fn begin_flags_modal(&mut self) { + self.modal = ModalState { buffer: String::new(), error: None }; + self.mode = AppMode::FlagsModal; + } + + pub fn cancel_modal(&mut self) { + self.modal = ModalState::default(); + self.mode = AppMode::Browse; + } + + pub fn apply_rename(&mut self) -> bool { + let new_desc = self.modal.buffer.trim().to_string(); + if new_desc.is_empty() { + self.modal.error = Some("Descriptor cannot be empty".into()); + return false; + } + let class_desc = match self.current_class() { + Some(c) => c.descriptor.clone(), + None => return false, + }; + self.apply_editor_op(move |ed| ed.rename_class(&class_desc, &new_desc)) + } + + pub fn apply_flags(&mut self) -> bool { + let val = match parse_flags_value(&self.modal.buffer) { + Ok(v) => v, + Err(e) => { self.modal.error = Some(e); return false; } + }; + let is_member = self.current_member_indices().map(|(_, mi)| { + let ci = self.current_class_idx().unwrap(); + let cls = &self.classes[ci]; + mi < cls.methods.len() + }).unwrap_or(false); + + if is_member { + let class_desc = self.current_class().map(|c| c.descriptor.clone()).unwrap_or_default(); + let method_name = self.current_member().map(|m| m.name.clone()).unwrap_or_default(); + self.apply_editor_op(move |ed| ed.set_method_access_flags(&class_desc, &method_name, val)) + } else { + let class_desc = match self.current_class() { + 
Some(c) => c.descriptor.clone(), + None => return false, + }; + self.apply_editor_op(move |ed| ed.set_class_access_flags(&class_desc, val)) + } + } + + fn apply_editor_op(&mut self, op: F) -> bool + where F: FnOnce(&mut dexrs::file::DexEditor) -> dexrs::Result<()> + { + if self.raw_bytes.is_empty() { + self.modal.error = Some("No source bytes".into()); + return false; + } + let mut editor = match dexrs::file::DexEditor::from_bytes(self.raw_bytes.clone()) { + Ok(e) => e, + Err(e) => { self.modal.error = Some(format!("DexEditor: {e}")); return false; } + }; + if let Err(e) = op(&mut editor) { + self.modal.error = Some(format!("Edit failed: {e}")); + return false; + } + let new_bytes = match editor.build() { + Ok(b) => b, + Err(e) => { self.modal.error = Some(format!("Build failed: {e}")); return false; } + }; + if let Some(ref path) = self.output_path.clone() { + if let Err(e) = std::fs::write(path, &new_bytes) { + self.modal.error = Some(format!("Write failed: {e}")); + return false; + } + } + match crate::commands::inspect::build_app_state_from_bytes(&new_bytes) { + Ok((classes, file_info)) => { + let prev = self.tree_cursor; + self.classes = classes; + self.file_info = file_info; + self.raw_bytes = new_bytes; + self.disasm_cache.clear(); + self.rebuild_tree(); + self.tree_cursor = prev.min(self.tree.len().saturating_sub(1)); + self.modal = ModalState::default(); + self.mode = AppMode::Browse; + true + } + Err(e) => { + self.raw_bytes = new_bytes; + self.disasm_cache.clear(); + self.modal.error = Some(format!("Re-parse: {e}")); + false + } + } + } +} + +// -- ClassEntry -> DexIr + target method override ------------------------------ + +fn rebuild_dex_with_new_code( + raw_bytes: &[u8], + classes: &[ClassEntry], + target_class: &str, + target_method: &str, + new_code: dexrs::file::CodeDef, +) -> Result, String> { + use dexrs::file::{ + builder::CodeBuilder, + ir::{ClassDef, FieldDef, MethodDef, ProtoKey}, + DexFile, DexIr, DexLocation, DexWriter, + 
verifier::VerifyPreset, + }; + + // Re-open the original DEX so we can re-assemble non-target methods from bytes. + let dex = DexFile::open(&raw_bytes, DexLocation::InMemory, VerifyPreset::None) + .map_err(|e| e.to_string())?; + + let mut ir = DexIr::new(35); + + for ce in classes { + let mut cls = ClassDef::new(&ce.descriptor); + cls.access_flags = ce.access_flags; + if !ce.superclass_desc.is_empty() { + cls.superclass = Some(ce.superclass_desc.clone()); + } + + for m in &ce.methods { + let proto = match ProtoKey::from_descriptor(&m.proto_or_type_desc) { + Some(p) => p, + None => ProtoKey { return_type: "V".into(), params: vec![] }, + }; + + let is_target = ce.descriptor == target_class && m.raw_name == target_method; + + let code = if is_target { + Some(new_code.clone()) + } else if m.code_offset == 0 { + None + } else { + // Re-assemble the original code from the raw bytes. + let ca = match dex.get_code_item_accessor(m.code_offset) { + Ok(c) => c, + Err(_) => { continue; } + }; + let regs = ca.registers_size(); + let ins = ca.ins_size(); + let outs = ca.outs_size(); + let mut cb = CodeBuilder::new(regs, ins, outs); + let mut ok = true; + for insn in ca { + match insn.to_assembler_text(&dex) { + Ok(text) => { + let t = text.trim().to_string(); + if t.is_empty() { continue; } + if let Some(lbl) = t.strip_prefix(':') { + cb.label(lbl); + } else if cb.emit(&t).is_err() { + ok = false; + break; + } + } + Err(_) => { ok = false; break; } + } + } + if ok { cb.build().ok() } else { None } + }; + + let mut method = MethodDef::new(&m.raw_name, proto); + method.access_flags = m.access_flags; + if let Some(c) = code { method.code = Some(c); } + + if matches!(m.kind, MemberKind::DirectMethod) { + cls.direct_methods.push(method); + } else { + cls.virtual_methods.push(method); + } + } + + for f in &ce.fields { + let mut fd = FieldDef::new(&f.raw_name, &f.proto_or_type_desc); + fd.access_flags = f.access_flags; + if matches!(f.kind, MemberKind::StaticField) { + 
cls.static_fields.push(fd); + } else { + cls.instance_fields.push(fd); + } + } + + ir.add_class(cls); + } + + DexWriter::write(ir).map_err(|e| e.to_string()) +} + +// -- Misc helpers ------------------------------------------------------------- + +fn parse_flags_value(s: &str) -> Result { + let s = s.trim(); + if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) { + u32::from_str_radix(hex, 16).map_err(|e| format!("invalid hex: {e}")) + } else { + s.parse::().map_err(|e| format!("invalid number: {e}")) + } +} diff --git a/src/bin/dexrs/tui/events.rs b/src/bin/dexrs/tui/events.rs new file mode 100644 index 0000000..62c8e97 --- /dev/null +++ b/src/bin/dexrs/tui/events.rs @@ -0,0 +1,226 @@ +use crossterm::event::{self, Event, KeyCode, KeyEventKind, KeyModifiers}; +use std::time::Duration; + +use super::app::{App, AppMode, Focus}; + +pub enum Action { + Quit, + Continue, +} + +fn handle_browse(app: &mut App, code: KeyCode, mods: KeyModifiers) { + // Global quit + if matches!(code, KeyCode::Char('q') | KeyCode::Char('Q')) + || (code == KeyCode::Char('c') && mods.contains(KeyModifiers::CONTROL)) + { + // Signal quit via a special code — handled by returning Quit in the outer loop. + // We re-use the Action enum by setting a flag. Easier: just set a quit flag. + // Actually we can't return from here, so we'll use an app flag. + app.show_help = false; // placeholder; handled below in outer match + // We need to bubble Quit up. Let's abuse show_help=false as a sentinel. + // Better: add a quit flag to App. + // For now let's leave this handled in the outer function. 
+ return; + } + + match code { + KeyCode::Char('q') | KeyCode::Char('Q') => {} + + // Focus toggle + KeyCode::Tab | KeyCode::BackTab => { + app.focus = match app.focus { + Focus::Tree => Focus::Code, + Focus::Code => Focus::Tree, + }; + } + + // Navigation + KeyCode::Up | KeyCode::Char('k') => match app.focus { + Focus::Tree => app.tree_up(), + Focus::Code => app.scroll_code_up(), + }, + KeyCode::Down | KeyCode::Char('j') => match app.focus { + Focus::Tree => app.tree_down(), + Focus::Code => app.scroll_code_down(), + }, + KeyCode::PageUp => match app.focus { + Focus::Tree => { for _ in 0..10 { app.tree_up(); } } + Focus::Code => app.page_code_up(), + }, + KeyCode::PageDown => match app.focus { + Focus::Tree => { for _ in 0..10 { app.tree_down(); } } + Focus::Code => app.page_code_down(), + }, + + // Tree: expand/collapse/select + KeyCode::Enter | KeyCode::Right | KeyCode::Char('l') => { + app.toggle_expand(); + } + KeyCode::Left | KeyCode::Char('h') => { + if app.focus == Focus::Code { + app.focus = Focus::Tree; + } else { + app.collapse_or_parent(); + } + } + KeyCode::Esc => { + if app.focus == Focus::Code { + app.focus = Focus::Tree; + } + } + + // Search + KeyCode::Char('/') => { + app.focus = Focus::Tree; + app.mode = AppMode::Search; + } + + // Overlays + KeyCode::Char('?') => app.show_help = !app.show_help, + KeyCode::Char('i') => app.show_info = !app.show_info, + + // Edit operations (require source_bytes) + KeyCode::Char('e') if app.is_editable() => { + app.show_help = false; + app.show_info = false; + app.begin_code_edit(); + } + KeyCode::Char('r') if app.is_editable() => { + app.show_help = false; + app.show_info = false; + app.begin_rename_modal(); + } + KeyCode::Char('f') if app.is_editable() => { + app.show_help = false; + app.show_info = false; + app.begin_flags_modal(); + } + + _ => {} + } +} + +fn handle_code_edit(app: &mut App, code: KeyCode) { + match code { + KeyCode::Esc => app.cancel_code_edit(), + + // Navigation between lines + KeyCode::Up | 
KeyCode::Char('k') => app.code_edit.move_up(), + KeyCode::Down | KeyCode::Char('j') => app.code_edit.move_down(), + + // Edit current line + KeyCode::Enter | KeyCode::Char('i') => app.begin_line_edit(), + + // Append new line after cursor + KeyCode::Char('a') | KeyCode::Char('o') => { + app.code_edit.append_line(); + app.begin_line_edit(); + } + + // Insert new line before cursor + KeyCode::Char('O') => { + app.code_edit.insert_line(); + app.begin_line_edit(); + } + + // Delete line (vim-style: d then d) + KeyCode::Char('d') => { + if app.code_edit.pending_d { + app.code_edit.delete_line(); + app.code_edit.pending_d = false; + } else { + app.code_edit.pending_d = true; + } + } + + // Save + KeyCode::Char('w') => { app.save_code_edit(); } + + _ => { app.code_edit.pending_d = false; } + } +} + +fn handle_line_edit(app: &mut App, code: KeyCode) { + match code { + KeyCode::Enter => app.commit_line_edit(), + KeyCode::Esc => app.abort_line_edit(), + KeyCode::Backspace => { app.code_edit.line_buf.pop(); } + KeyCode::Char(c) => app.code_edit.line_buf.push(c), + _ => {} + } +} + +fn handle_search(app: &mut App, code: KeyCode) { + match code { + KeyCode::Esc => { + app.mode = AppMode::Browse; + app.clear_search(); + } + KeyCode::Enter => { + app.mode = AppMode::Browse; + } + KeyCode::Backspace => { + app.search.pop(); + app.apply_search(); + } + KeyCode::Char(c) => { + app.search.push(c); + app.apply_search(); + } + _ => {} + } +} + +fn handle_modal(app: &mut App, code: KeyCode) { + match code { + KeyCode::Esc => app.cancel_modal(), + KeyCode::Enter => { + let mode = app.mode.clone(); + match mode { + AppMode::RenameModal => { app.apply_rename(); } + AppMode::FlagsModal => { app.apply_flags(); } + _ => {} + } + } + KeyCode::Backspace => { + app.modal.buffer.pop(); + app.modal.error = None; + } + KeyCode::Char(c) => { + app.modal.buffer.push(c); + app.modal.error = None; + } + _ => {} + } +} + +/// External wrapper that handles the Quit action. 
+pub fn handle_events_with_quit(app: &mut App) -> anyhow::Result { + if !event::poll(Duration::from_millis(100))? { + return Ok(Action::Continue); + } + + let ev = event::read()?; + let Event::Key(key) = ev else { return Ok(Action::Continue) }; + if key.kind != KeyEventKind::Press { + return Ok(Action::Continue); + } + + // Check for quit before dispatching + if app.mode == AppMode::Browse + && (matches!(key.code, KeyCode::Char('q') | KeyCode::Char('Q')) + || (key.code == KeyCode::Char('c') && key.modifiers.contains(KeyModifiers::CONTROL))) + { + return Ok(Action::Quit); + } + + match &app.mode { + AppMode::LineEdit => handle_line_edit(app, key.code), + AppMode::CodeEdit => handle_code_edit(app, key.code), + AppMode::Search => handle_search(app, key.code), + AppMode::RenameModal | AppMode::FlagsModal => handle_modal(app, key.code), + AppMode::Browse => handle_browse(app, key.code, key.modifiers), + } + + Ok(Action::Continue) +} diff --git a/src/bin/dexrs/tui/mod.rs b/src/bin/dexrs/tui/mod.rs new file mode 100644 index 0000000..8a13c14 --- /dev/null +++ b/src/bin/dexrs/tui/mod.rs @@ -0,0 +1,3 @@ +pub mod app; +pub mod events; +pub mod ui; diff --git a/src/bin/dexrs/tui/ui.rs b/src/bin/dexrs/tui/ui.rs new file mode 100644 index 0000000..bc9c167 --- /dev/null +++ b/src/bin/dexrs/tui/ui.rs @@ -0,0 +1,703 @@ +//! TUI renderer — 2-pane layout: collapsible class tree + scrollable code/details pane. +//! +//! Layout: +//! ``` +//! ┌- Tree ------┬- Code / Details ---------------------------------┐ +//! │ packages │ disasm / class info / code editor │ +//! │ classes │ │ +//! │ members │ │ +//! └-------------┴--------------------------------------------------┘ +//! [status bar] +//! 
``` + +use ratatui::{ + layout::{Constraint, Layout, Rect}, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{ + Block, Borders, Clear, List, ListItem, ListState, Paragraph, Scrollbar, + ScrollbarOrientation, ScrollbarState, Wrap, + }, + Frame, +}; + +use super::app::{App, AppMode, Focus, MemberKind, TreeItem}; +use crate::highlight; + +// -- Palette ------------------------------------------------------------------- +// All muted RGB values — avoid terminal neons, keep contrast readable but calm. + +/// Active-border / accent (steel blue) +const ACCENT: Color = Color::Rgb(95, 135, 175); +/// Inactive border / secondary text (mid gray) +const DIM: Color = Color::Rgb(110, 110, 110); +/// Selected-row background (very subtle dark) +const HIGHLIGHT_BG: Color = Color::Rgb(42, 48, 58); +/// Error / warning (soft amber-red) +const WARN: Color = Color::Rgb(210, 110, 80); +/// Program-counter column (dim gray) +const PC_COLOR: Color = Color::Rgb(100, 100, 110); +/// Code-edit cursor row background (deep slate) +const EDIT_CURSOR_BG: Color = Color::Rgb(30, 50, 75); +/// Code-editor line-edit background (dark warm gray) +const LINE_EDIT_BG: Color = Color::Rgb(55, 48, 40); + +/// Section / title text (muted gold) +const TITLE: Color = Color::Rgb(190, 160, 90); +/// Package-header text (same as title) +const PKG_COLOR: Color = Color::Rgb(190, 160, 90); +/// Class-name text (light gray) +const CLASS_COLOR: Color = Color::Rgb(200, 200, 200); +/// Header/key labels in the details pane (mid gray) +const KEY_COLOR: Color = Color::Rgb(120, 120, 130); + +/// Hint / informational italic text +const HINT: Color = Color::Rgb(100, 110, 120); + +// Member-icon colors +const DIRECT_METHOD_COLOR: Color = Color::Rgb(120, 165, 210); +const VIRTUAL_METHOD_COLOR: Color = Color::Rgb(100, 185, 175); +const STATIC_FIELD_COLOR: Color = Color::Rgb(195, 160, 80); +const INSTANCE_FIELD_COLOR: Color = Color::Rgb(170, 135, 70); + +// Status-bar backgrounds +const STATUS_BG: Color = 
Color::Rgb(28, 28, 32); +const STATUS_EDIT_BG: Color = Color::Rgb(30, 50, 75); +const STATUS_LINE_BG: Color = Color::Rgb(50, 38, 30); + +// -- Top-level draw ------------------------------------------------------------ + +pub fn draw(f: &mut Frame, app: &mut App) { + let area = f.area(); + + let [main_area, status_area] = Layout::vertical([ + Constraint::Min(0), + Constraint::Length(1), + ]).areas(area); + + let [tree_area, code_area] = Layout::horizontal([ + Constraint::Percentage(28), + Constraint::Fill(1), + ]).areas(main_area); + + draw_tree(f, app, tree_area); + draw_code(f, app, code_area); + draw_status_bar(f, app, status_area); + + // Overlays (highest z-order last) + if app.show_help { + draw_help_overlay(f, area); + } else if app.show_info { + draw_info_overlay(f, app, area); + } else if matches!(app.mode, AppMode::RenameModal | AppMode::FlagsModal) { + draw_modal_overlay(f, app, area); + } +} + +// -- Loading screen ------------------------------------------------------------ + +const SPINNER: &[char] = &['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + +/// Render a loading/progress screen shown while `build_app_state` runs. +/// +/// `tick` is an ever-increasing counter used to animate the spinner. 
+pub fn draw_loading( + f: &mut Frame, + file_name: &str, + current: usize, + total: usize, + tick: u64, +) { + let area = f.area(); + + // Center a narrow box + let box_w = 52u16.min(area.width.saturating_sub(4)); + let box_h = 10u16.min(area.height.saturating_sub(4)); + let popup = centered_rect(box_w, box_h, area); + + let spinner = SPINNER[(tick as usize) % SPINNER.len()]; + + // Gauge: filled / total width + let inner_w = (popup.width.saturating_sub(4)) as usize; + let pct = if total == 0 { 0 } else { (current * inner_w) / total }; + let pct = pct.min(inner_w); + let bar: String = "█".repeat(pct) + &"░".repeat(inner_w.saturating_sub(pct)); + + let progress_line = if total > 0 { + format!(" {current}/{total} classes") + } else { + " Reading file…".to_string() + }; + + let lines = vec![ + Line::default(), + Line::from(vec![ + Span::styled(format!(" {spinner} "), Style::default().fg(ACCENT)), + Span::styled( + format!("Loading {file_name}"), + Style::default().fg(Color::Rgb(200, 200, 210)), + ), + ]), + Line::default(), + Line::from(Span::styled( + format!(" [{bar}]"), + Style::default().fg(Color::Rgb(90, 130, 170)), + )), + Line::from(Span::styled( + progress_line, + Style::default().fg(DIM), + )), + Line::default(), + Line::from(Span::styled( + " Press Esc or q to cancel", + Style::default().fg(Color::Rgb(100, 100, 100)).add_modifier(Modifier::ITALIC), + )), + ]; + + let p = Paragraph::new(lines).block( + Block::default() + .title(Span::styled(" dexrs ", Style::default().fg(TITLE).add_modifier(Modifier::BOLD))) + .borders(Borders::ALL) + .border_style(Style::default().fg(ACCENT)), + ); + f.render_widget(p, popup); +} + +// -- Tree pane ----------------------------------------------------------------- + +fn draw_tree(f: &mut Frame, app: &mut App, area: Rect) { + let active = app.focus == Focus::Tree && app.mode == AppMode::Browse; + let border_style = if active { Style::default().fg(ACCENT) } else { Style::default().fg(DIM) }; + + let title = if app.mode == 
AppMode::Search { + format!(" / {} ", app.search) + } else { + format!(" Classes [{}] ", app.tree.len()) + }; + + let visible_height = area.height.saturating_sub(2) as usize; + + // Clamp tree_scroll so cursor is always visible. + if app.tree_cursor < app.tree_scroll { + app.tree_scroll = app.tree_cursor; + } + if app.tree_cursor >= app.tree_scroll + visible_height { + app.tree_scroll = app.tree_cursor - visible_height + 1; + } + + let items: Vec = app + .tree + .iter() + .enumerate() + .skip(app.tree_scroll) + .take(visible_height) + .map(|(i, item)| tree_item_to_list_item(app, i, item)) + .collect(); + + let mut state = ListState::default(); + let cursor_in_view = app.tree_cursor.saturating_sub(app.tree_scroll); + if !app.tree.is_empty() { + state.select(Some(cursor_in_view)); + } + + let list = List::new(items) + .block( + Block::default() + .title(Span::styled(title, Style::default().fg(TITLE))) + .borders(Borders::ALL) + .border_style(border_style), + ) + .highlight_style( + Style::default() + .bg(HIGHLIGHT_BG) + .add_modifier(Modifier::BOLD), + ); + + f.render_stateful_widget(list, area, &mut state); +} + +fn tree_item_to_list_item<'a>(app: &App, _idx: usize, item: &TreeItem) -> ListItem<'a> { + match item { + TreeItem::Package { name, expanded } => { + let icon = if *expanded { "▾" } else { "▸" }; + let label = if name.is_empty() { "(no package)".to_string() } else { name.clone() }; + ListItem::new(Line::from(vec![ + Span::styled( + format!("{icon} {label}"), + Style::default().fg(PKG_COLOR).add_modifier(Modifier::BOLD), + ), + ])) + } + TreeItem::Class { class_idx, expanded } => { + let cls = &app.classes[*class_idx]; + let icon = if *expanded { "▾" } else { "▸" }; + let has_members = !cls.methods.is_empty() || !cls.fields.is_empty(); + let icon = if has_members { icon } else { "·" }; + ListItem::new(Line::from(vec![ + Span::raw(" "), + Span::styled( + format!("{icon} {}", cls.simple_name), + Style::default().fg(CLASS_COLOR), + ), + ])) + } + 
TreeItem::Member { class_idx, member_idx } => { + let cls = &app.classes[*class_idx]; + let member = if *member_idx < cls.methods.len() { + &cls.methods[*member_idx] + } else { + &cls.fields[*member_idx - cls.methods.len()] + }; + let (icon, color) = match member.kind { + MemberKind::DirectMethod => ("⬡", DIRECT_METHOD_COLOR), + MemberKind::VirtualMethod => ("◈", VIRTUAL_METHOD_COLOR), + MemberKind::StaticField => ("■", STATIC_FIELD_COLOR), + MemberKind::InstanceField => ("□", INSTANCE_FIELD_COLOR), + }; + // Show just the raw method/field name (simpler to read). + let display_name = if member.raw_name.is_empty() { + &member.name + } else { + &member.raw_name + }; + ListItem::new(Line::from(vec![ + Span::raw(" "), + Span::styled(format!("{icon} "), Style::default().fg(color)), + Span::raw(display_name.to_string()), + ])) + } + } +} + +// -- Code pane ----------------------------------------------------------------- + +fn draw_code(f: &mut Frame, app: &mut App, area: Rect) { + match &app.mode { + AppMode::CodeEdit | AppMode::LineEdit => draw_code_editor(f, app, area), + _ => draw_code_viewer(f, app, area), + } +} + +fn draw_code_viewer(f: &mut Frame, app: &mut App, area: Rect) { + let active = app.focus == Focus::Code; + let border_style = if active { Style::default().fg(ACCENT) } else { Style::default().fg(DIM) }; + + let (title, content) = build_code_viewer_content(app); + + let total_lines = content.len(); + let visible = area.height.saturating_sub(2); + + let paragraph = Paragraph::new(content) + .block( + Block::default() + .title(Span::styled(title, Style::default().fg(TITLE))) + .borders(Borders::ALL) + .border_style(border_style), + ) + .scroll((app.code_scroll, 0)) + .wrap(Wrap { trim: false }); + + f.render_widget(paragraph, area); + + if total_lines > visible as usize { + let scrollbar = Scrollbar::new(ScrollbarOrientation::VerticalRight) + .begin_symbol(Some("↑")) + .end_symbol(Some("↓")); + let mut sb_state = + 
ScrollbarState::new(total_lines.saturating_sub(visible as usize)) + .position(app.code_scroll as usize); + let sb_area = Rect { + x: area.x + area.width - 1, + y: area.y + 1, + width: 1, + height: visible, + }; + f.render_stateful_widget(scrollbar, sb_area, &mut sb_state); + } + + app.code_total_lines = total_lines; + app.code_visible_height = visible; +} + +fn build_code_viewer_content(app: &mut App) -> (String, Vec>) { + // Clone the item to avoid holding a reference into app.tree while we mutate app.disasm_cache. + let Some(item) = app.current_item().cloned() else { + return (" Details ".to_string(), vec![]); + }; + + match &item { + TreeItem::Package { name, .. } => { + let pkg_display = if name.is_empty() { "(no package)" } else { name.as_str() }; + let n_classes = app.classes.iter().filter(|c| &c.package == name).count(); + let lines = vec![ + Line::from(Span::styled( + format!("Package: {pkg_display}"), + Style::default().fg(TITLE), + )), + Line::from(Span::styled( + format!("{n_classes} class(es)"), + Style::default().fg(DIM), + )), + ]; + (format!(" Package — {pkg_display} "), lines) + } + TreeItem::Class { class_idx, .. } => { + let cls = &app.classes[*class_idx]; + let title = format!(" {} ", cls.simple_name); + let lines = vec![ + key_val("Descriptor", &cls.descriptor), + key_val("Package", &cls.package), + key_val("Superclass", &cls.superclass), + key_val("Flags", &crate::output::format_flags(cls.access_flags)), + Line::default(), + Line::from(Span::styled( + format!("{} methods, {} fields", cls.methods.len(), cls.fields.len()), + Style::default().fg(DIM), + )), + Line::default(), + Line::from(Span::styled( + "Press [->] or [Enter] to expand, then select a method to view its code.", + Style::default().fg(HINT).add_modifier(Modifier::ITALIC), + )), + ]; + (title, lines) + } + TreeItem::Member { class_idx, member_idx } => { + let ci = *class_idx; + let mi = *member_idx; + + // Compute disasm on first view; subsequent views use the cache (instant). 
+ let styled_disasm: Vec<(u32, _)> = { + let entry = app.get_or_compute_disasm(ci, mi); + entry.styled.clone() + }; + + let cls = &app.classes[ci]; + let member = if mi < cls.methods.len() { + &cls.methods[mi] + } else { + &cls.fields[mi - cls.methods.len()] + }; + let title = format!(" {} — {} ", member.kind.label(), member.raw_name); + let mut lines: Vec = Vec::new(); + + // Signature / type + lines.push(key_val("Name", &member.name)); + lines.push(key_val("Flags", &crate::output::format_flags(member.access_flags))); + + if let Some(ref info) = member.code_info { + lines.push(Line::default()); + lines.push(Line::from(Span::styled(info.clone(), Style::default().fg(DIM)))); + } + + if styled_disasm.is_empty() { + lines.push(Line::default()); + lines.push(Line::from(Span::styled( + "(abstract / native — no code)", + Style::default().fg(DIM).add_modifier(Modifier::ITALIC), + ))); + if app.is_editable() { + lines.push(Line::from(Span::styled( + "Press [e] to edit.", + Style::default().fg(HINT).add_modifier(Modifier::ITALIC), + ))); + } + } else { + lines.push(Line::default()); + for (pc, styled) in &styled_disasm { + let mut spans = vec![ + Span::styled(format!("{pc:04x}"), Style::default().fg(PC_COLOR)), + Span::raw(" "), + ]; + spans.extend(highlight::to_tui_line(styled).spans); + lines.push(Line::from(spans)); + } + if app.is_editable() { + lines.push(Line::default()); + lines.push(Line::from(Span::styled( + "Press [e] to edit instructions inline.", + Style::default().fg(HINT).add_modifier(Modifier::ITALIC), + ))); + } + } + (title, lines) + } + } +} + +fn key_val(key: &str, value: &str) -> Line<'static> { + Line::from(vec![ + Span::styled( + format!("{key:<14}"), + Style::default().fg(KEY_COLOR), + ), + Span::raw(value.to_string()), + ]) +} + +// -- Inline code editor -------------------------------------------------------- + +fn draw_code_editor(f: &mut Frame, app: &mut App, area: Rect) { + let in_line_edit = app.mode == AppMode::LineEdit; + let border_style = 
Style::default().fg(ACCENT); + + let cursor = app.code_edit.cursor; + let scroll = app.code_edit.scroll; + let visible_h = area.height.saturating_sub(4) as usize; // -4: borders + header + hint row + + // Clamp scroll + if cursor < scroll as usize { app.code_edit.scroll = cursor as u16; } + if cursor >= scroll as usize + visible_h { app.code_edit.scroll = (cursor - visible_h + 1) as u16; } + let scroll = app.code_edit.scroll as usize; + + let dirty_mark = if app.code_edit.dirty { " ●" } else { "" }; + let title = format!(" ✎ {} [code edit{}] ", app.code_edit.method_name, dirty_mark); + + let (regs, ins, outs) = app.code_edit.registers; + let header = format!(".registers {regs} {ins} {outs}"); + + let mut lines: Vec = Vec::new(); + + // Header line (non-editable but informational) + lines.push(Line::from(Span::styled( + header, + Style::default().fg(Color::Rgb(160, 120, 185)), + ))); + lines.push(Line::default()); + + // Instruction lines + for (i, line_text) in app.code_edit.lines.iter().enumerate() { + if i < scroll || i >= scroll + visible_h { continue; } + + let is_cursor = i == cursor; + let has_error = app.code_edit.errors.contains_key(&i); + + let display_text = if is_cursor && in_line_edit { + // Show the edit buffer with cursor indicator + format!("{}_", app.code_edit.line_buf) + } else { + line_text.clone() + }; + + let num_span = Span::styled( + format!("{:>3} ", i + 1), + Style::default().fg(DIM), + ); + + let text_style = if is_cursor && !in_line_edit { + Style::default().bg(EDIT_CURSOR_BG).fg(Color::Rgb(220, 220, 230)).add_modifier(Modifier::BOLD) + } else if is_cursor && in_line_edit { + Style::default().bg(LINE_EDIT_BG).fg(Color::Rgb(220, 190, 130)) + } else if has_error { + Style::default().fg(WARN) + } else { + Style::default().fg(Color::Rgb(195, 195, 200)) + }; + + lines.push(Line::from(vec![ + num_span, + Span::styled(display_text, text_style), + ])); + + // Show error on next line + if let Some(err) = app.code_edit.errors.get(&i) { + 
lines.push(Line::from(Span::styled( + format!(" ↳ {err}"), + Style::default().fg(WARN), + ))); + } + } + + // Bottom hint row + lines.push(Line::default()); + let hint = if in_line_edit { + "[Enter] Confirm [Esc] Cancel" + } else if app.code_edit.errors.is_empty() && app.code_edit.dirty { + "[w] Save [Esc] Discard [↑↓] Navigate [Enter/i] Edit line [a] Append [dd] Delete" + } else { + "[w] Save [Esc] Discard [↑↓] Navigate [Enter/i] Edit line [a] Append [O] Insert [dd] Delete" + }; + lines.push(Line::from(Span::styled(hint, Style::default().fg(DIM)))); + + // Top-level error + if let Some(err) = app.code_edit.errors.get(&0).filter(|_| app.code_edit.lines.is_empty() || !app.code_edit.errors.contains_key(&0)) { + lines.push(Line::from(Span::styled( + format!("Error: {err}"), + Style::default().fg(WARN).add_modifier(Modifier::BOLD), + ))); + } + + let paragraph = Paragraph::new(lines) + .block( + Block::default() + .title(Span::styled(title, Style::default().fg(TITLE))) + .borders(Borders::ALL) + .border_style(border_style), + ) + .wrap(Wrap { trim: false }); + + f.render_widget(paragraph, area); +} + +// -- Status bar ---------------------------------------------------------------- + +fn draw_status_bar(f: &mut Frame, app: &App, area: Rect) { + let (bg, fg, hints) = match &app.mode { + AppMode::Search => ( + STATUS_BG, + Color::Rgb(180, 170, 120), + format!(" Search: {} [Enter] Apply [Esc] Cancel", app.search), + ), + AppMode::CodeEdit => ( + STATUS_EDIT_BG, + Color::Rgb(160, 185, 210), + " [↑↓] Navigate [Enter/i] Edit [a] Append [dd] Delete [w] Save [Esc] Discard ".to_string(), + ), + AppMode::LineEdit => ( + STATUS_LINE_BG, + Color::Rgb(210, 185, 140), + " [Enter] Confirm line [Esc] Cancel ".to_string(), + ), + AppMode::RenameModal | AppMode::FlagsModal => ( + STATUS_BG, + Color::Rgb(160, 185, 210), + " [Enter] Apply [Esc] Cancel ".to_string(), + ), + AppMode::Browse if app.is_editable() => ( + STATUS_BG, + Color::Rgb(140, 150, 160), + " [↑↓/hjkl] Navigate [Tab] 
Toggle pane [/] Search [e] Edit code [r] Rename [f] Flags [i] Info [?] Help [q] Quit ".to_string(), + ), + _ => ( + STATUS_BG, + Color::Rgb(140, 150, 160), + " [↑↓/hjkl] Navigate [Tab] Toggle pane [/] Search [i] Info [?] Help [q] Quit ".to_string(), + ), + }; + let bar = Paragraph::new(hints).style(Style::default().bg(bg).fg(fg)); + f.render_widget(bar, area); +} + +// -- Help overlay -------------------------------------------------------------- + +fn draw_help_overlay(f: &mut Frame, area: Rect) { + let width = 62u16.min(area.width.saturating_sub(4)); + let height = 32u16.min(area.height.saturating_sub(4)); + let popup = centered_rect(width, height, area); + + f.render_widget(Clear, popup); + + let text = vec![ + Line::from(Span::styled(" Keyboard Shortcuts", Style::default().fg(Color::Rgb(200, 200, 210)).add_modifier(Modifier::BOLD))), + Line::default(), + Line::from(Span::styled(" Navigation", Style::default().fg(TITLE))), + Line::from(" [↑/k] [↓/j] Move up / down in tree"), + Line::from(" [->/l] [Enter] Expand class / enter code pane"), + Line::from(" [←/h] [Esc] Collapse / go back to tree"), + Line::from(" [Tab] Toggle focus tree ↔ code"), + Line::from(" [PgUp] [PgDn] Scroll code pane by page"), + Line::from(" [/] Search / filter classes"), + Line::default(), + Line::from(Span::styled(" Editing (requires -o flag)", Style::default().fg(TITLE))), + Line::from(" [e] Enter inline code editor"), + Line::from(" [r] Rename selected class"), + Line::from(" [f] Set access flags"), + Line::default(), + Line::from(Span::styled(" Code Editor", Style::default().fg(TITLE))), + Line::from(" [↑/k] [↓/j] Move cursor between lines"), + Line::from(" [Enter] or [i] Edit the highlighted line"), + Line::from(" [a] Append new line after cursor"), + Line::from(" [O] Insert new line before cursor"), + Line::from(" [dd] Delete current line"), + Line::from(" [w] Save (compile + write)"), + Line::from(" [Esc] Discard all edits"), + Line::default(), + Line::from(Span::styled(" Line Edit", 
Style::default().fg(TITLE))), + Line::from(" Type freely Instruction text input"), + Line::from(" [Enter] Confirm edit"), + Line::from(" [Esc] Cancel line edit"), + Line::default(), + Line::from(" [i] File info [?] This help [q] Quit"), + ]; + + let p = Paragraph::new(text).block( + Block::default() + .title(" Help ") + .borders(Borders::ALL) + .border_style(Style::default().fg(ACCENT)), + ); + f.render_widget(p, popup); +} + +// -- Info overlay -------------------------------------------------------------- + +fn draw_info_overlay(f: &mut Frame, app: &App, area: Rect) { + let width = 60u16.min(area.width.saturating_sub(4)); + let height = 10u16.min(area.height.saturating_sub(4)); + let popup = centered_rect(width, height, area); + + f.render_widget(Clear, popup); + + let lines: Vec = app.file_info.lines().map(|l| Line::from(l.to_string())).collect(); + let p = Paragraph::new(lines).block( + Block::default() + .title(" File Info ") + .borders(Borders::ALL) + .border_style(Style::default().fg(ACCENT)), + ); + f.render_widget(p, popup); +} + +// -- Modal overlay (rename / flags) -------------------------------------------- + +fn draw_modal_overlay(f: &mut Frame, app: &App, area: Rect) { + let (title, prompt) = match &app.mode { + AppMode::RenameModal => (" Rename Class ", "New descriptor (e.g. Lcom/example/Foo;):"), + AppMode::FlagsModal => (" Set Access Flags ", "Flags (decimal or 0x hex, e.g. 
0x1 = public):"), + _ => return, + }; + + let width = 60u16.min(area.width.saturating_sub(4)); + let height = 7u16.min(area.height.saturating_sub(4)); + let popup = centered_rect(width, height, area); + + f.render_widget(Clear, popup); + + let mut lines = vec![ + Line::from(Span::styled(prompt, Style::default().fg(Color::Rgb(190, 190, 200)))), + Line::default(), + Line::from(vec![ + Span::raw("> "), + Span::styled( + app.modal.buffer.clone(), + Style::default().fg(TITLE).add_modifier(Modifier::BOLD), + ), + Span::styled("█", Style::default().fg(TITLE)), + ]), + ]; + + if let Some(ref err) = app.modal.error { + lines.push(Line::default()); + lines.push(Line::from(Span::styled( + format!("⚠ {err}"), + Style::default().fg(WARN), + ))); + } + + let p = Paragraph::new(lines).block( + Block::default() + .title(Span::styled(title, Style::default().fg(TITLE).add_modifier(Modifier::BOLD))) + .title_bottom(Span::styled(" [Enter] Apply [Esc] Cancel ", Style::default().fg(DIM))) + .borders(Borders::ALL) + .border_style(Style::default().fg(ACCENT)), + ); + f.render_widget(p, popup); +} + +// -- Helpers ------------------------------------------------------------------- + +fn centered_rect(width: u16, height: u16, area: Rect) -> Rect { + let x = area.x + (area.width.saturating_sub(width)) / 2; + let y = area.y + (area.height.saturating_sub(height)) / 2; + Rect { x, y, width, height } +} diff --git a/src/error.rs b/src/error.rs index 0be95b7..03045e7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -57,6 +57,12 @@ pub enum DexError { #[error("{0}")] DexFileError(String), + #[error("Attempted to read {item_ty} at null offset (offset 0 is reserved for the DEX header)")] + NullOffset { item_ty: &'static str }, + + #[error("Unaligned read of {item_ty} at offset {offset}")] + UnalignedRead { offset: u32, item_ty: &'static str }, + #[error("Index({index}) to {item_ty} should be less than {max}")] DexIndexError { index: u32, @@ -193,6 +199,28 @@ pub enum DexError { opcode: &'static str, 
target: &'static str, }, + + // -- VDEX errors ----------------------------------------------------------- + + #[cfg(feature = "vdex")] + #[error("Invalid VDEX magic bytes")] + BadVdexMagic, + + #[cfg(feature = "vdex")] + #[error("Unsupported VDEX version: {version:?}")] + UnknownVdexVersion { version: [u8; 4] }, + + #[cfg(feature = "vdex")] + #[error("VDEX file is too short to contain a valid header (size={size})")] + TruncatedVdexFile { size: usize }, + + #[cfg(feature = "vdex")] + #[error("VDEX section '{section}' is invalid: {msg}")] + BadVdexSection { section: &'static str, msg: String }, + + #[cfg(feature = "vdex")] + #[error("VDEX DEX file index {index} out of range (num_dex_files={num_dex_files})")] + VdexDexIndexOutOfRange { index: u32, num_dex_files: u32 }, } #[macro_export] @@ -225,33 +253,3 @@ impl Debug for DexError { write!(f, "{}", self) } } - -#[cfg(feature = "python")] -#[pyo3::pymodule(name = "error")] -pub(crate) mod py_error { - use pyo3::exceptions::PyException; - - pyo3::create_exception!(dexrs._internal.error, PyDexError, PyException); - - impl From for pyo3::PyErr { - fn from(err: super::DexError) -> pyo3::PyErr { - PyDexError::new_err(err.to_string()) - } - } - - #[pymodule_export] - use PyDexError as PyDexErrorExport; - - // generic errors not wrapped by dexrs - #[derive(Debug, thiserror::Error)] - pub enum GenericError { - #[error(transparent)] - IOError(#[from] std::io::Error), - } - - impl From for pyo3::PyErr { - fn from(err: GenericError) -> pyo3::PyErr { - pyo3::exceptions::PyIOError::new_err(err.to_string()) - } - } -} diff --git a/src/file/builder.rs b/src/file/builder.rs new file mode 100644 index 0000000..266e1e6 --- /dev/null +++ b/src/file/builder.rs @@ -0,0 +1,1323 @@ +//! Instruction-level builder: assemble DEX bytecode from disassembly text. +//! +//! # Overview +//! +//! [`CodeBuilder`] accumulates instructions via [`CodeBuilder::emit`], which +//! accepts a single disassembly line such as: +//! +//! ```text +//! 
const-string v0, "hello" +//! invoke-virtual {v0, v1}, Ljava/lang/Object;->toString()Ljava/lang/String; +//! if-eqz v0, :my_label +//! ``` +//! +//! Labels are placed with [`CodeBuilder::label`]. After all instructions are +//! added, call [`CodeBuilder::build`] to run the branch-width fixup loop and +//! produce a [`CodeDef`] ready for inclusion in a [`DexIr`]. +//! +//! # Supported formats +//! +//! | Format | Example | +//! |----------|---------| +//! | `k10x` | `return-void` | +//! | `k11x` | `return v0`, `move-result-object v0` | +//! | `k12x` | `move v0, v1` | +//! | `k11n` | `const/4 v0, #3` | +//! | `k10t` | `goto :label` | +//! | `k20t` | `goto/16 :label` | +//! | `k30t` | `goto/32 :label` | +//! | `k21t` | `if-eqz v0, :label` | +//! | `k22t` | `if-eq v0, v1, :label` | +//! | `k21s` | `const/16 v0, #1000` | +//! | `k21h` | `const/high16 v0, #0x7fff` | +//! | `k31i` | `const v0, #12345` | +//! | `k51l` | `const-wide v0, #1234567890123` | +//! | `k21c` | `const-string v0, "text"`, `new-instance v0, Lfoo;`, `sget-object v0, …` | +//! | `k31c` | `const-string/jumbo v0, "text"` | +//! | `k22c` | `iget-object v0, v1, Lclass;->field:Ltype;` | +//! | `k23x` | `add-int v0, v1, v2` | +//! | `k22b` | `add-int/lit8 v0, v1, #5` | +//! | `k22s` | `add-int/lit16 v0, v1, #100` | +//! | `k35c` | `invoke-virtual {v0, v1}, Lclass;->method(…)…` | +//! | `k3rc` | `invoke-virtual/range {v0 .. v3}, Lclass;->method(…)…` | +//! | `k22x` | `move/from16 v0, v256` | +//! 
| `k32x` | `move/16 v256, v512` | + +use std::collections::HashMap; + +use crate::{ + error::DexError, + file::{ + instruction::{Code, Format, Instruction}, + ir::{ + parse_type_list, BranchTarget, CodeDef, DexRef, InsnNode, MethodDef, ProtoKey, TryDef, + }, + }, + Result, +}; + +// -- Opcode name -> Code lookup ------------------------------------------------- + +fn opcode_map() -> &'static HashMap<&'static str, Code> { + use std::sync::OnceLock; + static MAP: OnceLock> = OnceLock::new(); + MAP.get_or_init(|| { + let mut m = HashMap::new(); + for byte in 0u16..=0xFF { + let code = Instruction::opcode_of(byte); + let name = Instruction::name_of(code); + m.entry(name).or_insert(code); + } + m + }) +} + +// -- Token types --------------------------------------------------------------- + +#[derive(Debug, Clone)] +enum Token { + Register(u16), + PRegister(u16), + Literal(i64), + StringLit(String), + TypeRef(String), + MethodRef { class: String, name: String, proto: ProtoKey }, + FieldRef { class: String, name: String, field_type: String }, + Label(String), + RegList(Vec), + RegRange(u16, u16), // first..last inclusive +} + +// -- Tokenizer ----------------------------------------------------------------- + +/// Split `s` into whitespace-separated tokens *after* skipping the opcode. +/// Returns `(operand_str, operands)`. +fn tokenize(operands: &str) -> Result> { + let s = operands.trim(); + if s.is_empty() { + return Ok(Vec::new()); + } + + let mut tokens = Vec::new(); + let mut rest = s; + + loop { + rest = rest.trim_start_matches([' ', '\t']); + if rest.is_empty() { + break; + } + // Skip commas + if rest.starts_with(',') { + rest = &rest[1..]; + continue; + } + + let (tok, tail) = next_token(rest)?; + tokens.push(tok); + rest = tail; + } + Ok(tokens) +} + +fn next_token(s: &str) -> Result<(Token, &str)> { + let s = s.trim_start(); + let bytes = s.as_bytes(); + match bytes.first() { + // Register: v0..v65535 + Some(b'v') => { + let end = s[1..] 
+ .find(|c: char| !c.is_ascii_digit()) + .map(|i| i + 1) + .unwrap_or(s.len()); + let num_str = &s[1..end]; + let n: u16 = num_str + .parse() + .map_err(|_| DexError::DexFileError(format!("bad v-register: {s:?}")))?; + Ok((Token::Register(n), &s[end..])) + } + + // p-register: p0..p255 — kept as Token::PRegister; caller resolves to vN + Some(b'p') => { + let end = s[1..] + .find(|c: char| !c.is_ascii_digit()) + .map(|i| i + 1) + .unwrap_or(s.len()); + let num_str = &s[1..end]; + let n: u16 = num_str + .parse() + .map_err(|_| DexError::DexFileError(format!("bad p-register: {s:?}")))?; + Ok((Token::PRegister(n), &s[end..])) + } + + // Register list: {v0, v1, v2} or {v0 .. v3} or {} + Some(b'{') => { + let close = + s.find('}').ok_or_else(|| DexError::DexFileError("unclosed {".into()))?; + let inner = &s[1..close]; + let rest = &s[close + 1..]; + // Range form: {vA .. vB} + if inner.contains("..") { + let parts: Vec<&str> = inner.splitn(2, "..").collect(); + let first = parse_reg(parts[0].trim())?; + let last = parse_reg(parts[1].trim())?; + Ok((Token::RegRange(first, last), rest)) + } else { + // Handle empty list {} + let inner = inner.trim(); + let regs = if inner.is_empty() { + Vec::new() + } else { + inner + .split(',') + .map(|r| parse_reg(r.trim())) + .collect::>>()? + }; + Ok((Token::RegList(regs), rest)) + } + } + + // String literal: "..." 
with proper escape handling + Some(b'"') => { + let mut chars = s[1..].char_indices(); + let mut end = s.len(); // fallback: consume all + let mut found = false; + while let Some((i, c)) = chars.next() { + if c == '"' { + end = 1 + i + 1; // +1 for leading '"', +1 past closing '"' + found = true; + break; + } + if c == '\\' { + chars.next(); // skip the escaped character + } + } + if !found { + return Err(DexError::DexFileError("unclosed string literal".into())); + } + let inner = &s[1..end - 1]; + // Unescape \n, \t, \\, \" + let unescaped = inner + .replace("\\n", "\n") + .replace("\\t", "\t") + .replace("\\\\", "\\") + .replace("\\\"", "\""); + Ok((Token::StringLit(unescaped), &s[end..])) + } + + // Literal: # or # (e.g. #+42, #-1, #int +65536) + Some(b'#') => { + let rest = s.strip_prefix('#').unwrap().trim_start(); + // Skip optional type keyword (int, long, float, double, short, byte, char) + let rest = skip_type_keyword(rest); + let rest = rest.trim_start(); + let end = rest + .find(|c: char| c == ',' || c == '}' || c.is_whitespace()) + .unwrap_or(rest.len()); + if end == 0 { + return Err(DexError::DexFileError(format!( + "empty literal after '#' in {s:?}" + ))); + } + let v = parse_int(&rest[..end])?; + Ok((Token::Literal(v), &rest[end..])) + } + + // Branch target / label: :name + Some(b':') => { + let rest = s.strip_prefix(':').unwrap(); + let end = rest + .find(|c: char| c == ',' || c.is_whitespace()) + .unwrap_or(rest.len()); + Ok((Token::Label(rest[..end].to_string()), &rest[end..])) + } + + // Type/Method/Field reference: L...; or [[... + Some(b'L') | Some(b'[') => parse_reference(s), + + // Primitive type descriptor (V, I, B, etc.) 
— treat as type ref + Some(b'V') + | Some(b'B') + | Some(b'C') + | Some(b'D') + | Some(b'F') + | Some(b'I') + | Some(b'J') + | Some(b'S') + | Some(b'Z') => { + let end = s + .find(|c: char| c == ',' || c.is_whitespace()) + .unwrap_or(s.len()); + Ok((Token::TypeRef(s[..end].to_string()), &s[end..])) + } + + // Signed integer literal (no # prefix) — branch offsets (+5, -3) and bare numbers + Some(c) if (*c as char).is_ascii_digit() || *c == b'-' || *c == b'+' => { + let end = s + .find(|c: char| c == ',' || c == '}' || c.is_whitespace()) + .unwrap_or(s.len()); + let v = parse_int(&s[..end])?; + Ok((Token::Literal(v), &s[end..])) + } + + other => Err(DexError::DexFileError(format!( + "unexpected token start: {:?} in {:?}", + other.map(|c| *c as char), + s + ))), + } +} + +/// Parse a register operand (`vN` or `pN`) and return the raw register index. +/// p-registers are returned as-is; callers must resolve them using `registers - ins + pN`. +fn parse_reg(s: &str) -> Result { + let s = s.trim(); + if let Some(rest) = s.strip_prefix('v') { + rest.parse().map_err(|_| DexError::DexFileError(format!("bad v-register {s:?}"))) + } else if let Some(rest) = s.strip_prefix('p') { + rest.parse().map_err(|_| DexError::DexFileError(format!("bad p-register {s:?}"))) + } else { + Err(DexError::DexFileError(format!("expected register, got {s:?}"))) + } +} + +fn parse_int(s: &str) -> Result { + let s = s.trim(); + if s.is_empty() { + return Err(DexError::DexFileError("empty integer".into())); + } + let negative = s.starts_with('-'); + // Strip sign prefix (+ or -) + let s2 = s.trim_start_matches(['-', '+']); + let v: u64 = if let Some(hex) = s2.strip_prefix("0x").or_else(|| s2.strip_prefix("0X")) { + u64::from_str_radix(hex, 16) + .map_err(|_| DexError::DexFileError(format!("bad hex: {s:?}")))? + } else { + s2.parse::().map_err(|_| DexError::DexFileError(format!("bad int: {s:?}")))? 
+ }; + Ok(if negative { -(v as i64) } else { v as i64 }) +} + +/// Skip an optional type-keyword prefix (`int`, `long`, `float`, `double`, +/// `short`, `byte`, `char`) from a literal operand such as `int +65536`. +/// Returns the remainder after the keyword (and any whitespace). +fn skip_type_keyword(s: &str) -> &str { + const KEYWORDS: &[&str] = &["int", "long", "float", "double", "short", "byte", "char"]; + for kw in KEYWORDS { + if let Some(rest) = s.strip_prefix(kw) { + if rest.is_empty() + || rest.starts_with(|c: char| c.is_whitespace() || c == '+' || c == '-') + { + return rest.trim_start(); + } + } + } + s +} + +/// Parse a type/method/field reference starting at `s`. +fn parse_reference(s: &str) -> Result<(Token, &str)> { + // Find the end of the class descriptor (up to but not including '>') + let (class_desc, after_class) = consume_type_desc(s); + let after_class = after_class.trim(); + + if after_class.starts_with("->") { + // Method or field reference + let after_arrow = after_class.strip_prefix("->").unwrap(); + // Find the member name (up to '(' for method, ':' for field) + if let Some(paren) = after_arrow.find('(') { + // Method reference: name(params)return + let name = &after_arrow[..paren]; + let after_name = &after_arrow[paren..]; + // Find matching ')' + let close = after_name + .find(')') + .ok_or_else(|| DexError::DexFileError("unclosed method descriptor".into()))?; + let params_str = &after_name[1..close]; + let return_str = &after_name[close + 1..]; + // Return type can be any type descriptor; consume one + let (return_desc, rest_after_ref) = consume_type_desc(return_str); + let params = parse_type_list(params_str); + let end = s.len() - rest_after_ref.len(); + Ok(( + Token::MethodRef { + class: class_desc.to_string(), + name: name.to_string(), + proto: ProtoKey::new(return_desc, params), + }, + &s[end..], + )) + } else if let Some(colon) = after_arrow.find(':') { + // Field reference: name:type + let name = &after_arrow[..colon]; + let 
/// Consume one DEX type descriptor from the start of `s`.
///
/// Returns `(descriptor, remainder)`; the remainder is returned exactly as it
/// appears in `s` (no whitespace is stripped).
///
/// * `L...;` class descriptors run through the first `;` (or to the end of
///   `s` if there is none).
/// * `[`-prefixed array descriptors take every leading `[` plus either a
///   class descriptor or a single element character.
/// * Anything else is taken up to the next `,`, `}` or whitespace, and always
///   at least one character.
///
/// All splits fall on character boundaries, so arbitrary (including
/// non-ASCII) input never panics.
fn consume_type_desc(s: &str) -> (&str, &str) {
    let end = match s.as_bytes().first() {
        None => 0,
        Some(b'L') => s.find(';').map_or(s.len(), |p| p + 1),
        Some(b'[') => {
            let dims = s.bytes().take_while(|&b| b == b'[').count();
            let elem = &s[dims..];
            match elem.chars().next() {
                Some('L') => elem.find(';').map_or(s.len(), |p| dims + p + 1),
                // Step over the whole element character, however many bytes it is.
                Some(c) => dims + c.len_utf8(),
                None => s.len(),
            }
        }
        Some(_) => {
            let run = s
                .find(|c: char| c == ',' || c.is_whitespace() || c == '}')
                .unwrap_or(s.len());
            if run == 0 {
                // `s` starts with a delimiter: still consume exactly one character.
                s.chars().next().map_or(0, char::len_utf8)
            } else {
                run
            }
        }
    };
    s.split_at(end)
}
+pub(crate) fn encode_insn( + opcode: Code, + regs: &[u16], + literal: i64, + ref_idx: Option, + branch_offset: Option, +) -> Result> { + use Format::*; + let op = opcode as u8 as u16; + let fmt = Instruction::format_of(opcode); + let idx = ref_idx.unwrap_or(0); + let offset = branch_offset.unwrap_or(0); + let r0 = regs.first().copied().unwrap_or(0); + let r1 = regs.get(1).copied().unwrap_or(0); + let r2 = regs.get(2).copied().unwrap_or(0); + + let words: Vec = match fmt { + // 1-word formats + k10x => vec![op], + k12x => vec![op | ((r0 & 0xF) << 8) | ((r1 & 0xF) << 12)], + k11n => vec![op | ((r0 & 0xF) << 8) | (((literal as u8) & 0xF) as u16) << 12], + k11x => vec![op | ((r0 & 0xFF) << 8)], + k10t => vec![op | (((offset as i8) as u8 as u16) << 8)], + // 2-word formats + k20t => vec![op, offset as u16], + k22x => vec![op | ((r0 & 0xFF) << 8), r1], + k21t => vec![op | ((r0 & 0xFF) << 8), offset as u16], + k21s => vec![op | ((r0 & 0xFF) << 8), literal as i16 as u16], + k21h => { + // CONST_HIGH16 stores bits [31:16]; CONST_WIDE_HIGH16 stores bits [63:48]. 
+ let encoded = if opcode == Code::CONST_WIDE_HIGH16 { + (literal >> 48) as u16 + } else { + (literal >> 16) as u16 + }; + vec![op | ((r0 & 0xFF) << 8), encoded] + } + k21c => vec![op | ((r0 & 0xFF) << 8), idx as u16], + k23x => vec![op | ((r0 & 0xFF) << 8), ((r2 & 0xFF) << 8) | (r1 & 0xFF)], + k22b => vec![ + op | ((r0 & 0xFF) << 8), + (((literal as i8) as u8 as u16) << 8) | (r1 & 0xFF), + ], + k22t => vec![ + op | ((r0 & 0xF) << 8) | ((r1 & 0xF) << 12), + offset as u16, + ], + k22s => vec![ + op | ((r0 & 0xF) << 8) | ((r1 & 0xF) << 12), + literal as i16 as u16, + ], + k22c => vec![ + op | ((r0 & 0xF) << 8) | ((r1 & 0xF) << 12), + idx as u16, + ], + // 3-word formats + k32x => vec![op, r0, r1], + k30t => { + let o = offset as u32; + vec![op, o as u16, (o >> 16) as u16] + } + k31t | k31i => { + let v = if matches!(fmt, k31t) { offset as u32 } else { literal as u32 }; + vec![op | ((r0 & 0xFF) << 8), v as u16, (v >> 16) as u16] + } + k31c => { + vec![op | ((r0 & 0xFF) << 8), idx as u16, (idx >> 16) as u16] + } + k35c | k45cc => { + // A|G|op BBBB F|E|D|C [HHHH for k45cc] + let count = regs.len() as u16; + let g = regs.get(4).copied().unwrap_or(0) & 0xF; + let word0 = op | (g << 8) | (count << 12); + let word2 = (regs.first().copied().unwrap_or(0) & 0xF) + | ((regs.get(1).copied().unwrap_or(0) & 0xF) << 4) + | ((regs.get(2).copied().unwrap_or(0) & 0xF) << 8) + | ((regs.get(3).copied().unwrap_or(0) & 0xF) << 12); + if matches!(fmt, k45cc) { + vec![word0, idx as u16, word2, 0] // second idx = 0 for now + } else { + vec![word0, idx as u16, word2] + } + } + k3rc | k4rcc => { + // AA|op BBBB CCCC [HHHH for k4rcc] + let count = regs.len() as u16; + let first = r0; + if matches!(fmt, k4rcc) { + vec![op | (count << 8), idx as u16, first, 0] + } else { + vec![op | (count << 8), idx as u16, first] + } + } + k51l => { + let v = literal as u64; + vec![ + op | ((r0 & 0xFF) << 8), + v as u16, + (v >> 16) as u16, + (v >> 32) as u16, + (v >> 48) as u16, + ] + } + _ => { + return 
Err(DexError::DexFileError(format!( + "unsupported format {fmt:?} for opcode {:?}", + opcode + ))) + } + }; + Ok(words) +} + +// -- Instruction node with pending branch / ref -------------------------------- + +/// An instruction before branch-offset resolution. +#[derive(Clone, Debug)] +struct PendingInsn { + node: InsnNode, + /// Width hint for branch instructions (in code units). + branch_width: u8, +} + +impl PendingInsn { + fn insn_size(&self) -> usize { + let fmt = Instruction::format_of(self.node.opcode); + use Format::*; + match fmt { + k10x | k12x | k11n | k11x | k10t => 1, + k20t | k22x | k21t | k21s | k21h | k21c | k23x | k22b | k22t | k22s | k22c => 2, + k32x | k30t | k31t | k31i | k31c | k35c | k3rc => 3, + k45cc | k4rcc => 4, + k51l => 5, + _ => 1, + } + } +} + +// -- CodeBuilder --------------------------------------------------------------- + +/// Assembler for a single DEX method body. +/// +/// Accumulate instructions with [`emit`](Self::emit) and labels with +/// [`label`](Self::label), then call [`build`](Self::build) to produce a +/// [`CodeDef`]. +pub struct CodeBuilder { + registers: u16, + ins: u16, + outs: u16, + insns: Vec, + labels: HashMap, // label -> insn index + tries: Vec, +} + +impl CodeBuilder { + /// Create a builder for a method with `registers` total registers, + /// `ins` incoming parameter registers, and `outs` outgoing parameter slots. + pub fn new(registers: u16, ins: u16, outs: u16) -> Self { + Self { registers, ins, outs, insns: Vec::new(), labels: HashMap::new(), tries: Vec::new() } + } + + /// Place a named label at the current instruction position. + pub fn label(&mut self, name: &str) { + self.labels.insert(name.to_string(), self.insns.len()); + } + + /// Add a try block. + pub fn add_try(&mut self, t: TryDef) { + self.tries.push(t); + } + + /// Parse and add one instruction from disassembly text. 
+ /// + /// The line format is `opcode [operand, ...]`, for example: + /// ```text + /// const-string v0, "hello" + /// invoke-virtual {v0}, Ljava/lang/Object;->toString()Ljava/lang/String; + /// if-eqz v0, :loop + /// ``` + pub fn emit(&mut self, line: &str) -> Result<()> { + let node = parse_line(line)?; + let branch_width = branch_width_for(&node); + self.insns.push(PendingInsn { node, branch_width }); + Ok(()) + } + + /// Add a pre-built [`InsnNode`] directly (bypass the text parser). + pub fn add_insn(&mut self, node: InsnNode) { + let branch_width = branch_width_for(&node); + self.insns.push(PendingInsn { node, branch_width }); + } + + /// Add raw pre-encoded bytecode words. + pub fn add_raw(&mut self, words: &[u16]) { + // Wrap in a NOP node carrying the raw words as a payload via literal. + // We'll detect this in `build()` by checking a sentinel opcode. + // Actually, easier to just expand to NOP instructions and embed directly. + // For now, push each word as a special raw node. + for (i, &w) in words.iter().enumerate() { + let opcode = Instruction::opcode_of(w); + let mut node = InsnNode::new(opcode); + node.literal = i as i64; // index into original raw slice + self.insns.push(PendingInsn { node, branch_width: 0 }); + } + } + + /// Resolve labels and widen branches as needed, then return symbolic + /// [`InsnNode`]s with all [`BranchTarget::Label`] targets resolved to + /// [`BranchTarget::Offset`]. + /// + /// Pool references (strings, types, fields, methods) remain symbolic and + /// are resolved by [`crate::file::writer::DexWriter`] at serialisation + /// time. Returns `Err` if a label is referenced but never defined. + pub fn build(mut self) -> Result { + // Iterative branch-width fixup: widen branches until stable. + for _ in 0..5 { + let widened = self.fixup_branches()?; + if !widened { + break; + } + } + + // Resolve all label targets to concrete PC-relative offsets. 
+ let pcs = compute_pcs(&self.insns); + let mut resolved: Vec = Vec::with_capacity(self.insns.len()); + + for (i, pending) in self.insns.into_iter().enumerate() { + let mut node = pending.node; + if let Some(BranchTarget::Label(ref lbl)) = node.target.clone() { + let target_pc = self + .labels + .get(lbl.as_str()) + .map(|&idx| pcs[idx] as i32) + .ok_or_else(|| { + DexError::DexFileError(format!("undefined label: {lbl:?}")) + })?; + node.target = Some(BranchTarget::Offset(target_pc - pcs[i] as i32)); + } + resolved.push(node); + } + + Ok(CodeDef { + registers: self.registers, + ins: self.ins, + outs: self.outs, + insns: resolved, + tries: self.tries, + }) + } + + /// One pass of branch-width fixup. Returns `true` if any branch was widened. + fn fixup_branches(&mut self) -> Result { + let pcs = compute_pcs(&self.insns); + let mut widened = false; + + for (i, pending) in self.insns.iter_mut().enumerate() { + let node = &pending.node; + if let Some(BranchTarget::Label(lbl)) = &node.target { + let target_idx = self.labels.get(lbl.as_str()).copied().ok_or_else(|| { + DexError::DexFileError(format!("undefined label: {lbl:?}")) + })?; + let offset = pcs[target_idx] as i32 - pcs[i] as i32; + let needed = if offset >= i8::MIN as i32 && offset <= i8::MAX as i32 { + 1 + } else if offset >= i16::MIN as i32 && offset <= i16::MAX as i32 { + 2 + } else { + 3 + }; + if needed > pending.branch_width { + pending.branch_width = needed; + // Widen the opcode + pending.node.opcode = widen_branch(pending.node.opcode); + widened = true; + } + } + } + Ok(widened) + } +} + +/// Compute the PC (in code units) of each instruction. 
+fn compute_pcs(insns: &[PendingInsn]) -> Vec { + let mut pcs = Vec::with_capacity(insns.len() + 1); + let mut pc = 0u32; + for insn in insns { + pcs.push(pc); + pc += insn.insn_size() as u32; + } + pcs.push(pc); // sentinel + pcs +} + +fn branch_width_for(node: &InsnNode) -> u8 { + use Format::*; + match Instruction::format_of(node.opcode) { + k10t => 1, + k20t => 2, + k30t => 3, + k21t | k22t => 2, + k31t => 3, + _ => 0, + } +} + +/// Widen a branch opcode to handle larger offsets. +fn widen_branch(op: Code) -> Code { + match op { + Code::GOTO => Code::GOTO_16, + Code::GOTO_16 => Code::GOTO_32, + _ => op, // if-* branches max at 16-bit; can't widen further + } +} + +// -- Disassembly text parser --------------------------------------------------- + +/// Parse a single disassembly line into an [`InsnNode`]. +fn parse_line(line: &str) -> Result { + let line = line.trim(); + // Split opcode from operands (first whitespace). + let (mnemonic, operands_str) = if let Some(sp) = line.find(|c: char| c.is_whitespace()) { + (&line[..sp], line[sp..].trim()) + } else { + (line, "") + }; + + // Strip inline comments (//) + let operands_str = if let Some(pos) = operands_str.find("//") { + &operands_str[..pos] + } else { + operands_str + }; + let operands_str = operands_str.trim(); + + let opcode = opcode_map() + .get(mnemonic) + .copied() + .ok_or_else(|| DexError::DexFileError(format!("unknown opcode: {mnemonic:?}")))?; + + let fmt = Instruction::format_of(opcode); + let index_type = Instruction::index_type_of(opcode); + + // Tokenize operands. 
+ let tokens = tokenize(operands_str)?; + + use Format::*; + + let mut node = InsnNode::new(opcode); + + match fmt { + // -- No operands ------------------------------------------------------ + k10x => {} + + // -- Single register -------------------------------------------------- + k11x => { + node.regs = vec![require_reg(&tokens, 0)?]; + } + + // -- Two 4-bit registers (k12x) --------------------------------------- + k12x => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + } + + // -- 4-bit reg + 4-bit literal (k11n) --------------------------------- + k11n => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.literal = require_literal(&tokens, 1)?; + } + + // -- 8-bit branch (k10t) ----------------------------------------------- + k10t => { + node.target = Some(require_target(&tokens, 0)?); + } + + // -- 16-bit branch (k20t) ---------------------------------------------- + k20t => { + node.target = Some(require_target(&tokens, 0)?); + } + + // -- 32-bit branch (k30t) ---------------------------------------------- + k30t => { + node.target = Some(require_target(&tokens, 0)?); + } + + // -- reg + 16-bit branch (k21t) ---------------------------------------- + k21t => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.target = Some(require_target(&tokens, 1)?); + } + + // -- two 4-bit regs + 16-bit branch (k22t) ---------------------------- + k22t => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + node.target = Some(require_target(&tokens, 2)?); + } + + // -- reg + 16-bit literal (k21s, k22s) -------------------------------- + k21s => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.literal = require_literal(&tokens, 1)?; + } + + // -- reg + high-16 literal (k21h) -------------------------------------- + // The assembler text carries the full shifted value (e.g. #+65536 for + // const/high16 v0, 0x10000); encode_insn then extracts the high 16 bits. 
+ k21h => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.literal = require_literal(&tokens, 1)?; + } + + // -- reg + 32-bit literal / branch (k31i, k31t) ----------------------- + k31i => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.literal = require_literal(&tokens, 1)?; + } + k31t => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.target = Some(require_target(&tokens, 1)?); + } + + // -- reg + 64-bit literal (k51l) --------------------------------------- + k51l => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.literal = require_literal(&tokens, 1)?; + } + + // -- reg + 16-bit index (k21c, k31c) ---------------------------------- + k21c | k31c => { + node.regs = vec![require_reg(&tokens, 0)?]; + node.reference = Some(make_ref(index_type, &tokens, 1)?); + } + + // -- 3 × 8-bit reg (k23x) --------------------------------------------- + k23x => { + node.regs = vec![ + require_reg(&tokens, 0)?, + require_reg(&tokens, 1)?, + require_reg(&tokens, 2)?, + ]; + } + + // -- 8-bit reg pair + 8-bit literal (k22b) ---------------------------- + k22b => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + node.literal = require_literal(&tokens, 2)?; + } + + // -- two 4-bit regs + 16-bit literal (k22s) --------------------------- + k22s => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + node.literal = require_literal(&tokens, 2)?; + } + + // -- two 4-bit regs + 16-bit index (k22c) ----------------------------- + k22c => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + node.reference = Some(make_ref(index_type, &tokens, 2)?); + } + + // -- 8-bit reg + 16-bit reg (k22x) ------------------------------------ + k22x => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + } + + // -- two 16-bit regs (k32x) -------------------------------------------- + k32x => { + node.regs = vec![require_reg(&tokens, 0)?, require_reg(&tokens, 1)?]; + } + + // -- invoke 
with register list + index (k35c, k45cc) ------------------ + k35c | k45cc => { + node.regs = require_reg_list(&tokens, 0)?; + node.reference = Some(make_ref(index_type, &tokens, 1)?); + } + + // -- invoke range + index (k3rc, k4rcc) ------------------------------- + k3rc | k4rcc => { + node.regs = require_reg_range(&tokens, 0)?; + node.reference = Some(make_ref(index_type, &tokens, 1)?); + } + + _ => { + return Err(DexError::DexFileError(format!( + "unsupported format {fmt:?} for opcode {mnemonic:?}" + ))) + } + } + Ok(node) +} + +// -- Token extractors ---------------------------------------------------------- + +fn require_reg(tokens: &[Token], idx: usize) -> Result { + match tokens.get(idx) { + Some(Token::Register(r)) => Ok(*r), + Some(Token::PRegister(r)) => Ok(*r), // caller resolves p-regs if needed + other => Err(DexError::DexFileError(format!( + "expected register at token {idx}, got {other:?}" + ))), + } +} + +fn require_literal(tokens: &[Token], idx: usize) -> Result { + match tokens.get(idx) { + Some(Token::Literal(v)) => Ok(*v), + other => Err(DexError::DexFileError(format!( + "expected literal at token {idx}, got {other:?}" + ))), + } +} + +fn require_target(tokens: &[Token], idx: usize) -> Result { + match tokens.get(idx) { + Some(Token::Label(s)) => Ok(BranchTarget::Label(s.clone())), + Some(Token::Literal(v)) => Ok(BranchTarget::Offset(*v as i32)), + other => Err(DexError::DexFileError(format!( + "expected branch target at token {idx}, got {other:?}" + ))), + } +} + +fn require_reg_list(tokens: &[Token], idx: usize) -> Result> { + match tokens.get(idx) { + Some(Token::RegList(r)) => Ok(r.clone()), + Some(Token::RegRange(first, last)) => Ok((*first..=*last).collect()), + // Single register not in braces — still valid + Some(Token::Register(r)) | Some(Token::PRegister(r)) => Ok(vec![*r]), + other => Err(DexError::DexFileError(format!( + "expected register list at token {idx}, got {other:?}" + ))), + } +} + +fn require_reg_range(tokens: &[Token], 
idx: usize) -> Result> { + match tokens.get(idx) { + Some(Token::RegRange(first, last)) => Ok((*first..=*last).collect()), + Some(Token::RegList(r)) => Ok(r.clone()), + other => Err(DexError::DexFileError(format!( + "expected register range at token {idx}, got {other:?}" + ))), + } +} + +fn make_ref( + index_type: &crate::file::instruction::IndexType, + tokens: &[Token], + idx: usize, +) -> Result { + use crate::file::instruction::IndexType::*; + let tok = tokens.get(idx).ok_or_else(|| { + DexError::DexFileError(format!("missing reference operand at token {idx}")) + })?; + match (index_type, tok) { + (StringRef, Token::StringLit(s)) => Ok(DexRef::String(s.clone())), + (StringRef, Token::TypeRef(s)) => Ok(DexRef::String(s.clone())), + (TypeRef, Token::TypeRef(s)) => Ok(DexRef::Type(s.clone())), + (MethodRef, Token::MethodRef { class, name, proto }) => Ok(DexRef::Method { + class: class.clone(), + name: name.clone(), + proto: proto.clone(), + }), + (FieldRef, Token::FieldRef { class, name, field_type }) => Ok(DexRef::Field { + class: class.clone(), + name: name.clone(), + field_type: field_type.clone(), + }), + // Best-effort fallbacks + (_, Token::StringLit(s)) => Ok(DexRef::String(s.clone())), + (_, Token::TypeRef(s)) => Ok(DexRef::Type(s.clone())), + (_, Token::MethodRef { class, name, proto }) => Ok(DexRef::Method { + class: class.clone(), + name: name.clone(), + proto: proto.clone(), + }), + (_, Token::FieldRef { class, name, field_type }) => Ok(DexRef::Field { + class: class.clone(), + name: name.clone(), + field_type: field_type.clone(), + }), + _ => Err(DexError::DexFileError(format!( + "unexpected token {tok:?} for index type {index_type:?} at position {idx}" + ))), + } +} + +// -- DexIrBuilder — high-level builder facade ---------------------------------- + +/// High-level builder that progressively constructs a [`DexIr`] and emits classes, +/// methods, and fields through a fluent API. 
+/// +/// ```rust +/// use dexrs::file::builder::DexIrBuilder; +/// use dexrs::file::modifiers::{ACC_PUBLIC, ACC_STATIC}; +/// +/// let mut b = DexIrBuilder::new(35); +/// let mut cb = b.begin_class("Lhello/World;", ACC_PUBLIC, Some("Ljava/lang/Object;"), None); +/// let (method, mut code) = cb.begin_method("main", "([Ljava/lang/String;)V", ACC_PUBLIC | ACC_STATIC, 3, 1, 2); +/// code.emit("return-void").unwrap(); +/// cb.finish_method((method, code)).unwrap(); +/// b.finish_class(cb); +/// let ir = b.finish(); +/// ``` +pub struct DexIrBuilder { + ir: crate::file::ir::DexIr, +} + +/// In-progress class builder returned by [`DexIrBuilder::begin_class`]. +pub struct ClassBuilder { + class: crate::file::ir::ClassDef, +} + +impl DexIrBuilder { + pub fn new(version: u32) -> Self { + Self { ir: crate::file::ir::DexIr::new(version) } + } + + /// Begin defining a class. + pub fn begin_class( + &mut self, + descriptor: &str, + access_flags: u32, + superclass: Option<&str>, + source_file: Option<&str>, + ) -> ClassBuilder { + let mut c = crate::file::ir::ClassDef::new(descriptor).access(access_flags); + if let Some(s) = superclass { + c = c.superclass(s); + } + if let Some(sf) = source_file { + c = c.source_file(sf); + } + ClassBuilder { class: c } + } + + /// Add a completed class to the IR. + pub fn finish_class(&mut self, builder: ClassBuilder) { + self.ir.add_class(builder.class); + } + + /// Consume the builder and return the completed IR. + pub fn finish(self) -> crate::file::ir::DexIr { + self.ir + } +} + +impl ClassBuilder { + /// Begin defining a method. Returns a [`CodeBuilder`] pre-configured with + /// the register / in / out counts. 
+ pub fn begin_method( + &self, + name: &str, + descriptor: &str, + access_flags: u32, + registers: u16, + ins: u16, + outs: u16, + ) -> (MethodDef, CodeBuilder) { + let proto = ProtoKey::from_descriptor(descriptor) + .unwrap_or_else(|| ProtoKey::new("V", [] as [&str; 0])); + let method = MethodDef::new(name, proto).access(access_flags); + let code = CodeBuilder::new(registers, ins, outs); + (method, code) + } + + /// Add a static field. + pub fn add_static_field(&mut self, name: &str, field_type: &str, access_flags: u32) { + self.class.add_static_field( + crate::file::ir::FieldDef::new(name, field_type).access(access_flags), + ); + } + + /// Add an instance field. + pub fn add_instance_field(&mut self, name: &str, field_type: &str, access_flags: u32) { + self.class.add_instance_field( + crate::file::ir::FieldDef::new(name, field_type).access(access_flags), + ); + } + + /// Finalise a method (build its code) and add it to this class. + pub fn finish_method(&mut self, method_and_code: (MethodDef, CodeBuilder)) -> Result<()> { + let (mut method, code_builder) = method_and_code; + method.code = Some(code_builder.build()?); + // Determine whether this is direct or virtual based on name + access flags. 
+ if method.name.starts_with('<') || method.access_flags & 0x0008 != 0 + || method.access_flags & 0x0002 != 0 || method.access_flags & 0x0200 != 0 + { + self.class.add_direct_method(method); + } else { + self.class.add_virtual_method(method); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::file::instruction::Code; + + #[test] + fn parse_nop() { + let node = parse_line("nop").unwrap(); + assert_eq!(node.opcode, Code::NOP); + } + + #[test] + fn parse_return_void() { + let node = parse_line("return-void").unwrap(); + assert_eq!(node.opcode, Code::RETURN_VOID); + } + + #[test] + fn parse_const_string() { + let node = parse_line(r#"const-string v0, "hello""#).unwrap(); + assert_eq!(node.opcode, Code::CONST_STRING); + assert_eq!(node.regs, vec![0]); + assert!(matches!(&node.reference, Some(DexRef::String(s)) if s == "hello")); + } + + #[test] + fn parse_invoke_virtual() { + let node = + parse_line("invoke-virtual {v0, v1}, Ljava/lang/Object;->toString()Ljava/lang/String;") + .unwrap(); + assert_eq!(node.opcode, Code::INVOKE_VIRTUAL); + assert_eq!(node.regs, vec![0, 1]); + assert!(matches!( + &node.reference, + Some(DexRef::Method { name, .. }) if name == "toString" + )); + } + + #[test] + fn parse_iget_object() { + let node = + parse_line("iget-object v0, v1, Lcom/example/Foo;->bar:Ljava/lang/String;").unwrap(); + assert!(matches!( + &node.reference, + Some(DexRef::Field { name, .. 
}) if name == "bar" + )); + } + + #[test] + fn parse_if_eqz() { + let node = parse_line("if-eqz v0, :my_label").unwrap(); + assert!(matches!(&node.target, Some(BranchTarget::Label(l)) if l == "my_label")); + } + + #[test] + fn parse_const_literal() { + let node = parse_line("const v0, #42").unwrap(); + assert_eq!(node.literal, 42); + } + + #[test] + fn code_builder_return_void() { + let mut cb = CodeBuilder::new(1, 0, 0); + cb.emit("return-void").unwrap(); + let code = cb.build().unwrap(); + assert_eq!(code.insns.len(), 1); + assert_eq!(code.insns[0].opcode, Code::RETURN_VOID); + } + + #[test] + fn code_builder_goto_forward() { + let mut cb = CodeBuilder::new(1, 0, 0); + cb.emit("nop").unwrap(); + cb.label("end"); + cb.emit("return-void").unwrap(); + // Backward goto test: + let mut cb2 = CodeBuilder::new(1, 0, 0); + cb2.label("loop"); + cb2.emit("nop").unwrap(); + cb2.emit("goto :loop").unwrap(); + let code = cb2.build().unwrap(); + // goto is at PC=1, target=0, offset=-1 + let goto_node = &code.insns[1]; + assert_eq!(goto_node.opcode, Code::GOTO); + assert!(matches!(&goto_node.target, Some(BranchTarget::Offset(-1)))); + } + + #[test] + fn encode_insn_const_4() { + let words = encode_insn(Code::CONST_4, &[0], 5, None, None).unwrap(); + assert_eq!(words.len(), 1); + // const/4: opcode=0x12, A=0, B=5 -> 0x5012 + assert_eq!(words[0], 0x5012); + } + + // -- Tokenizer hardening tests --------------------------------------------- + + #[test] + fn parse_positive_signed_literal() { + // #+42 and +42 must both parse as 42 + let n1 = parse_line("const/16 v0, #+42").unwrap(); + assert_eq!(n1.literal, 42); + let n2 = parse_line("const v0, #+65536").unwrap(); + assert_eq!(n2.literal, 65536); + } + + #[test] + fn parse_negative_signed_literal() { + let n = parse_line("const/16 v0, #-100").unwrap(); + assert_eq!(n.literal, -100); + } + + #[test] + fn parse_hex_literal() { + let n = parse_line("const v0, #0xff").unwrap(); + assert_eq!(n.literal, 255); + let n2 = parse_line("const 
v0, #0x10000").unwrap(); + assert_eq!(n2.literal, 0x10000); + } + + #[test] + fn parse_typed_literal_int() { + // #int +65536 — emitted by dump.rs imm_typed_u32 + let n = parse_line("const/high16 v0, #int +65536").unwrap(); + assert_eq!(n.literal, 65536); + } + + #[test] + fn parse_typed_literal_long() { + // #long +1234567890 — emitted by dump.rs imm_typed_u64 + let n = parse_line("const-wide v0, #long +1234567890").unwrap(); + assert_eq!(n.literal, 1234567890); + } + + #[test] + fn parse_goto_positive_offset() { + // dump.rs emits branch offsets as "+5" or "-3" (no # prefix) + let n = parse_line("goto +5").unwrap(); + assert!(matches!(n.target, Some(BranchTarget::Offset(5)))); + } + + #[test] + fn parse_goto_negative_offset() { + let n = parse_line("goto -3").unwrap(); + assert!(matches!(n.target, Some(BranchTarget::Offset(-3)))); + } + + #[test] + fn parse_if_eqz_offset() { + let n = parse_line("if-eqz v0, +12").unwrap(); + assert!(matches!(n.target, Some(BranchTarget::Offset(12)))); + } + + #[test] + fn parse_invoke_static_empty_args() { + // invoke-static {} must work (zero-arg static call) + let n = parse_line("invoke-static {}, Ljava/lang/Object;->clinit()V").unwrap(); + assert_eq!(n.regs, vec![] as Vec); + assert!(matches!(&n.reference, Some(DexRef::Method { name, .. 
}) if name == "clinit")); + } + + #[test] + fn parse_string_with_escaped_quote() { + // The bug: \" inside a string was not properly skipped, terminating early + let n = parse_line(r#"const-string v0, "say \"hello\"""#).unwrap(); + assert!( + matches!(&n.reference, Some(DexRef::String(s)) if s == r#"say "hello""#), + "got: {:?}", + n.reference + ); + } + + #[test] + fn parse_string_with_escape_sequences() { + let n = parse_line(r#"const-string v0, "line1\nline2\ttab""#).unwrap(); + assert!(matches!(&n.reference, Some(DexRef::String(s)) if s == "line1\nline2\ttab")); + } + + #[test] + fn parse_p_register() { + // p0 should be accepted (treated as register index 0) + let n = parse_line("return-object p0").unwrap(); + assert_eq!(n.regs, vec![0]); + } + + #[test] + fn parse_const_high16_full_value() { + // The assembler stores the full value; encoder extracts high 16 bits + let n = parse_line("const/high16 v0, #+65536").unwrap(); + assert_eq!(n.literal, 65536); + // Encoding: (65536 >> 16) as u16 = 1 + let words = encode_insn(n.opcode, &n.regs, n.literal, None, None).unwrap(); + assert_eq!(words[1], 1); // high word = 1 + } + + #[test] + fn parse_iget_with_comment() { + // Comments after // must be stripped before parsing + let n = parse_line( + "iget-object v0, v1, Lcom/example/Foo;->mField:Ljava/lang/String; // field@5", + ) + .unwrap(); + assert!(matches!(&n.reference, Some(DexRef::Field { name, .. 
}) if name == "mField")); + } + + #[test] + fn code_builder_branch_offset_roundtrip() { + // Build a method where goto uses an offset (not a label) + let mut cb = CodeBuilder::new(1, 0, 0); + cb.emit("nop").unwrap(); + cb.emit("goto -1").unwrap(); // jump back to nop at PC=0 (self.offset = -1) + let code = cb.build().unwrap(); + assert!(matches!(code.insns[1].target, Some(BranchTarget::Offset(-1)))); + } +} diff --git a/src/file/class_accessor.rs b/src/file/class_accessor.rs index 431f90a..70c9e00 100644 --- a/src/file/class_accessor.rs +++ b/src/file/class_accessor.rs @@ -540,10 +540,4 @@ impl Iterator for DataIterator<'_, T> { } } -// >>> begin python module export -#[cfg(feature = "python")] -#[pyo3::pymodule] -pub mod py_class_accessor { - #[pymodule_export] - use super::{PyClassAccessor, PyDexField, PyDexMethod}; -} + diff --git a/src/file/code_item_accessors.rs b/src/file/code_item_accessors.rs index 16eed7a..774018e 100644 --- a/src/file/code_item_accessors.rs +++ b/src/file/code_item_accessors.rs @@ -1,3 +1,7 @@ +use crate::{leb128, Result}; + +use super::{CatchHandlerData, CodeItem, DexContainer, DexFile, Instruction, TryItem, TypeIndex}; + #[cfg(feature = "python")] use pyo3::PyResult; #[cfg(feature = "python")] @@ -6,10 +10,6 @@ use std::sync::Arc; #[cfg(feature = "python")] use crate::py::rs_type_wrapper; -use crate::{leb128, Result}; - -use super::{CatchHandlerData, CodeItem, DexContainer, DexFile, Instruction, TryItem, TypeIndex}; - #[cfg(feature = "python")] use super::{PyDexCodeItem, PyDexInstruction}; @@ -19,7 +19,14 @@ use super::{PyDexCodeItem, PyDexInstruction}; #[derive(Debug, Clone)] pub struct CodeItemAccessor<'a> { code_off: u32, - code_item: &'a CodeItem, + /// Decoded CodeItem fields stored by value so both standard and compact + /// DEX code items normalise to the same structure. + registers_size: u16, + ins_size: u16, + outs_size: u16, + tries_size: u16, + /// `debug_info_off` from the standard DEX code item (0 for compact DEX). 
+ pub debug_info_off: u32, insns: &'a [u16], // these values are cached to reduce the number of calculations tries_off: Option, @@ -27,41 +34,105 @@ pub struct CodeItemAccessor<'a> { } impl<'a> CodeItemAccessor<'a> { + /// Builds an accessor from a standard DEX [`CodeItem`]. #[inline(always)] pub fn from_code_item( dex: &DexFile<'a, C>, code_item: &'a CodeItem, code_off: u32, ) -> Result> + where + C: DexContainer<'a>, + { + Self::from_fields( + dex, + code_item.registers_size, + code_item.ins_size, + code_item.outs_size, + code_item.tries_size, + code_item.debug_info_off, + code_item.insns_size, + code_off, + ) + } + + /// Builds an accessor from already-decoded field values. + /// + /// `code_off` is the absolute byte offset of the first instruction word. + #[allow(clippy::too_many_arguments)] + pub fn from_fields( + dex: &DexFile<'a, C>, + registers_size: u16, + ins_size: u16, + outs_size: u16, + tries_size: u16, + debug_info_off: u32, + insns_size: u32, + code_off: u32, + ) -> Result> where C: DexContainer<'a>, { let insns = match code_off { 0 => &[], - _ => dex.get_insns_raw(code_off, code_item.insns_size)?, + _ => dex.get_insns_raw(code_off, insns_size)?, }; // end of insns must be 4-byte aligned - let tries_off = insns.len() * 2 + if insns.len() % 2 == 1 { 2 } else { 0 }; - let tries_size = code_item.tries_size as usize * std::mem::size_of::(); + let tries_off_rel = insns.len() * 2 + if insns.len() % 2 == 1 { 2 } else { 0 }; + let try_item_total = tries_size as usize * std::mem::size_of::(); Ok(CodeItemAccessor { code_off, - code_item, + registers_size, + ins_size, + outs_size, + tries_size, + debug_info_off, insns, - tries_off: if code_item.tries_size > 0 { - Some(tries_off as u32) + tries_off: if tries_size > 0 { + Some(tries_off_rel as u32) } else { None }, - catch_handlers_off: if code_item.tries_size > 0 { - // Start of catch handlers will be at the end of all TryItems - Some((tries_off + tries_size) as u32) + catch_handlers_off: if tries_size > 0 { + 
Some((tries_off_rel + try_item_total) as u32) } else { None }, }) } + /// Returns an empty accessor (no instructions, used for null code offsets). + pub fn empty() -> Result> { + Ok(CodeItemAccessor { + code_off: 0, + registers_size: 0, + ins_size: 0, + outs_size: 0, + tries_size: 0, + debug_info_off: 0, + insns: &[], + tries_off: None, + catch_handlers_off: None, + }) + } + + /// Returns a synthetic [`CodeItem`] built from the decoded fields. + /// + /// For standard DEX files this matches the on-disk struct exactly. + /// For compact DEX files it is a synthesised value (insns_size comes from + /// the instruction slice length). + pub fn code_item(&self) -> CodeItem { + CodeItem { + registers_size: self.registers_size, + ins_size: self.ins_size, + outs_size: self.outs_size, + tries_size: self.tries_size, + debug_info_off: self.debug_info_off, + insns_size: self.insns.len() as u32, + } + } + #[inline] pub fn insns_size_in_code_units(&self) -> u32 { self.insns.len() as u32 @@ -108,28 +179,20 @@ impl<'a> CodeItemAccessor<'a> { self.code_off } - pub fn code_item_off(&self) -> u32 { - self.code_off - std::mem::size_of::() as u32 - } - - pub fn code_item(&self) -> &'a CodeItem { - self.code_item - } - pub fn registers_size(&self) -> u16 { - self.code_item.registers_size + self.registers_size } pub fn ins_size(&self) -> u16 { - self.code_item.ins_size + self.ins_size } pub fn outs_size(&self) -> u16 { - self.code_item.outs_size + self.outs_size } pub fn tries_size(&self) -> u16 { - self.code_item.tries_size + self.tries_size } pub fn inst_at(&self, pc: u32) -> Instruction<'a> { @@ -144,6 +207,29 @@ impl<'a> CodeItemAccessor<'a> { } } +/// Performs a binary search for the try item covering `dex_pc`, matching ART's +/// `DexFile::FindTryItem`. Returns the index of the matching `TryItem`, or +/// `None` if `dex_pc` is not covered by any try block. +/// +/// `try_items` must be sorted by `start_addr` (guaranteed by the DEX spec). 
+pub fn find_try_item(try_items: &[TryItem], dex_pc: u32) -> Option { + let mut min = 0usize; + let mut max = try_items.len(); + while min < max { + let mid = (min + max) / 2; + let start = try_items[mid].start_addr; + let end = start + try_items[mid].insn_count as u32; + if dex_pc < start { + max = mid; + } else if dex_pc >= end { + min = mid + 1; + } else { + return Some(mid); + } + } + None +} + impl<'a> IntoIterator for CodeItemAccessor<'a> { type Item = Instruction<'a>; type IntoIter = DexInstructionIterator<'a>; diff --git a/src/file/compact_dex.rs b/src/file/compact_dex.rs new file mode 100644 index 0000000..b8002e7 --- /dev/null +++ b/src/file/compact_dex.rs @@ -0,0 +1,120 @@ +/// CompactDex (`cdex`) magic bytes and supported version. +pub const CDEX_MAGIC: &[u8] = b"cdex"; +pub const CDEX_MAGIC_VERSIONS: &[&[u8]] = &[b"001\0"]; + +// Compact CodeItem field constants (matching ART's CompactDexFile::CodeItem) +const FLAG_PREHEADER_REGISTERS_SIZE: u16 = 1 << 0; +const FLAG_PREHEADER_INS_SIZE: u16 = 1 << 1; +const FLAG_PREHEADER_OUTS_SIZE: u16 = 1 << 2; +const FLAG_PREHEADER_TRIES_SIZE: u16 = 1 << 3; +const FLAG_PREHEADER_INSNS_SIZE: u16 = 1 << 4; +const INSNS_COUNT_SHIFT: u32 = 5; + +const FIELDS_REGISTERS_SIZE_SHIFT: u32 = 12; +const FIELDS_INS_SIZE_SHIFT: u32 = 8; +const FIELDS_OUTS_SIZE_SHIFT: u32 = 4; +const FIELDS_TRIES_SIZE_SHIFT: u32 = 0; + +/// Fields decoded from a CompactDex code item (and its preheader if present). +#[derive(Debug, Clone, Copy)] +pub struct DecodedCompactCodeItem { + pub registers_size: u16, + pub ins_size: u16, + pub outs_size: u16, + pub tries_size: u16, + /// Total instruction count in code units. + pub insns_size: u32, + /// Absolute byte offset within the DEX data section where instructions start. + pub insns_off: u32, +} + +/// Decodes a compact DEX code item from `data` at the given byte offset. +/// +/// `offset` must point to the first byte of the compact `fields_` u16 word. 
+/// The preheader (if any) must be readable at `offset - N * 2`. +/// +/// Returns `None` if `offset` is out of bounds for the compact code item header. +pub fn decode_compact_code_item(data: &[u8], offset: usize) -> Option { + if offset + 4 > data.len() { + return None; + } + let fields = u16::from_le_bytes([data[offset], data[offset + 1]]); + let icf = u16::from_le_bytes([data[offset + 2], data[offset + 3]]); + + let mut registers_size = ((fields as u32 >> FIELDS_REGISTERS_SIZE_SHIFT) & 0xF) as u16; + let mut ins_size = ((fields as u32 >> FIELDS_INS_SIZE_SHIFT) & 0xF) as u16; + let mut outs_size = ((fields as u32 >> FIELDS_OUTS_SIZE_SHIFT) & 0xF) as u16; + let mut tries_size = ((fields as u32 >> FIELDS_TRIES_SIZE_SHIFT) & 0xF) as u16; + let mut insns_size = (icf as u32) >> INSNS_COUNT_SHIFT; + + // Decode preheader words that precede the code item in memory. + // The preheader pointer walks backwards from `offset` in steps of 2 bytes. + if icf + & (FLAG_PREHEADER_REGISTERS_SIZE + | FLAG_PREHEADER_INS_SIZE + | FLAG_PREHEADER_OUTS_SIZE + | FLAG_PREHEADER_TRIES_SIZE + | FLAG_PREHEADER_INSNS_SIZE) + != 0 + { + let mut pre = offset as isize; + + if icf & FLAG_PREHEADER_INSNS_SIZE != 0 { + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + let low = u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]) as u32; + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + let high = u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]) as u32; + insns_size += low + (high << 16); + } + if icf & FLAG_PREHEADER_REGISTERS_SIZE != 0 { + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + registers_size += + u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]); + } + if icf & FLAG_PREHEADER_INS_SIZE != 0 { + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + ins_size += u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]); + } + 
if icf & FLAG_PREHEADER_OUTS_SIZE != 0 { + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + outs_size += u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]); + } + if icf & FLAG_PREHEADER_TRIES_SIZE != 0 { + pre -= 2; + if pre < 0 || pre as usize + 2 > data.len() { + return None; + } + tries_size += u16::from_le_bytes([data[pre as usize], data[pre as usize + 1]]); + } + } + + // ART stores `registers_size - ins_size` in the packed field; restore it. + registers_size += ins_size; + + // Instructions follow immediately after the 4-byte compact header. + let insns_off = (offset + 4) as u32; + + Some(DecodedCompactCodeItem { + registers_size, + ins_size, + outs_size, + tries_size, + insns_size, + insns_off, + }) +} diff --git a/src/file/container.rs b/src/file/container.rs index 5ae7247..3fb3702 100644 --- a/src/file/container.rs +++ b/src/file/container.rs @@ -11,7 +11,7 @@ use super::MmapDexFile; use crate::Result; #[cfg(feature = "python")] -use crate::error::py_error::GenericError; +use crate::py::error::GenericError; // ---------------------------------------------------------------------------- // DexContainer @@ -50,32 +50,6 @@ impl<'a> DexContainerMut<'a> for &'a mut [u8] {} impl DexContainer<'_> for Vec {} impl DexContainerMut<'_> for Vec {} -// ---------------------------------------------------------------------------- -// InMemoryDexContainer -// ---------------------------------------------------------------------------- -pub struct InMemoryDexContainer<'a>(&'a [u8]); - -impl<'a> InMemoryDexContainer<'a> { - pub fn new(data: &'a [u8]) -> Self { - Self(data) - } -} - -impl<'a> Deref for InMemoryDexContainer<'a> { - type Target = [u8]; - fn deref(&self) -> &'a Self::Target { - self.0 - } -} - -impl<'a> AsRef<[u8]> for InMemoryDexContainer<'a> { - fn as_ref(&self) -> &'a [u8] { - self.0 - } -} - -impl<'a> DexContainer<'a> for InMemoryDexContainer<'a> {} - // >>> begin python export #[cfg(feature = "python")] @@ 
-112,7 +86,7 @@ impl DexContainer<'_> for PyInMemoryDexContainer {} #[cfg(feature = "python")] impl PyInMemoryDexContainer { - pub fn open<'py>(py: Python, data: Py) -> Self { + pub fn open(py: Python, data: Py) -> Self { Self { data: data.clone_ref(py), length: data.as_bytes(py).len(), @@ -210,7 +184,7 @@ pub struct PyFileDexContainer { impl AsRef<[u8]> for PyFileDexContainer { #[inline] fn as_ref(&self) -> &[u8] { - &self.data.as_ref() + self.data.as_ref() } } @@ -219,7 +193,7 @@ impl Deref for PyFileDexContainer { type Target = [u8]; fn deref(&self) -> &Self::Target { - &self.data.deref() + self.data.deref() } } @@ -269,10 +243,3 @@ impl PyFileDexContainer { } // <<< end python export -#[cfg(feature = "python")] -#[pyo3::pymodule(name = "container")] -pub(crate) mod py_container { - - #[pymodule_export] - use super::{PyFileDexContainer, PyInMemoryDexContainer}; -} diff --git a/src/file/debug.rs b/src/file/debug.rs index 3dac282..00f6a42 100644 --- a/src/file/debug.rs +++ b/src/file/debug.rs @@ -1,15 +1,35 @@ use crate::{ - leb128::{decode_leb128_off, decode_leb128p1_off}, + leb128::{decode_leb128_off, decode_leb128p1_off, decode_sleb128}, Result, }; use super::StringIndex; +#[derive(Debug, Clone)] pub enum SourceFile { This, Other(StringIndex), // index to file } +/// Local variable information decoded from a debug info stream. +#[derive(Debug, Clone, Default)] +pub struct LocalInfo { + /// Index into string_ids for the variable name, or `None`. + pub name_idx: Option, + /// Index into type_ids for the variable type descriptor, or `None`. + pub descriptor_idx: Option, + /// Index into string_ids for the Dalvik/generic signature, or `None`. + pub signature_idx: Option, + /// DEX program counter where the local comes into scope. + pub start_address: u32, + /// DEX program counter where the local goes out of scope. + pub end_address: u32, + /// Register number holding this local. + pub reg: u16, + /// Whether this local is currently live (used during decoding). 
+ pub is_live: bool, +} + #[rustfmt::skip] pub mod code { pub const DBG_END_SEQUENCE: u8 = 0x00; @@ -32,8 +52,8 @@ pub struct PositionInfo { pub address: u32, pub line: u32, pub file: SourceFile, - prologue_end: bool, - epilogue_begin: bool, + pub prologue_end: bool, + pub epilogue_begin: bool, } impl PositionInfo { @@ -69,16 +89,16 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { pub fn visit_parameter_names(&self, visitor: F) -> Result<()> where - F: Fn(u32), + F: FnMut(u32), { let mut offset = 0; self.decode_parameter_names(visitor, &mut offset)?; Ok(()) } - fn decode_parameter_names(&self, visitor: F, offset: &mut usize) -> Result + fn decode_parameter_names(&self, mut visitor: F, offset: &mut usize) -> Result where - F: Fn(u32), + F: FnMut(u32), { let line = decode_leb128_off(self.ptr, offset)?; let size = decode_leb128_off::(self.ptr, offset)?; @@ -90,9 +110,9 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { Ok(line) } - pub fn decode_position_info(&self, pos_visitor: F) -> Result<()> + pub fn decode_position_info(&self, mut pos_visitor: F) -> Result<()> where - F: Fn(&PositionInfo), + F: FnMut(&PositionInfo), { let mut entry = PositionInfo::new(); let mut offset = 0; @@ -104,12 +124,13 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { match opcode { code::DBG_END_SEQUENCE => break, - // This will cause overflow code::DBG_ADVANCE_PC => { - entry.address += decode_leb128_off::(self.ptr, &mut offset)? + entry.address = entry.address + .wrapping_add(decode_leb128_off::(self.ptr, &mut offset)?) } code::DBG_ADVANCE_LINE => { - entry.line += decode_leb128_off::(self.ptr, &mut offset)? 
+ let delta = decode_sleb128(self.ptr, &mut offset)?; + entry.line = (entry.line as i32).wrapping_add(delta) as u32; } code::DBG_START_LOCAL => { decode_leb128_off::(self.ptr, &mut offset)?; // reg @@ -133,9 +154,11 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { } _ => { let adjusted_opcode = opcode - code::DBG_FIRST_SPECIAL; - entry.address += (adjusted_opcode / code::DBG_LINE_RANGE) as u32; - entry.line += - (code::DBG_LINE_BASE + (adjusted_opcode % code::DBG_LINE_RANGE)) as u32; + entry.address = entry.address + .wrapping_add((adjusted_opcode / code::DBG_LINE_RANGE) as u32); + let line_delta = (code::DBG_LINE_BASE as i8 as i32) + + (adjusted_opcode % code::DBG_LINE_RANGE) as i32; + entry.line = (entry.line as i32).wrapping_add(line_delta) as u32; pos_visitor(&entry); entry.epilogue_begin = false; entry.prologue_end = false; @@ -145,8 +168,140 @@ impl<'a> CodeItemDebugInfoAccessor<'a> { Ok(()) } - // TODO - // pub fn decode_local_info(&self, visitor: F) + /// Returns the source line number for the given DEX program counter, or `None` + /// if no position entry covers that PC. Matches ART's `GetLineNumForPc`. + pub fn get_line_for_pc(&self, dex_pc: u32) -> Result> { + let mut result: Option = None; + self.decode_position_info(|pos| { + if pos.address <= dex_pc { + result = Some(pos.line); + } + })?; + Ok(result) + } + + /// Decodes the local variable table and calls `visitor` for each completed + /// (or still-live-at-end) local variable. + /// + /// `num_regs` should come from [`CodeItem::registers_size`]. + /// Matches ART's `CodeItemDebugInfoAccessor::DecodeDebugLocalInfo`. 
+ pub fn decode_local_info(&self, num_regs: u16, mut visitor: F) -> Result<()> + where + F: FnMut(&LocalInfo), + { + let mut locals: Vec = (0..num_regs as usize) + .map(|i| LocalInfo { reg: i as u16, ..Default::default() }) + .collect(); + + let mut offset = 0usize; + // skip line start and parameter names + decode_leb128_off::(self.ptr, &mut offset)?; + let param_count = decode_leb128_off::(self.ptr, &mut offset)?; + for _ in 0..param_count { + decode_leb128p1_off(self.ptr, &mut offset)?; + } + + let mut address: u32 = 0; + + loop { + if offset >= self.ptr.len() { + break; + } + let opcode = self.ptr[offset]; + offset += 1; + + match opcode { + code::DBG_END_SEQUENCE => break, + code::DBG_ADVANCE_PC => { + address = address + .wrapping_add(decode_leb128_off::(self.ptr, &mut offset)?); + } + code::DBG_ADVANCE_LINE => { + decode_sleb128(self.ptr, &mut offset)?; + } + code::DBG_START_LOCAL => { + let reg = decode_leb128_off::(self.ptr, &mut offset)? as usize; + let name = decode_leb128p1_off(self.ptr, &mut offset)?; + let descriptor = decode_leb128p1_off(self.ptr, &mut offset)?; + if reg < locals.len() { + if locals[reg].is_live { + let mut ended = locals[reg].clone(); + ended.end_address = address; + visitor(&ended); + } + locals[reg] = LocalInfo { + reg: reg as u16, + name_idx: if name >= 0 { Some(name as u32) } else { None }, + descriptor_idx: if descriptor >= 0 { Some(descriptor as u32) } else { None }, + signature_idx: None, + start_address: address, + end_address: 0, + is_live: true, + }; + } + } + code::DBG_START_LOCAL_EXTENDED => { + let reg = decode_leb128_off::(self.ptr, &mut offset)? 
as usize; + let name = decode_leb128p1_off(self.ptr, &mut offset)?; + let descriptor = decode_leb128p1_off(self.ptr, &mut offset)?; + let signature = decode_leb128p1_off(self.ptr, &mut offset)?; + if reg < locals.len() { + if locals[reg].is_live { + let mut ended = locals[reg].clone(); + ended.end_address = address; + visitor(&ended); + } + locals[reg] = LocalInfo { + reg: reg as u16, + name_idx: if name >= 0 { Some(name as u32) } else { None }, + descriptor_idx: if descriptor >= 0 { Some(descriptor as u32) } else { None }, + signature_idx: if signature >= 0 { Some(signature as u32) } else { None }, + start_address: address, + end_address: 0, + is_live: true, + }; + } + } + code::DBG_END_LOCAL => { + let reg = decode_leb128_off::(self.ptr, &mut offset)? as usize; + if reg < locals.len() && locals[reg].is_live { + let mut ended = locals[reg].clone(); + ended.end_address = address; + ended.is_live = false; + visitor(&ended); + locals[reg].is_live = false; + } + } + code::DBG_RESTART_LOCAL => { + let reg = decode_leb128_off::(self.ptr, &mut offset)? 
as usize; + if reg < locals.len() && !locals[reg].is_live { + locals[reg].start_address = address; + locals[reg].is_live = true; + } + } + code::DBG_SET_PROLOGUE_END | code::DBG_SET_EPILOGUE_BEGIN => {} + code::DBG_SET_FILE => { + decode_leb128p1_off(self.ptr, &mut offset)?; + } + _ => { + let adjusted_opcode = opcode - code::DBG_FIRST_SPECIAL; + address = address + .wrapping_add((adjusted_opcode / code::DBG_LINE_RANGE) as u32); + } + } + } + + // flush locals still live at end of method + for local in &locals { + if local.is_live { + let mut ended = local.clone(); + ended.end_address = address; + visitor(&ended); + } + } + Ok(()) + } + } pub struct DebugInfoParameterNamesIterator<'dex> { diff --git a/src/file/dex_file.rs b/src/file/dex_file.rs new file mode 100644 index 0000000..37bfb35 --- /dev/null +++ b/src/file/dex_file.rs @@ -0,0 +1,991 @@ +use plain::Plain; + +use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; + +// All sibling module types are available via the file module's re-exports. +#[allow(unused_imports)] +use super::*; + +pub struct DexFile<'a, T: DexContainer<'a> = Mmap> { + pub(super) mmap: &'a T, + /// Owned copy of the DEX header, read via `plain::copy_from_bytes` so that + /// the backing container does not need to be aligned to `Header`'s + /// alignment (4 bytes). This allows callers to pass arbitrary byte slices + /// such as those produced by `include_bytes!`. + pub(super) header: Header, + + string_ids: &'a [StringId], + type_ids: &'a [TypeId], + field_ids: &'a [FieldId], + proto_ids: &'a [ProtoId], + method_ids: &'a [MethodId], + class_defs: &'a [ClassDef], + method_handles: &'a [MethodHandleItem], + call_site_ids: &'a [CallSiteIdItem], + + hiddenapi_data: Option<&'a HiddenapiClassData<'a>>, + + pub(super) location: DexLocation, + format: DexFormat, +} + +macro_rules! 
check_lt_result { + ($idx:expr, $count:expr, $item_ty:tt) => { + if ($idx as usize) >= ($count as usize) { + return dex_err!(DexIndexError { + index: $idx as u32, + item_ty: stringify!($item_ty), + max: $count as usize, + }); + } + }; +} + +// writer +impl<'a, C: DexContainerMut<'a>> DexFile<'a, C> { + //TODO +} + +macro_rules! fn_id { + ($name:ident, $attr:ident, $ret_ty:ty, $idx_ty:ty, $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&self, idx: $idx_ty) -> Result<&'a $ret_ty> { + check_lt_result!(idx, self.$attr.len(), $ret_ty); + Ok(&self.$attr[idx as usize]) + } + }; + ($name:ident, $attr:ident, Option: $ret_ty:ty, $fallback:ident, $idx_ty:ident, $(#[$meta:meta])*) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self, idx: $idx_ty) -> Result> { + match idx { + $idx_ty::MAX => Ok(None), + _=> Ok(Some(self.$fallback(idx)?)), + } + } + }; + ($name:ident, $attr:ident, $ret_ty:ty[], $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self) -> &'a [$ret_ty] { + &self.$attr + } + }; + ($name:ident, $attr:ident, Idx: $ref_ty:ty, $(#[$meta:meta])* ) => { + $(#[$meta])* + #[inline(always)] + pub fn $name(&'a self, item: &'a $ref_ty) -> Result { + self.offset_of(self.$attr, item) + } + } +} + +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + pub fn get_section(base: &'a C, offset: u32, len: u32) -> &'a [T] { + if len == 0 { + return &[]; + } + let size = base.len(); + let section_size = len as usize * std::mem::size_of::(); + if offset as usize + section_size > size { + return &[]; + } + let data = &base[offset as usize..]; + T::slice_from_bytes_len(data, len as usize).unwrap_or_default() + } + + pub fn from_raw_parts(base: &'a C, location: DexLocation) -> Result> { + if base.len() < std::mem::size_of::
() { + return dex_err!(TruncatedFile); + } + + // Copy the header bytes into an owned, properly-aligned value. This + // allows callers to pass byte slices with any alignment (e.g. data + // produced by `include_bytes!` or sub-slices of a VDEX container). + let mut header = unsafe { std::mem::zeroed::
() }; + plain::copy_from_bytes(&mut header, base).map_err(|_| DexError::TruncatedFile)?; + + // Pre-compute section slices *before* moving `header` into the struct + // so we can read its offset/size fields without a self-referential borrow. + let string_ids = DexFile::get_section(base, header.string_ids_off, header.string_ids_size); + let type_ids = DexFile::get_section(base, header.type_ids_off, header.type_ids_size); + let field_ids = DexFile::get_section(base, header.field_ids_off, header.field_ids_size); + let proto_ids = DexFile::get_section(base, header.proto_ids_off, header.proto_ids_size); + let method_ids = DexFile::get_section(base, header.method_ids_off, header.method_ids_size); + let class_defs = DexFile::get_section(base, header.class_defs_off, header.class_defs_size); + let format = if base.len() >= 4 && &base[..4] == CDEX_MAGIC { + DexFormat::Compact + } else { + DexFormat::Standard + }; + + let mut dex = Self { + mmap: base, + header, + string_ids, + type_ids, + field_ids, + proto_ids, + method_ids, + class_defs, + method_handles: &[], + call_site_ids: &[], + hiddenapi_data: None, + location, + format, + }; + + dex.init_sections_from_maplist(); + Ok(dex) + } + + pub fn open_file(container: &'a DexFileContainer) -> Result> { + let loc = container.get_location(); + let size = container.data().len(); + if size < std::mem::size_of::
() { + return dex_err!(DexFileError, "Invalid or truncated file {:?}", loc); + } + + DexFile::open( + container.data(), + DexLocation::Path(loc.to_string()), + if container.verify_checksum { + // currently only checksum verification is supported + verifier::VerifyPreset::ChecksumOnly + } else { + verifier::VerifyPreset::None + }, + ) + } + + pub fn open( + container: &'a C, + location: DexLocation, + verify_preset: verifier::VerifyPreset, + ) -> Result> { + let dex = DexFile::from_raw_parts(container, location)?; + dex.init()?; + if verify_preset != verifier::VerifyPreset::None { + DexFile::verify(&dex, verify_preset)?; + } + Ok(dex) + } + + pub fn expected_header_size(&self) -> u32 { + let version = self.header.get_version(); + if version != 0 { + if version < 41 { + std::mem::size_of::
() as u32 + } else { + std::mem::size_of::() as u32 + } + } else { + 0 + } + } + + pub fn get_location(&self) -> &DexLocation { + &self.location + } + + #[inline(always)] + pub fn file_size(&self) -> usize { + self.mmap.len() + } + + #[inline(always)] + pub fn get_header(&self) -> &Header { + &self.header + } + + // ------------------------------------------------------------------------------ + // strings + // ------------------------------------------------------------------------------ + + // TODO: add docs + fn_id!(get_string_id, string_ids, StringId, u32,); + fn_id!(get_string_ids, string_ids, StringId[],); + fn_id! {get_string_id_opt, string_ids, Option: StringId, get_string_id, u32,} + fn_id! {string_id_idx, string_ids, Idx: StringId, } + + #[inline(always)] + pub fn num_string_ids(&self) -> u32 { + self.header.string_ids_size + } + + #[inline] + pub fn get_string_data(&self, string_id: &StringId) -> Result<(u32, &'a [u8])> { + check_lt_result!(string_id.offset(), self.file_size(), "string-id"); + let (utf16_len, size) = match decode_leb128(&self.mmap[string_id.offset()..]) { + Ok((utf16_len, size)) => (utf16_len, size), + Err(DexError::VarIntError(e)) => { + return dex_err!(BadStringData { + offset: string_id.offset(), + kind: e + }); + } + _ => unreachable!(), + }; + + let start = string_id.offset() + size; + check_lt_result!(start, self.file_size(), "string-data"); + match &self.mmap[start..].iter().position(|x| *x == 0) { + Some(pos) => Ok((utf16_len, &self.mmap[start..start + pos + 1])), + None => dex_err!(BadStringDataMissingNullByte, start), + } + } + + #[inline] + /// # Safety + /// + /// Caller must ensure the string data at `string_id` is valid UTF-8. No null-terminator or + /// encoding validation is performed; invalid bytes will produce undefined behaviour. 
+ pub unsafe fn fast_get_utf8_str(&self, string_id: &StringId) -> Result { + let (size, data) = self.get_string_data(string_id)?; + Ok(String::from_utf8_unchecked(data[0..size as usize].to_vec())) + } + + #[inline] + /// # Safety + /// + /// Caller must ensure the string data at index `idx` is valid UTF-8. No null-terminator or + /// encoding validation is performed; invalid bytes will produce undefined behaviour. + pub unsafe fn fast_get_utf8_str_at(&self, idx: u32) -> Result { + let string_id = self.get_string_id(idx)?; + self.fast_get_utf8_str(string_id) + } + + #[inline(always)] + pub fn get_str_lossy(&self, string_id: &StringId) -> Result { + let (_, data) = self.get_string_data(string_id)?; + utf::mutf8_to_str_lossy(data) + } + + #[inline(always)] + pub fn get_str_lossy_at(&self, idx: u32) -> Result { + let string_id = self.get_string_id(idx)?; + self.get_str_lossy(string_id) + } + + #[inline(always)] + pub fn get_str(&self, string_id: &StringId) -> Result { + let (_, data) = self.get_string_data(string_id)?; + crate::utf::mutf8_to_str(data) + } + + #[inline(always)] + pub fn get_str_at(&self, idx: StringIndex) -> Result { + let string_id = self.get_string_id(idx)?; + self.get_str(string_id) + } + + #[inline(always)] + pub fn get_str_opt_at(&self, idx: StringIndex) -> Result> { + match idx { + StringIndex::MAX => Ok(None), + _ => Ok(Some(self.get_str_at(idx)?)), + } + } + + // ------------------------------------------------------------------------------ + // types + // ------------------------------------------------------------------------------ + fn_id!(get_type_id, type_ids, TypeId, TypeIndex,); + fn_id!(get_type_ids, type_ids, TypeId[],); + fn_id! {type_id_idx, type_ids, Idx: TypeId, } + fn_id! 
{get_type_id_opt, type_ids, Option: TypeId, get_type_id, TypeIndex,} + + #[inline(always)] + pub fn num_type_ids(&self) -> u32 { + self.header.type_ids_size + } + + #[inline(always)] + pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { + let type_id = self.get_type_id(idx)?; + self.get_str_lossy_at(type_id.descriptor_idx) + } + + #[inline(always)] + pub fn get_type_desc_utf16_lossy(&self, type_id: &TypeId) -> Result { + self.get_str_lossy_at(type_id.descriptor_idx) + } + + #[inline(always)] + pub fn get_type_desc_utf16(&self, type_id: &TypeId) -> Result { + self.get_str_at(type_id.descriptor_idx) + } + + #[inline(always)] + pub fn get_type_desc_utf16_at(&self, idx: TypeIndex) -> Result { + let type_id = self.get_type_id(idx)?; + self.get_str_at(type_id.descriptor_idx) + } + + // -- code item + #[inline(always)] + pub fn get_code_item(&self, offset: u32) -> Result> { + check_lt_result!(offset, self.file_size(), "code item offset"); + self.data_ptr(offset) + } + + /// Returns `true` if this is a CompactDex file (`cdex` magic). + #[inline] + pub fn is_compact_dex(&self) -> bool { + self.format == DexFormat::Compact + } + + /// Returns `true` if this is a standard DEX file (`dex\n` magic). + #[inline] + pub fn is_standard_dex(&self) -> bool { + self.format == DexFormat::Standard + } + + /// Builds a [`CodeItemAccessor`] for the code item at `offset`. + /// + /// For standard DEX, `offset` points to the [`CodeItem`] struct. + /// For compact DEX, `offset` points to the compact two-field header + /// (`fields_` + `insns_count_and_flags_`). 
+ #[inline(always)] + pub fn get_code_item_accessor(&self, offset: u32) -> Result> { + if offset == 0 { + return CodeItemAccessor::empty(); + } + check_lt_result!(offset, self.file_size(), "code item offset"); + match self.format { + DexFormat::Standard => { + let code_item = self.non_null_data_ptr(offset)?; + let insns_off = offset + std::mem::size_of::() as u32; + CodeItemAccessor::from_code_item(self, code_item, insns_off) + } + DexFormat::Compact => { + let decoded = compact_dex::decode_compact_code_item( + self.mmap.as_ref(), + offset as usize, + ) + .ok_or_else(|| crate::error::DexError::BadOffsetTooLarge { + offset, + size: self.file_size(), + section: "compact code item", + })?; + CodeItemAccessor::from_fields( + self, + decoded.registers_size, + decoded.ins_size, + decoded.outs_size, + decoded.tries_size, + 0, // debug_info_off is in CompactOffsetTable, not the code item + decoded.insns_size, + decoded.insns_off, + ) + } + } + } + + #[inline(always)] + pub fn get_insns_raw(&self, code_off: u32, size_in_code_units: u32) -> Result<&'a [u16]> { + check_lt_result!(code_off, self.file_size(), "code stream offset"); + self.non_null_array_data_ptr(code_off, size_in_code_units as usize) + } + + // ------------------------------------------------------------------------------ + // Debug Info + // ------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_debug_info_accessor(&'a self, offset: u32) -> Result> { + check_lt_result!(offset, self.file_size(), "debug info offset"); + Ok(CodeItemDebugInfoAccessor::new( + &self.mmap[offset as usize..], + )) + } + + #[inline(always)] + pub fn get_debug_info_accessor_opt( + &'a self, + offset: u32, + ) -> Result>> { + match offset { + // WHY?: It seems that some applications incorrectly set the debug info offset to 0 + 0 | u32::MAX => Ok(None), + _ => Ok(Some(self.get_debug_info_accessor(offset)?)), + } + } + + // 
------------------------------------------------------------------------------ + // field ids + // ------------------------------------------------------------------------------ + fn_id!(get_field_id, field_ids, FieldId, FieldIndex,); + fn_id!(get_field_ids, field_ids, FieldId[],); + fn_id! {field_id_idx, field_ids, Idx: FieldId, } + fn_id! {get_field_id_opt, field_ids, Option: FieldId, get_field_id, FieldIndex,} + + #[inline(always)] + pub fn num_field_ids(&self) -> u32 { + self.header.field_ids_size + } + + #[inline(always)] + pub fn get_field_name(&self, field_id: &FieldId) -> Result { + self.get_str_lossy_at(field_id.name_idx) + } + + #[inline(always)] + pub fn get_field_name_at(&self, idx: FieldIndex) -> Result { + let field_id = self.get_field_id(idx)?; + self.get_str_lossy_at(field_id.name_idx) + } + + // ------------------------------------------------------------------------------ + // proto ids + // ------------------------------------------------------------------------------ + fn_id!(get_proto_id, proto_ids, ProtoId, ProtoIndex,); + fn_id!(get_proto_ids, proto_ids, ProtoId[],); + fn_id! {proto_id_idx, proto_ids, Idx: ProtoId, } + fn_id! 
{get_proto_id_opt, proto_ids, Option: ProtoId, get_proto_id, ProtoIndex,} + + pub fn num_proto_ids(&self) -> u32 { + self.header.proto_ids_size + } + + pub fn get_shorty_at(&self, idx: ProtoIndex) -> Result { + let proto_id = self.get_proto_id(idx)?; + self.get_shorty(proto_id) + } + + pub fn get_shorty_lossy_at(&self, idx: ProtoIndex) -> Result { + let proto_id = self.get_proto_id(idx)?; + self.get_shorty_lossy(proto_id) + } + + pub fn get_shorty(&self, proto_id: &ProtoId) -> Result { + self.get_str_at(proto_id.shorty_idx) + } + + pub fn get_shorty_lossy(&self, proto_id: &ProtoId) -> Result { + self.get_str_lossy_at(proto_id.shorty_idx) + } + + //------------------------------------------------------------------------------ + // EncodedValue + //------------------------------------------------------------------------------ + pub fn get_encoded_value(&self, off: u32) -> Result { + check_lt_result!(off, self.file_size(), EncodedValue); + EncodedValue::new(&self.mmap[off as usize..]) + } + + //------------------------------------------------------------------------------ + // Method Ids + //------------------------------------------------------------------------------ + fn_id!(get_method_id, method_ids, MethodId, u32,); + fn_id!(get_method_ids, method_ids, MethodId[],); + fn_id! {method_id_idx, method_ids, Idx: MethodId, } + fn_id! {get_method_id_opt, method_ids, Option: MethodId, get_method_id, u32,} + + #[inline(always)] + pub fn num_method_ids(&self) -> u32 { + self.header.method_ids_size + } + + // classdef related methods + //------------------------------------------------------------------------------ + // ClassDefs + //------------------------------------------------------------------------------ + fn_id!(get_class_def, class_defs, ClassDef, u32,); + fn_id!(get_class_defs, class_defs, ClassDef[],); + fn_id! {class_def_idx, class_defs, Idx: ClassDef, } + fn_id! 
{get_class_def_opt, class_defs, Option: ClassDef, get_class_def, u32,} + + #[inline(always)] + pub fn num_class_defs(&self) -> u32 { + self.header.class_defs_size + } + + #[inline] + pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_lossy_at(class_def.class_idx) + } + + #[inline] + pub fn get_class_desc_utf16(&self, class_def: &ClassDef) -> Result { + self.get_type_desc_utf16_at(class_def.class_idx) + } + + #[inline] + pub fn get_interfaces_list(&self, class_def: &ClassDef) -> Result>> { + self.get_type_list(class_def.interfaces_off) + } + + //------------------------------------------------------------------------------ + // ClassDef convenience helpers + //------------------------------------------------------------------------------ + + /// Returns the Java-visible access flags for a class definition (lower 16 bits only). + /// + /// Matches ART's `ClassDef::GetJavaAccessFlags()`. + #[inline] + pub fn get_java_access_flags(class_def: &ClassDef) -> u32 { + class_def.access_flags & 0xFFFF + } + + //------------------------------------------------------------------------------ + // Proto helpers + //------------------------------------------------------------------------------ + + /// Returns the parameter `TypeList` for a `ProtoId`, or `None` if the proto has no parameters. + /// + /// Matches ART's `DexFile::GetProtoParameters()`. + #[inline] + pub fn get_proto_parameters(&'a self, proto_id: &ProtoId) -> Result>> { + self.get_type_list(proto_id.parameters_off) + } + + /// Builds and returns the method signature string for the given method index + /// in DEX format: `"(param1param2...)return_type"`. + /// + /// Matches ART's `DexFile::GetMethodSignature()`. 
+ pub fn get_method_signature(&'a self, method_idx: u32) -> Result { + let method_id = self.get_method_id(method_idx)?; + let proto_id = self.get_proto_id(method_id.proto_idx)?; + + let mut buf = String::from("("); + if let Some(params) = self.get_proto_parameters(proto_id)? { + for item in params { + let desc = self.get_type_desc_utf16_at(item.type_idx)?; + buf.push_str(&desc); + } + } + buf.push(')'); + let ret_desc = self.get_type_desc_utf16_at(proto_id.return_type_idx)?; + let is_void = ret_desc == "V"; + let num_params = self + .get_proto_parameters(proto_id)? + .map(|p| p.len() as u32) + .unwrap_or(0); + buf.push_str(&ret_desc); + Ok(Signature::new(buf, num_params, is_void)) + } + + //------------------------------------------------------------------------------ + // TypeLookupTable + //------------------------------------------------------------------------------ + + /// Builds a [`TypeLookupTable`] for fast O(1) class lookup by descriptor. + pub fn build_type_lookup_table(&'a self) -> TypeLookupTable { + TypeLookupTable::new(self) + } + + //------------------------------------------------------------------------------ + // HiddenapiClassData helpers + //------------------------------------------------------------------------------ + + /// Decodes the ULEB128 hidden-API flags stream for a given class. + /// + /// Returns `None` if no hidden-API data is present for this class. + /// The returned `Vec` contains `count` flags in class_data_item order: + /// static fields, instance fields, direct methods, virtual methods. 
+ pub fn get_hiddenapi_class_flags(&'a self, class_def_idx: u32, count: usize) -> Option> { + let hiddenapi = self.hiddenapi_data?; + let slice = hiddenapi.get_flags_slice(class_def_idx)?; + let mut flags = Vec::with_capacity(count); + let mut offset = 0; + for _ in 0..count { + match crate::leb128::decode_leb128_off::(slice, &mut offset) { + Ok(v) => flags.push(v), + Err(_) => break, + } + } + Some(flags) + } + + //------------------------------------------------------------------------------ + // Map List + //------------------------------------------------------------------------------ + + /// Returns the raw DEX section map, or `None` if the map list is unavailable. + pub fn get_map_list(&'a self) -> Option> { + if !self.maplist_available() { + return None; + } + let map_list_size_off = self.header.map_off; + let map_list_off = (self.header.map_off as usize) + std::mem::size_of::(); + if map_list_off >= self.file_size() { + return None; + } + let count: &u32 = self.non_null_data_ptr(map_list_size_off).ok()?; + let map_limit = (self.file_size() + - std::mem::size_of::() + - map_list_size_off as usize) + / std::mem::size_of::(); + if *count as usize > map_limit { + return None; + } + self.non_null_array_data_ptr::(map_list_off as u32, *count as usize) + .ok() + } + + //------------------------------------------------------------------------------ + // Method Handles + //------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_method_handle(&self, idx: u32) -> Result<&'a MethodHandleItem> { + check_lt_result!(idx, self.method_handles.len(), MethodHandleItem); + Ok(&self.method_handles[idx as usize]) + } + + #[inline(always)] + pub fn num_method_handles(&self) -> u32 { + self.method_handles.len() as u32 + } + + #[inline(always)] + pub fn get_method_handles(&self) -> &'a [MethodHandleItem] { + self.method_handles + } + + //------------------------------------------------------------------------------ + // 
CallSites + //------------------------------------------------------------------------------ + #[inline(always)] + pub fn get_call_site_id(&self, idx: u32) -> Result<&'a CallSiteIdItem> { + check_lt_result!(idx, self.call_site_ids.len(), CallSiteIdItem); + Ok(&self.call_site_ids[idx as usize]) + } + + #[inline(always)] + pub fn num_call_site_ids(&self) -> u32 { + self.call_site_ids.len() as u32 + } + + #[inline(always)] + pub fn get_call_site_ids(&self) -> &'a [CallSiteIdItem] { + self.call_site_ids + } + + //------------------------------------------------------------------------------ + // TryItem + //------------------------------------------------------------------------------ + pub fn get_try_items(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { + // skip heavy work if there are no try items + match ca.get_tries_abs_off() { + None => Ok(&[]), + Some(tries_off) => self.get_try_items_raw(tries_off, ca.tries_size()), + } + } + + #[inline] + pub fn get_try_items_raw(&'a self, tries_off: u32, tries_size: u16) -> Result<&'a [TryItem]> { + check_lt_result!(tries_off, self.file_size(), TryItem); + self.non_null_array_data_ptr(tries_off, tries_size as usize) + } + + //------------------------------------------------------------------------------ + // EncodedCatchHandler + //------------------------------------------------------------------------------ + #[inline] + pub fn get_catch_handler_data( + &self, + ca: &CodeItemAccessor<'_>, + offset: usize, + ) -> Result> { + match ca.get_catch_handler_data_abs_off() { + None => Ok(None), + Some(data_offset) => { + let offset = data_offset as usize + offset; + check_lt_result!(offset, self.file_size(), CatchHandlerData); + + // TODO: handle values greater than u16 since u16::MAX is maximum offset + Ok(Some(&self.mmap[offset..])) + } + } + } + + #[inline] + pub fn iter_catch_handlers_at( + &self, + ca: &CodeItemAccessor<'_>, + offset: usize, + ) -> Result>> { + match self.get_catch_handler_data(ca, offset)? 
{ + None => Ok(None), + Some(data) => Ok(Some(EncodedCatchHandlerIterator::new(data)?)), + } + } + + #[inline] + pub fn iter_catch_handlers( + &self, + ca: &CodeItemAccessor<'_>, + try_item: &TryItem, + ) -> Result>> { + self.iter_catch_handlers_at(ca, try_item.handler_off as usize) + } + + //------------------------------------------------------------------------------ + // Annotations + //------------------------------------------------------------------------------ + // see implementation in annotations.rs for accessor + pub fn get_annotation_set(&self, off: u32) -> Result> { + // this will not panic if offset is zero + match self.data_ptr::(off)? { + None => Ok(&[]), + Some(size) => { + let off = off as usize + std::mem::size_of::(); + check_lt_result!(off, self.file_size(), AnnotationSetItem); + self.non_null_array_data_ptr(off as u32, *size as usize) + } + } + } + + #[inline(always)] + pub fn get_field_annotation_set( + &'a self, + anno_item: &FieldAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + + #[inline(always)] + pub fn get_method_annotation_set( + &'a self, + anno_item: &MethodAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + + #[inline(always)] + pub fn get_parameter_annotation_set( + &'a self, + anno_item: &ParameterAnnotationsItem, + ) -> Result> { + self.get_annotation_set(anno_item.annotations_off) + } + + #[inline] + pub fn get_annotation(&self, annotation_off: u32) -> Result { + check_lt_result!(annotation_off, self.file_size(), Annotation); + AnnotationItem::from_raw_parts(&self.mmap[annotation_off as usize..]) + } + + //------------------------------------------------------------------------------ + // internal helpers + //------------------------------------------------------------------------------ + #[inline] + fn offset_of(&self, buf: &[U], o: &T) -> Result { + let start = buf.as_ptr() as usize; + let target = o as *const _ as usize; + let end = 
buf.as_ptr() as usize + self.file_size(); + + if target < start || target > end { + return dex_err!(UnknownObjectRef { + offset: target, + start, + end + }); + } + + Ok(((target - start) / std::mem::size_of::()) as u32) + } + + #[inline(always)] + pub fn get_type_list(&self, offset: u32) -> Result>> { + if offset == 0 { + return Ok(None); + } + + check_lt_result!(offset, self.file_size(), TypeList); + let length = u32::from_bytes(&self.mmap[offset as usize..]).unwrap(); + let data_off = offset + std::mem::size_of::() as u32; + + self.array_data_ptr(data_off, *length as usize) + } + + // private methods + #[inline] + pub fn data_ptr(&self, offset: u32) -> Result> { + match offset { + 0 => Ok(None), + _ => Ok(Some(self.non_null_data_ptr(offset)?)), + } + } + + pub fn non_null_data_ptr(&self, offset: u32) -> Result<&'a T> { + if offset == 0 { + return dex_err!(NullOffset { + item_ty: std::any::type_name::() + }); + } + match T::from_bytes(&self.mmap[offset as usize..]) { + Ok(v) => Ok(v), + Err(plain::Error::TooShort) => { + dex_err!(DexLayoutError, self, offset, std::any::type_name::(), 0) + } + Err(plain::Error::BadAlignment) => dex_err!(UnalignedRead { + offset, + item_ty: std::any::type_name::() + }), + } + } + + #[inline] + pub fn array_data_ptr(&self, offset: u32, len: usize) -> Result> { + match offset { + 0 => Ok(None), + _ => Ok(Some(self.non_null_array_data_ptr(offset, len)?)), + } + } + + pub fn non_null_array_data_ptr(&self, offset: u32, len: usize) -> Result<&'a [T]> { + if offset == 0 { + return dex_err!(NullOffset { + item_ty: std::any::type_name::() + }); + } + match T::slice_from_bytes_len(&self.mmap[offset as usize..], len) { + Ok(v) => Ok(v), + Err(plain::Error::TooShort) => dex_err!( + DexLayoutError, + self, + offset, + std::any::type_name::(), + len + ), + Err(plain::Error::BadAlignment) => dex_err!(UnalignedRead { + offset, + item_ty: std::any::type_name::() + }), + } + } + + 
//------------------------------------------------------------------------------ + // Initialization + //------------------------------------------------------------------------------ + fn init(&self) -> Result<()> { + let container_size = self.file_size(); + if container_size < std::mem::size_of::
() { + return dex_err!( + DexFileError, + "Unable to open {:?}: File size is too small to fit dex header", + self.location + ); + } + + self.check_magic_and_version()?; + + let expected_header_size = self.expected_header_size(); + if expected_header_size < self.header.header_size { + return dex_err!( + DexFileError, + "Unable to open {:?}: Header size is {} but {} was expected", + self.location, + expected_header_size, + self.header.header_size + ); + } + + if container_size < self.header.file_size as usize { + return dex_err!( + DexFileError, + "Unable to open {:?}: File size is {} but the header expects {}", + self.location, + container_size, + self.header.file_size + ); + } + Ok(()) + } + + fn check_magic_and_version(&self) -> Result<()> { + if !self.is_magic_valid() { + return dex_err!( + DexFileError, + "Unrecognized magic number in {:?}: {:?}", + self.location, + &self.header.get_magic()[..4] + ); + } + + if !self.is_version_valid() { + return dex_err!( + DexFileError, + "Unrecognized dex version in {:?}: {:?}", + self.location, + &self.header.get_magic()[4..] 
+ ); + } + Ok(()) + } + + #[inline] + fn maplist_available(&self) -> bool { + if self.header.map_off == 0x00 { + return false; + } + + let size = self.file_size(); + let end = (self.header.map_off as usize) + std::mem::size_of::(); + end <= size && plain::is_aligned::(&self.mmap[0..end]) + } + + fn init_sections_from_maplist(&mut self) { + if !self.maplist_available() { + // bad offset + return; + } + + let map_list_size_off = self.header.map_off; + let map_list_off = (self.header.map_off as usize) + std::mem::size_of::(); + if map_list_off >= self.file_size() { + // bad offset + return; + } + + let count: &u32 = match self.non_null_data_ptr(map_list_size_off) { + Ok(v) => v, + Err(_) => { + // bad file will be reported through verifier + return; + } + }; + let map_limit = + (self.file_size() - std::mem::size_of::() - map_list_size_off as usize) + / std::mem::size_of::(); + + if *count as usize > map_limit { + // bad file + return; + } + + // we should unwrap this here + let items = + match self.non_null_array_data_ptr::(map_list_off as u32, *count as usize) { + Ok(v) => v, + Err(_) => { + // bad file will be reported through verifier + return; + } + }; + for map_item in items { + match map_item.type_ { + MapItemType::MethodHandleItem => { + self.method_handles = + DexFile::get_section(self.mmap, map_item.off, map_item.size) + } + MapItemType::CallSiteIdItem => { + self.call_site_ids = + DexFile::get_section(self.mmap, map_item.off, map_item.size) + } + MapItemType::HiddenapiClassData => { + let item_off = map_item.off as usize; + self.hiddenapi_data = Some( + HiddenapiClassData::from_bytes( + &self.mmap[item_off..item_off + map_item.size as usize], + ) + .unwrap(), + ); + } + _ => {} + } + } + } +} diff --git a/src/file/dump.rs b/src/file/dump.rs index f547b2c..feb2c5e 100644 --- a/src/file/dump.rs +++ b/src/file/dump.rs @@ -6,7 +6,7 @@ use crate::{ use super::{ signatures, vreg, Code, DexContainer, DexFile, FieldId, Format, Instruction, MethodId, - StringId, 
TypeId, + StringId, TypeId, VarArgs, }; pub mod prettify { @@ -54,7 +54,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { result.push_str(&self.pretty_type_opt_at(field_id.class_idx)?); result.push('.'); - result.push_str(&self.get_utf16_str_lossy_at(field_id.name_idx)?); + result.push_str(&self.get_str_lossy_at(field_id.name_idx)?); Ok(result) } @@ -81,7 +81,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } pub fn pretty_utf16(&self, string_id: &StringId) -> String { - match self.get_utf16_str_lossy(string_id) { + match self.get_str_lossy(string_id) { Ok(str_data) => str_data, Err(_) => format!("<>", string_id.string_data_off), } @@ -123,7 +123,7 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { result.push_str(&self.pretty_type_at(method_id.class_idx)); result.push('.'); - result.push_str(&self.get_utf16_str_lossy_at(method_id.name_idx)?); + result.push_str(&self.get_str_lossy_at(method_id.name_idx)?); if let Some(proto_id) = proto_id { result.push('('); @@ -142,308 +142,802 @@ impl<'a, C: DexContainer<'a>> DexFile<'a, C> { } } +// ============================================================================ +// Colored output types +// ============================================================================ + +/// Semantic category of a fragment of formatted instruction text. +/// +/// Consumers map this to terminal colors, HTML classes, or any other +/// presentation layer without re-parsing the text. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Highlight { + /// Instruction mnemonic (`invoke-virtual`, `const/4`, …). + Opcode, + /// Virtual or parameter register (`v0`, `p1`, …). + Register, + /// Numeric immediate (`#+42`, `#int +65536 // 0x10000`, …). + Immediate, + /// Signed branch offset (`+5`, `-3`, …). + Offset, + /// Quoted string literal content (`"hello"`, …). + StringLiteral, + /// Resolved type, field, method, or proto reference name. + Ref, + /// Index annotation comment (` // field@3`, ` // method@2, proto@1`, …). 
+ Comment, + /// Structural punctuation and whitespace (`, `, ` .. `, `{`, `}`, ` `). + Plain, +} + +/// A fragment of formatted instruction text with its semantic highlight. +#[derive(Debug, Clone)] +pub struct Span { + pub text: String, + pub hl: Highlight, +} + +/// A fully formatted, highlighted instruction line as a sequence of [`Span`]s. +pub type StyledLine = Vec; + +// ============================================================================ +// Internal writer abstraction +// ============================================================================ + +/// Low-level sink for instruction fragments. Implementors decide whether to +/// concatenate into a plain `String` or collect typed [`Span`]s. +trait InsnWriter { + fn push(&mut self, text: String, hl: Highlight); + + fn opcode(&mut self, s: &str) { + self.push(s.to_string(), Highlight::Opcode); + } + fn plain(&mut self, s: impl Into) { + self.push(s.into(), Highlight::Plain); + } + fn sep(&mut self) { + self.push(", ".to_string(), Highlight::Plain); + } + fn reg(&mut self, n: i32) { + self.push(format!("v{n}"), Highlight::Register); + } + fn offset_signed(&mut self, n: i32) { + self.push(format!("{n:+}"), Highlight::Offset); + } + fn imm_i32(&mut self, n: i32) { + self.push(format!("#{n:+}"), Highlight::Immediate); + } + fn imm_u64(&mut self, n: u64) { + self.push(format!("#{n:+}"), Highlight::Immediate); + } + fn imm_typed_u32(&mut self, ty: &str, n: u32) { + self.push(format!("#{ty} {n:+} // {n:#x}"), Highlight::Immediate); + } + fn imm_typed_u64(&mut self, ty: &str, n: u64) { + self.push(format!("#{ty} {n:+} // {n:#x}"), Highlight::Immediate); + } + fn string_lit(&mut self, s: &str, idx: impl std::fmt::Display) { + self.push(format!("{s:?}"), Highlight::StringLiteral); + self.push(format!(" // string@{idx}"), Highlight::Comment); + } + fn type_ref(&mut self, name: &str, idx: impl std::fmt::Display) { + self.push(name.to_string(), Highlight::Ref); + self.push(format!(" // type@{idx}"), 
Highlight::Comment); + } + fn field_ref(&mut self, name: &str, idx: u32) { + self.push(name.to_string(), Highlight::Ref); + self.push(format!(" // field@{idx}"), Highlight::Comment); + } + fn method_ref(&mut self, name: &str, idx: u32) { + self.push(name.to_string(), Highlight::Ref); + self.push(format!(" // method@{idx}"), Highlight::Comment); + } + /// Emit `method_name, shorty_desc // method@M, proto@P` for polymorphic calls. + fn method_proto_ref(&mut self, method: &str, method_idx: u32, shorty: &str, proto_idx: u32) { + self.push(method.to_string(), Highlight::Ref); + self.sep(); + self.push(shorty.to_string(), Highlight::Ref); + self.push( + format!(" // method@{method_idx}, proto@{proto_idx}"), + Highlight::Comment, + ); + } + fn call_site_ref(&mut self, idx: u32) { + self.push(format!("// call_site@{idx}"), Highlight::Comment); + } + fn thing_at(&mut self, kind: &str, idx: impl std::fmt::Display) { + self.push(format!("{kind}@{idx}"), Highlight::Plain); + } +} + +struct PlainWriter { + buf: String, +} +impl InsnWriter for PlainWriter { + fn push(&mut self, text: String, _: Highlight) { + self.buf.push_str(&text); + } +} + +struct SpanWriter { + spans: Vec, +} +impl InsnWriter for SpanWriter { + fn push(&mut self, text: String, hl: Highlight) { + self.spans.push(Span { text, hl }); + } +} + +/// Emit `{v0, v1, v2}` (var-arg register list) into `w`. +fn push_var_args(w: &mut W, args: &VarArgs) { + w.plain("{"); + for (i, ®) in args.arg.iter().enumerate() { + if i > 0 { + w.sep(); + } + w.reg(reg as i32); + } + w.plain("}"); +} + +/// Emit `{vStart .. vEnd}` (range register list) into `w`. +fn push_range_regs(w: &mut W, start: u16, end: u16) { + w.plain("{"); + w.reg(start as i32); + w.plain(" .. "); + w.reg(end as i32); + w.plain("}"); +} + +/// Core formatting logic: writes every fragment of one instruction into `w`. 
+/// +/// Both [`Instruction::to_string`] and [`Instruction::to_styled`] delegate +/// here, differing only in the [`InsnWriter`] implementation supplied. +fn format_insn<'a, C, W>( + inst: &Instruction<'a>, + dex_file: Option<&DexFile<'a, C>>, + w: &mut W, +) -> Result<()> +where + C: DexContainer<'a>, + W: InsnWriter, +{ + let opcode = inst.name(); + + if inst.opcode() == Code::NOP { + let name = match inst.fetch16(0)? { + signatures::ArrayDataSignature => "array-data", + signatures::PackedSwitchSignature => "packed-switch", + signatures::SparseSwitchSignature => "sparse-switch", + _ => opcode, + }; + w.opcode(name); + return Ok(()); + } + + match inst.format() { + &Format::k10x => w.opcode(opcode), + Format::k12x => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.reg(vreg::B(inst)?); + } + Format::k11n => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.imm_i32(vreg::B(inst)?); + } + Format::k11x => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + } + Format::k10t => { + w.opcode(opcode); + w.plain(" "); + w.offset_signed(vreg::A(inst)?); + } + Format::k20t => { + w.opcode(opcode); + w.plain(" "); + w.offset_signed(vreg::A(inst)?); + } + Format::k22x => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.reg(vreg::B(inst)?); + } + Format::k21t => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.offset_signed(vreg::B(inst)?); + } + Format::k21s => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.imm_i32(vreg::B(inst)?); + } + Format::k21h => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + if inst.opcode() == Code::CONST_HIGH16 { + let value = (vreg::B(inst)? as u32) << 16; + w.imm_typed_u32("int", value); + } else { + let value = (vreg::B(inst)? 
as u64) << 48; + w.imm_typed_u64("long", value); + } + } + Format::k21c => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + let index = vreg::B(inst)?; + match (dex_file, inst.opcode()) { + (Some(dex), Code::CONST_STRING) => { + w.string_lit(&dex.pretty_utf16_at(index as u32), index); + } + (Some(dex), Code::CHECK_CAST | Code::CONST_CLASS | Code::NEW_INSTANCE) => { + let type_idx = index as TypeIndex; + w.type_ref(&dex.pretty_type_at(type_idx), type_idx); + } + ( + Some(dex), + Code::SGET + | Code::SGET_WIDE + | Code::SGET_OBJECT + | Code::SGET_BOOLEAN + | Code::SGET_BYTE + | Code::SGET_CHAR + | Code::SGET_SHORT + | Code::SPUT + | Code::SPUT_WIDE + | Code::SPUT_OBJECT + | Code::SPUT_BOOLEAN + | Code::SPUT_BYTE + | Code::SPUT_CHAR + | Code::SPUT_SHORT, + ) => { + let field_idx = index as u32; + w.field_ref( + &dex.pretty_field_at(field_idx, prettify::Field::WithType), + field_idx, + ); + } + _ => { + w.thing_at("thing", index); + } + } + } + #[rustfmt::skip] + &Format::k23x => { + w.opcode(opcode); w.plain(" "); + w.reg(vreg::A(inst)?); w.sep(); + w.reg(vreg::B(inst)?); w.sep(); + w.reg(vreg::C(inst)?); + } + #[rustfmt::skip] + Format::k22b => { + w.opcode(opcode); w.plain(" "); + w.reg(vreg::A(inst)?); w.sep(); + w.reg(vreg::B(inst)?); w.sep(); + w.imm_i32(vreg::C(inst)?); + } + #[rustfmt::skip] + Format::k22t => { + w.opcode(opcode); w.plain(" "); + w.reg(vreg::A(inst)?); w.sep(); + w.reg(vreg::B(inst)?); w.sep(); + w.offset_signed(vreg::C(inst)?); + } + #[rustfmt::skip] + Format::k22s => { + w.opcode(opcode); w.plain(" "); + w.reg(vreg::A(inst)?); w.sep(); + w.reg(vreg::B(inst)?); w.sep(); + w.imm_i32(vreg::C(inst)?); + } + Format::k22c => { + let index = vreg::C(inst)? 
as u32; + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.reg(vreg::B(inst)?); + w.sep(); + match (dex_file, inst.opcode()) { + ( + Some(dex), + Code::IGET + | Code::IGET_WIDE + | Code::IGET_OBJECT + | Code::IGET_BOOLEAN + | Code::IGET_BYTE + | Code::IGET_CHAR + | Code::IGET_SHORT + | Code::IPUT + | Code::IPUT_WIDE + | Code::IPUT_OBJECT + | Code::IPUT_BOOLEAN + | Code::IPUT_BYTE + | Code::IPUT_CHAR + | Code::IPUT_SHORT, + ) => { + w.field_ref( + &dex.pretty_field_at(index, prettify::Field::WithType), + index, + ); + } + (Some(dex), Code::NEW_ARRAY | Code::INSTANCE_OF) => { + w.type_ref(&dex.pretty_type_at(index as TypeIndex), index); + } + _ => { + w.thing_at("thing", index); + } + } + } + Format::k30t => { + w.opcode(opcode); + w.plain(" "); + w.offset_signed(vreg::A(inst)?); + } + Format::k32x => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.reg(vreg::B(inst)?); + } + Format::k31i => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.imm_i32(vreg::B(inst)?); + } + Format::k31t => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.offset_signed(vreg::B(inst)?); + } + Format::k31c => { + let index = vreg::B(inst)? as u32; + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + if let (Some(dex), Code::CONST_STRING_JUMBO) = (dex_file, inst.opcode()) { + w.string_lit(&dex.pretty_utf16_at(index), index); + } else { + w.thing_at("thing", index); + } + } + Format::k35c => { + let var_args = vreg::var_args(inst)?; + let index = vreg::B(inst)? 
as u32; + w.opcode(opcode); + w.plain(" "); + push_var_args(w, &var_args); + w.sep(); + match (dex_file, inst.opcode()) { + (Some(dex), Code::FILLED_NEW_ARRAY) => { + w.type_ref(&dex.pretty_type_at(index as TypeIndex), index); + } + ( + Some(dex), + Code::INVOKE_VIRTUAL + | Code::INVOKE_SUPER + | Code::INVOKE_DIRECT + | Code::INVOKE_STATIC + | Code::INVOKE_INTERFACE, + ) => { + w.method_ref( + &dex.pretty_method_at(index, prettify::Method::WithSig), + index, + ); + } + (_, Code::INVOKE_CUSTOM) => { + w.call_site_ref(index); + } + _ => { + w.thing_at("thing", index); + } + } + } + Format::k3rc => { + let var_range = vreg::args_range(inst)?; + let index = vreg::B(inst)? as u32; + w.opcode(opcode); + w.plain(" "); + push_range_regs(w, *var_range.start(), *var_range.end()); + match (dex_file, inst.opcode()) { + ( + Some(dex), + Code::INVOKE_VIRTUAL_RANGE + | Code::INVOKE_SUPER_RANGE + | Code::INVOKE_DIRECT_RANGE + | Code::INVOKE_STATIC_RANGE + | Code::INVOKE_INTERFACE_RANGE, + ) => { + w.sep(); + w.method_ref( + &dex.pretty_method_at(index, prettify::Method::WithSig), + index, + ); + } + (_, Code::INVOKE_CUSTOM_RANGE) => { + w.sep(); + w.call_site_ref(index); + } + _ => { + // Note: preserved space (not comma) before fallback, matching + // the original output. + w.plain(" "); + w.thing_at("thing", index); + } + } + } + Format::k45cc => { + let var_args = vreg::var_args(inst)?; + let method_idx = vreg::B(inst)? as u32; + let proto_idx = vreg::H(inst)? as u32; + w.opcode(opcode); + w.plain(" "); + push_var_args(w, &var_args); + w.sep(); + if let Some(dex) = dex_file { + w.method_proto_ref( + &dex.pretty_method_at(method_idx, prettify::Method::WithSig), + method_idx, + &dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, + proto_idx, + ); + } else { + w.thing_at("method", method_idx); + w.sep(); + w.thing_at("proto", proto_idx); + } + } + Format::k4rcc => { + let args_range = vreg::args_range(inst)?; + let method_idx = vreg::B(inst)? 
as u32; + let proto_idx = vreg::H(inst)? as u32; + w.opcode(opcode); + w.plain(" "); + push_range_regs(w, *args_range.start(), *args_range.end()); + w.sep(); + match (dex_file, inst.opcode()) { + (Some(dex), Code::INVOKE_POLYMORPHIC_RANGE) => { + w.method_proto_ref( + &dex.pretty_method_at(method_idx, prettify::Method::WithSig), + method_idx, + &dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, + proto_idx, + ); + } + _ => { + w.thing_at("method", method_idx); + w.sep(); + w.thing_at("proto", proto_idx); + } + } + } + Format::k51l => { + w.opcode(opcode); + w.plain(" "); + w.reg(vreg::A(inst)?); + w.sep(); + w.imm_u64(vreg::wide_b(inst)?); + } + Format::kInvalidFormat => { + w.plain(""); + } + } + Ok(()) +} + impl<'a> Instruction<'a> { + /// Format this instruction as a plain string. + /// + /// When `dex_file` is supplied, indices are resolved to human-readable + /// names. Pass `None` to get raw `thing@N` fallbacks. pub fn to_string(&self, dex_file: Option<&DexFile<'a, C>>) -> Result where C: DexContainer<'a>, { - let opcode = self.name(); - if self.opcode() == Code::NOP { - return Ok((match self.fetch16(0)? 
{ - signatures::ArrayDataSignature => "array-data", - signatures::PackedSwitchSignature => "packed-switch", - signatures::SparseSwitchSignature => "sparse-switch", - _ => opcode, - }) - .to_string()); - } - - Ok(match self.format() { - &Format::k10x => opcode.to_string(), - Format::k12x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), - Format::k11n => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), - Format::k11x => format!("{opcode} v{}", vreg::A(self)?), - Format::k10t => format!("{opcode} {:+}", vreg::A(self)?), - Format::k20t => format!("{opcode} {:+}", vreg::A(self)?), - Format::k22x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), - Format::k21t => format!("{opcode} v{}, {:+}", vreg::A(self)?, vreg::B(self)?), - Format::k21s => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), - Format::k21h => { - // op vAA, #+BBBB0000[00000000] - if self.opcode() == Code::CONST_HIGH16 { - let value = (vreg::B(self)? as u32) << 16; - format!( - "{opcode} v{}, #int {:+} // {:#x}", - vreg::A(self)?, - value, - value - ) - } else { - let value = (vreg::B(self)? as u64) << 48; - format!( - "{opcode} v{}, #long {:+} // {:#x}", - vreg::A(self)?, - value, - value - ) - } + let mut w = PlainWriter { buf: String::new() }; + format_insn(self, dex_file, &mut w)?; + Ok(w.buf) + } + + /// Format this instruction as a sequence of highlighted [`Span`]s. + /// + /// Identical output to [`to_string`] when spans are concatenated, but each + /// fragment carries a [`Highlight`] tag that callers can map to colors or + /// other markup without re-parsing the text. + pub fn to_styled(&self, dex_file: Option<&DexFile<'a, C>>) -> Result + where + C: DexContainer<'a>, + { + let mut w = SpanWriter { spans: Vec::new() }; + format_insn(self, dex_file, &mut w)?; + Ok(w.spans) + } + + /// Format this instruction as assembler text that can be round-tripped back + /// through [`crate::file::builder::CodeBuilder::emit`]. 
+ /// + /// Unlike [`to_string`], which uses pretty-printed class/method/field names, + /// this method outputs raw DEX descriptor form so the output is parseable: + /// + /// ```text + /// invoke-virtual {v0, v1}, Ljava/lang/Object;->toString()Ljava/lang/String; + /// iget-object v0, v1, Lcom/example/Foo;->bar:Ljava/lang/String; + /// const/high16 v0, #+65536 + /// goto +5 + /// ``` + /// + /// Index-annotation comments (`// method@N`) are **not** included. + pub fn to_assembler_text(&self, dex_file: &DexFile<'a, C>) -> Result + where + C: DexContainer<'a>, + { + format_insn_assembler(self, dex_file) + } +} + +// -- Raw-descriptor helpers on DexFile ---------------------------------------- + +impl<'a, C: DexContainer<'a>> DexFile<'a, C> { + /// Raw DEX type descriptor for `type_idx` (e.g. `"Ljava/lang/Object;"`). + pub fn raw_type_ref_at(&self, type_idx: TypeIndex) -> String { + self.get_type_id(type_idx) + .and_then(|t| self.get_str_lossy_at(t.descriptor_idx)) + .unwrap_or_else(|_| format!("Lunknown/type_{type_idx};")) + } + + /// Raw DEX method reference (e.g. `"Ljava/lang/Object;->toString()Ljava/lang/String;"`). + pub fn raw_method_ref_at(&self, method_idx: u32) -> String { + self.raw_method_ref_at_impl(method_idx) + .unwrap_or_else(|_| format!("Lunknown/method_{method_idx};->unknown()V")) + } + + fn raw_method_ref_at_impl(&self, method_idx: u32) -> Result { + let mid = self.get_method_id(method_idx)?; + let class = self.raw_type_ref_at(mid.class_idx); + let name = self.get_str_lossy_at(mid.name_idx)?; + let proto = self.get_proto_id(mid.proto_idx)?; + let ret = self.raw_type_ref_at(proto.return_type_idx); + let mut params = String::new(); + if let Some(type_list) = self.get_type_list(proto.parameters_off)? 
{ + for item in type_list { + params.push_str(&self.raw_type_ref_at(item.type_idx)); } - Format::k21c => { - // op vAA, type@BBBB check-cast - // op vAA, field@BBBB const-class - // op vAA, method_handle@BBBB const-method-handle - // op vAA, proto@BBBB const-method-type - // op vAA, string@BBBB const-string - match (dex_file, self.opcode()) { - (Some(dex), Code::CONST_STRING) => { - let index = vreg::B(self)?; - format!( - "{opcode} v{}, {:?} // string@{}", - vreg::A(self)?, - dex.pretty_utf16_at(index as u32), - index - ) - } - (Some(dex), Code::CHECK_CAST | Code::CONST_CLASS | Code::NEW_INSTANCE) => { - let type_idx = vreg::B(self)? as TypeIndex; - format!( - "{opcode} v{}, {} // type@{}", - vreg::A(self)?, - dex.pretty_type_at(type_idx), - type_idx - ) - } - ( - Some(dex), - Code::SGET - | Code::SGET_WIDE - | Code::SGET_OBJECT - | Code::SGET_BOOLEAN - | Code::SGET_BYTE - | Code::SGET_CHAR - | Code::SGET_SHORT - | Code::SPUT - | Code::SPUT_WIDE - | Code::SPUT_OBJECT - | Code::SPUT_BOOLEAN - | Code::SPUT_BYTE - | Code::SPUT_CHAR - | Code::SPUT_SHORT, - ) => { - let field_idx = vreg::B(self)? as u32; - format!( - "{opcode} v{}, {} // field@{}", - vreg::A(self)?, - dex.pretty_field_at(field_idx, prettify::Field::WithType), - field_idx - ) - } - _ => format!("{opcode} v{}, thing@{}", vreg::A(self)?, vreg::B(self)?), - } + } + Ok(format!("{class}->{name}({params}){ret}")) + } + + /// Raw DEX field reference (e.g. `"Lcom/example/Foo;->counter:I"`). 
+ pub fn raw_field_ref_at(&self, field_idx: u32) -> String { + self.raw_field_ref_at_impl(field_idx) + .unwrap_or_else(|_| format!("Lunknown/field_{field_idx};->unknown:V")) + } + + fn raw_field_ref_at_impl(&self, field_idx: u32) -> Result { + let fid = self.get_field_id(field_idx)?; + let class = self.raw_type_ref_at(fid.class_idx); + let name = self.get_str_lossy_at(fid.name_idx)?; + let ftype = self.raw_type_ref_at(fid.type_idx); + Ok(format!("{class}->{name}:{ftype}")) + } +} + +// -- Assembler text formatter -------------------------------------------------- + +/// Format one instruction as assembler text suitable for round-tripping through +/// [`crate::file::builder::CodeBuilder::emit`]. All references use raw DEX +/// descriptor form; index-annotation comments are omitted. +fn format_insn_assembler<'a, C>(inst: &Instruction<'a>, dex: &DexFile<'a, C>) -> Result +where + C: DexContainer<'a>, +{ + let op = inst.name(); + let mut buf = String::with_capacity(64); + + // Helper closures to avoid repetition + macro_rules! w { ($s:expr) => { buf.push_str($s) } } + macro_rules! wfmt { ($($t:tt)*) => { buf.push_str(&format!($($t)*)) } } + + match inst.format() { + &Format::k10x => { + // Distinguish pseudo-instructions (switch/array payloads) from nop + if inst.opcode() == Code::NOP { + let name = match inst.fetch16(0)? 
{ + signatures::ArrayDataSignature => "array-data", + signatures::PackedSwitchSignature => "packed-switch", + signatures::SparseSwitchSignature => "sparse-switch", + _ => op, + }; + w!(name); + } else { + w!(op); } - #[rustfmt::skip] - &Format::k23x => format!("{opcode} v{}, v{}, v{}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), - #[rustfmt::skip] - Format::k22b => format!("{opcode} v{}, v{}, #{:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), - #[rustfmt::skip] - Format::k22t => format!("{opcode} v{}, v{}, {:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), - #[rustfmt::skip] - Format::k22s => format!("{opcode} v{}, v{}, #{:+}", vreg::A(self)?, vreg::B(self)?, vreg::C(self)?), - Format::k22c => { - let index = vreg::C(self)? as u32; - match (dex_file, self.opcode()) { - ( - Some(dex), - Code::IGET - | Code::IGET_WIDE - | Code::IGET_OBJECT - | Code::IGET_BOOLEAN - | Code::IGET_BYTE - | Code::IGET_CHAR - | Code::IGET_SHORT - | Code::IPUT - | Code::IPUT_WIDE - | Code::IPUT_OBJECT - | Code::IPUT_BOOLEAN - | Code::IPUT_BYTE - | Code::IPUT_CHAR - | Code::IPUT_SHORT, - ) => { - format!( - "{opcode} v{}, v{}, {} // field@{}", - vreg::A(self)?, - vreg::B(self)?, - dex.pretty_field_at(index, prettify::Field::WithType), - index - ) - } - (Some(dex), Code::NEW_ARRAY | Code::INSTANCE_OF) => { - format!( - "{opcode} v{}, v{}, {} // type@{}", - vreg::A(self)?, - vreg::B(self)?, - dex.pretty_type_at(index as TypeIndex), - index - ) - } - _ => { - format!( - "{opcode} v{}, v{}, thing@{}", - vreg::A(self)?, - vreg::B(self)?, - index, - ) - } + } + Format::k11x => { wfmt!("{op} v{}", vreg::A(inst)?) } + Format::k12x => { wfmt!("{op} v{}, v{}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k11n => { wfmt!("{op} v{}, #{:+}", vreg::A(inst)?, vreg::B(inst)?) } + // Branch targets: emit as signed offsets (parseable without labels) + Format::k10t | Format::k20t | Format::k30t => { + wfmt!("{op} {:+}", vreg::A(inst)?) 
+ } + Format::k22x => { wfmt!("{op} v{}, v{}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k21t => { wfmt!("{op} v{}, {:+}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k22t => { + wfmt!("{op} v{}, v{}, {:+}", vreg::A(inst)?, vreg::B(inst)?, vreg::C(inst)?) + } + Format::k21s => { wfmt!("{op} v{}, #{:+}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k21h => { + // Emit the full shifted value so the parser can store it directly. + let full_val: i64 = if inst.opcode() == Code::CONST_HIGH16 { + ((vreg::B(inst)? as u32) << 16) as i64 + } else { + ((vreg::B(inst)? as u64) << 48) as i64 + }; + wfmt!("{op} v{}, #{:+}", vreg::A(inst)?, full_val); + } + Format::k21c => { + let index = vreg::B(inst)? as u32; + wfmt!("{op} v{}, ", vreg::A(inst)?); + match inst.opcode() { + Code::CONST_STRING => { + let s = dex.pretty_utf16_at(index); + wfmt!("{s:?}"); } - } - Format::k30t => format!("{opcode} {:+}", vreg::A(self)?), - Format::k32x => format!("{opcode} v{}, v{}", vreg::A(self)?, vreg::B(self)?), - Format::k31i => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::B(self)?), - Format::k31t => format!("{opcode} v{}, {:+}", vreg::A(self)?, vreg::B(self)?), - Format::k31c => { - let index = vreg::B(self)? 
as u32; - if let (Some(dex), Code::CONST_STRING_JUMBO) = (dex_file, self.opcode()) { - format!( - "{opcode} v{}, {:?} // string@{}", - vreg::A(self)?, - dex.pretty_utf16_at(index), - index - ) - } else { - format!("{opcode} v{}, thing@{}", vreg::A(self)?, index,) + Code::CONST_CLASS | Code::CHECK_CAST | Code::NEW_INSTANCE => { + w!(&dex.raw_type_ref_at(index as TypeIndex)); + } + Code::SGET + | Code::SGET_WIDE + | Code::SGET_OBJECT + | Code::SGET_BOOLEAN + | Code::SGET_BYTE + | Code::SGET_CHAR + | Code::SGET_SHORT + | Code::SPUT + | Code::SPUT_WIDE + | Code::SPUT_OBJECT + | Code::SPUT_BOOLEAN + | Code::SPUT_BYTE + | Code::SPUT_CHAR + | Code::SPUT_SHORT => { + w!(&dex.raw_field_ref_at(index)); + } + _ => { + wfmt!("thing@{index}"); } } - Format::k35c => { - let var_args = vreg::var_args(self)?; - let args_str = var_args - .arg - .iter() - .map(|reg| format!("v{}", reg)) - .collect::>() - .join(", "); - let index = vreg::B(self)? as u32; - match (dex_file, self.opcode()) { - (Some(dex), Code::FILLED_NEW_ARRAY) => { - format!( - "{opcode} {{{args_str}}}, {} // type@{}", - dex.pretty_type_at(index as TypeIndex), - index - ) - } - ( - Some(dex), - Code::INVOKE_VIRTUAL - | Code::INVOKE_SUPER - | Code::INVOKE_DIRECT - | Code::INVOKE_STATIC - | Code::INVOKE_INTERFACE, - ) => { - format!( - "{opcode} {{{args_str}}}, {} // method@{}", - dex.pretty_method_at(index, prettify::Method::WithSig), - index - ) - } - (_, Code::INVOKE_CUSTOM) => { - format!("{opcode} {{{args_str}}}, // call_site@{}", index) - } - _ => { - format!("{opcode} {{{args_str}}}, thing@{}", index,) - } + } + Format::k31c => { + let index = vreg::B(inst)? as u32; + wfmt!("{op} v{}, ", vreg::A(inst)?); + if inst.opcode() == Code::CONST_STRING_JUMBO { + let s = dex.pretty_utf16_at(index); + wfmt!("{s:?}"); + } else { + wfmt!("thing@{index}"); + } + } + Format::k22c => { + let index = vreg::C(inst)? 
as u32; + wfmt!("{op} v{}, v{}, ", vreg::A(inst)?, vreg::B(inst)?); + match inst.opcode() { + Code::IGET + | Code::IGET_WIDE + | Code::IGET_OBJECT + | Code::IGET_BOOLEAN + | Code::IGET_BYTE + | Code::IGET_CHAR + | Code::IGET_SHORT + | Code::IPUT + | Code::IPUT_WIDE + | Code::IPUT_OBJECT + | Code::IPUT_BOOLEAN + | Code::IPUT_BYTE + | Code::IPUT_CHAR + | Code::IPUT_SHORT => { + w!(&dex.raw_field_ref_at(index)); + } + Code::NEW_ARRAY | Code::INSTANCE_OF => { + w!(&dex.raw_type_ref_at(index as TypeIndex)); + } + _ => { + wfmt!("thing@{index}"); } } - Format::k3rc => { - let var_range = vreg::args_range(self)?; - let index = vreg::B(self)? as u32; - match (dex_file, self.opcode()) { - ( - Some(dex), - Code::INVOKE_VIRTUAL_RANGE - | Code::INVOKE_SUPER_RANGE - | Code::INVOKE_DIRECT_RANGE - | Code::INVOKE_STATIC_RANGE - | Code::INVOKE_INTERFACE_RANGE, - ) => { - format!( - "{opcode} {{v{} .. v{}}}, {} // method@{}", - var_range.start(), - var_range.end(), - dex.pretty_method_at(index, prettify::Method::WithSig), - index - ) - } - (_, Code::INVOKE_CUSTOM_RANGE) => { - format!( - "{opcode} {{v{} .. v{}}}, // call_site@{}", - var_range.start(), - var_range.end(), - index - ) - } - _ => { - format!( - "{opcode} {{v{} .. v{}}} thing@{}", - var_range.start(), - var_range.end(), - index - ) - } + } + #[rustfmt::skip] + Format::k23x => { + wfmt!("{op} v{}, v{}, v{}", vreg::A(inst)?, vreg::B(inst)?, vreg::C(inst)?) + } + #[rustfmt::skip] + Format::k22b => { + wfmt!("{op} v{}, v{}, #{:+}", vreg::A(inst)?, vreg::B(inst)?, vreg::C(inst)?) + } + #[rustfmt::skip] + Format::k22s => { + wfmt!("{op} v{}, v{}, #{:+}", vreg::A(inst)?, vreg::B(inst)?, vreg::C(inst)?) + } + Format::k32x => { wfmt!("{op} v{}, v{}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k31i => { wfmt!("{op} v{}, #{:+}", vreg::A(inst)?, vreg::B(inst)?) } + Format::k31t => { wfmt!("{op} v{}, {:+}", vreg::A(inst)?, vreg::B(inst)?) 
} + Format::k35c | Format::k45cc => { + let var_args = vreg::var_args(inst)?; + let index = vreg::B(inst)? as u32; + wfmt!("{op} {{"); + for (i, &r) in var_args.arg.iter().enumerate() { + if i > 0 { + w!(", "); } + wfmt!("v{r}"); } - Format::k45cc => { - let var_args = vreg::var_args(self)?; - let args_str = var_args - .arg - .iter() - .map(|reg| format!("v{}", reg)) - .collect::>() - .join(", "); - let method_idx = vreg::B(self)? as u32; - let proto_idx = vreg::H(self)? as u32; - if let Some(dex) = dex_file { - format!( - "{opcode} {{{args_str}}}, {}, {} // method@{}, proto@{}", - dex.pretty_method_at(method_idx, prettify::Method::WithSig), - dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, - method_idx, - proto_idx - ) - } else { - format!( - "{opcode} {{{args_str}}}, method@{}, proto@{}", - method_idx, proto_idx - ) + w!("}, "); + match inst.opcode() { + Code::INVOKE_VIRTUAL + | Code::INVOKE_SUPER + | Code::INVOKE_DIRECT + | Code::INVOKE_STATIC + | Code::INVOKE_INTERFACE + | Code::INVOKE_POLYMORPHIC => { + w!(&dex.raw_method_ref_at(index)); + } + Code::FILLED_NEW_ARRAY => { + w!(&dex.raw_type_ref_at(index as TypeIndex)); + } + _ => { + wfmt!("thing@{index}"); } } - Format::k4rcc => { - let args_range = vreg::args_range(self)?; - let method_idx = vreg::B(self)? as u32; - let proto_idx = vreg::H(self)? as u32; - match (dex_file, self.opcode()) { - (Some(dex), Code::INVOKE_POLYMORPHIC_RANGE) => { - format!( - "{opcode} {{v{} .. v{}}}, {}, {} // method@{}, proto@{}", - args_range.start(), - args_range.end(), - dex.pretty_method_at(method_idx, prettify::Method::WithSig), - dex.get_shorty_lossy_at(proto_idx as ProtoIndex)?, - method_idx, - proto_idx - ) - } - _ => { - format!( - "{opcode} {{v{} .. v{}}}, method@{}, proto@{}", - args_range.start(), - args_range.end(), - method_idx, - proto_idx - ) - } + } + Format::k3rc | Format::k4rcc => { + let range = vreg::args_range(inst)?; + let index = vreg::B(inst)? as u32; + wfmt!("{op} {{v{} .. 
v{}}}", range.start(), range.end()); + match inst.opcode() { + Code::INVOKE_VIRTUAL_RANGE + | Code::INVOKE_SUPER_RANGE + | Code::INVOKE_DIRECT_RANGE + | Code::INVOKE_STATIC_RANGE + | Code::INVOKE_INTERFACE_RANGE + | Code::INVOKE_POLYMORPHIC_RANGE => { + w!(", "); + w!(&dex.raw_method_ref_at(index)); + } + _ => { + wfmt!(" thing@{index}"); } } - Format::k51l => format!("{opcode} v{}, #{:+}", vreg::A(self)?, vreg::wide_b(self)?), - Format::kInvalidFormat => "".to_string(), - }) + } + Format::k51l => { + wfmt!("{op} v{}, #{:+}", vreg::A(inst)?, vreg::wide_b(inst)? as i64) + } + Format::kInvalidFormat => { + w!(""); + } } + Ok(buf) } diff --git a/src/file/editor.rs b/src/file/editor.rs new file mode 100644 index 0000000..2d7e723 --- /dev/null +++ b/src/file/editor.rs @@ -0,0 +1,854 @@ +use std::{fs, mem, path::Path}; + +use crate::{ + error::DexError, + file::{ + patch::{encode_uleb128, read_header, skip_uleb128, update_checksum}, + MapItem, MapItemType, + }, + utf::{mutf8_len, str_to_mutf8}, + Result, +}; + +// -- byte helpers -------------------------------------------------------------- + +#[inline] +fn read_u16(data: &[u8], off: usize) -> u16 { + u16::from_le_bytes(data[off..off + 2].try_into().unwrap()) +} + +#[inline] +fn read_u32(data: &[u8], off: usize) -> u32 { + u32::from_le_bytes(data[off..off + 4].try_into().unwrap()) +} + +#[inline] +fn write_u32(data: &mut [u8], off: usize, v: u32) { + data[off..off + 4].copy_from_slice(&v.to_le_bytes()); +} + +// -- cached header fields ------------------------------------------------------ + +#[derive(Clone, Copy)] +struct CachedHeader { + string_ids_size: u32, + string_ids_off: u32, + type_ids_size: u32, + type_ids_off: u32, + proto_ids_size: u32, + proto_ids_off: u32, + field_ids_size: u32, + field_ids_off: u32, + method_ids_size: u32, + method_ids_off: u32, + class_defs_size: u32, + class_defs_off: u32, + map_off: u32, + #[allow(dead_code)] + data_off: u32, +} + +impl CachedHeader { + fn from_data(data: &[u8]) -> 
Result { + let h = read_header(data) + .ok_or_else(|| DexError::DexFileError("cannot read DEX header".into()))?; + Ok(CachedHeader { + string_ids_size: h.string_ids_size, + string_ids_off: h.string_ids_off, + type_ids_size: h.type_ids_size, + type_ids_off: h.type_ids_off, + proto_ids_size: h.proto_ids_size, + proto_ids_off: h.proto_ids_off, + field_ids_size: h.field_ids_size, + field_ids_off: h.field_ids_off, + method_ids_size: h.method_ids_size, + method_ids_off: h.method_ids_off, + class_defs_size: h.class_defs_size, + class_defs_off: h.class_defs_off, + map_off: h.map_off, + data_off: h.data_off, + }) + } +} + +// -- public struct ------------------------------------------------------------- + +/// An owned DEX file with mutation methods. +/// +/// Call [`build`](Self::build) or [`write_to`](Self::write_to) to finalise the +/// result (recalculates the Adler32 checksum). +pub struct DexEditor { + data: Vec, +} + +impl DexEditor { + pub fn from_file(path: &Path) -> Result { + let data = fs::read(path) + .map_err(|e| DexError::DexFileError(format!("read {}: {e}", path.display())))?; + Self::from_bytes(data) + } + + pub fn from_bytes(data: Vec) -> Result { + if data.len() < 0x70 { + return Err(DexError::TruncatedFile); + } + if &data[0..4] != b"dex\n" { + return Err(DexError::BadFileMagic); + } + Ok(Self { data }) + } + + /// Patch `ClassDef.access_flags` for the class identified by its descriptor. + /// + /// Accepts dotted names (`com.example.Foo`), slash form (`com/example/Foo`), + /// or full descriptor form (`Lcom/example/Foo;`). + pub fn set_class_access_flags(&mut self, class_desc: &str, flags: u32) -> Result<()> { + let h = CachedHeader::from_data(&self.data)?; + let cd_off = resolve_class_def_off(&self.data, &h, class_desc)?; + // access_flags is at offset 4: class_idx(2) + pad(2) + write_u32(&mut self.data, cd_off + 4, flags); + Ok(()) + } + + /// Re-encode `access_flags` for the named method inside `class_desc`. 
+ /// + /// Handles LEB128 width changes by splicing bytes in the `class_data_item`. + pub fn set_method_access_flags( + &mut self, + class_desc: &str, + method_name: &str, + flags: u32, + ) -> Result<()> { + let h = CachedHeader::from_data(&self.data)?; + let cd_off = resolve_class_def_off(&self.data, &h, class_desc)?; + + // class_data_off at offset 24: class_idx(2)+pad(2)+flags(4)+super(2)+pad(2)+ifaces(4)+src(4)+ann(4) + let class_data_off = read_u32(&self.data, cd_off + 24) as usize; + if class_data_off == 0 { + return Err(DexError::DexFileError(format!( + "class {class_desc} has no class_data_item" + ))); + } + + let target_mutf8 = str_to_mutf8(method_name); // includes null terminator + + let mut pos = class_data_off; + let static_fields = skip_uleb128(&self.data, &mut pos)?; + let instance_fields = skip_uleb128(&self.data, &mut pos)?; + let direct_methods = skip_uleb128(&self.data, &mut pos)?; + let virtual_methods = skip_uleb128(&self.data, &mut pos)?; + + for _ in 0..static_fields + instance_fields { + skip_uleb128(&self.data, &mut pos)?; // field_idx_diff + skip_uleb128(&self.data, &mut pos)?; // access_flags + } + + let total_methods = direct_methods + virtual_methods; + let mut method_idx: u32 = 0; + + for _ in 0..total_methods { + let diff = skip_uleb128(&self.data, &mut pos)?; + method_idx = method_idx + .checked_add(diff) + .ok_or_else(|| DexError::DexFileError("method index overflow".into()))?; + + let flags_start = pos; + skip_uleb128(&self.data, &mut pos)?; // old access_flags + let flags_end = pos; + skip_uleb128(&self.data, &mut pos)?; // code_off + + if method_name_matches_mutf8(&self.data, &h, method_idx, &target_mutf8)? { + let new_encoded = encode_uleb128(flags); + self.data.splice(flags_start..flags_end, new_encoded); + return Ok(()); + } + } + + Err(DexError::DexFileError(format!( + "method '{method_name}' not found in {class_desc}" + ))) + } + + /// Zero out the `HiddenapiClassData` section (if present) and remove its map entry. 
+ pub fn clear_hiddenapi_flags(&mut self) -> Result<()> { + let map_off = CachedHeader::from_data(&self.data)?.map_off as usize; + if map_off + 4 > self.data.len() { + return Err(DexError::TruncatedFile); + } + + let count = read_u32(&self.data, map_off) as usize; + let items_start = map_off + 4; + const MAP_ITEM_SIZE: usize = mem::size_of::(); // 12 bytes + + let hiddenapi_pos = (0..count).find(|&i| { + let type_val = read_u16(&self.data, items_start + i * MAP_ITEM_SIZE); + type_val == MapItemType::HiddenapiClassData as u16 + }); + + let pos = match hiddenapi_pos { + Some(p) => p, + None => return Ok(()), // section not present + }; + + let item_off = items_start + pos * MAP_ITEM_SIZE; + // MapItem: type_(2) + unused_(2) + size(4) + off(4); off is at byte 8 + let section_off = read_u32(&self.data, item_off + 8) as usize; + + // The first u32 of the section is its total byte size + let section_bytes = if section_off + 4 <= self.data.len() { + read_u32(&self.data, section_off) as usize + } else { + 0 + }; + let section_end = section_off.saturating_add(section_bytes).min(self.data.len()); + self.data[section_off..section_end].fill(0); + + // Remove map item by shifting subsequent items left + let tail_start = item_off + MAP_ITEM_SIZE; + let tail_end = items_start + count * MAP_ITEM_SIZE; + if tail_start < tail_end { + self.data.copy_within(tail_start..tail_end, item_off); + } + // Zero the now-unused last slot + let last_slot = items_start + (count - 1) * MAP_ITEM_SIZE; + if last_slot + MAP_ITEM_SIZE <= self.data.len() { + self.data[last_slot..last_slot + MAP_ITEM_SIZE].fill(0); + } + // Decrement map list count + write_u32(&mut self.data, map_off, (count - 1) as u32); + + Ok(()) + } + + /// Rename a class: replaces `old_desc` with `new_desc` in the string pool and + /// updates all cross-references. + /// + /// Performs a full string-pool rebuild when the MUTF-8 byte lengths differ. 
+ pub fn rename_class(&mut self, old_desc: &str, new_desc: &str) -> Result<()> { + let old_mutf8 = descriptor_mutf8(old_desc); + let new_mutf8 = descriptor_mutf8(new_desc); + + // str_to_mutf8 includes a null terminator; strip it for content comparisons + let old_bytes = &old_mutf8[..old_mutf8.len() - 1]; + let new_bytes = &new_mutf8[..new_mutf8.len() - 1]; + + if old_bytes == new_bytes { + return Ok(()); + } + + if old_bytes.len() == new_bytes.len() { + self.rename_inplace(old_bytes, new_bytes) + } else { + self.rename_rebuild(old_bytes, new_bytes) + } + } + + /// Recalculate the Adler32 checksum and return the finalised bytes. + pub fn build(mut self) -> Result> { + update_checksum(&mut self.data); + Ok(self.data) + } + + /// Finalise and write to `path`. + pub fn write_to(self, path: &Path) -> Result<()> { + let data = self.build()?; + fs::write(path, &data) + .map_err(|e| DexError::DexFileError(format!("write {}: {e}", path.display()))) + } + + // -- private methods ------------------------------------------------------- + + /// Same MUTF-8 byte length: patch in place, re-sort string_ids if needed. + fn rename_inplace(&mut self, old_bytes: &[u8], new_bytes: &[u8]) -> Result<()> { + let h = CachedHeader::from_data(&self.data)?; + + let string_idx = find_string_idx(&self.data, &h, old_bytes) + .ok_or_else(|| DexError::DexFileError("class descriptor not in string pool".into()))?; + + let id_off = h.string_ids_off as usize + string_idx as usize * 4; + let str_data_off = read_u32(&self.data, id_off) as usize; + + let mut pos = str_data_off; + let old_utf16_len = skip_uleb128(&self.data, &mut pos)?; + let leb_size = pos - str_data_off; + let content_start = pos; + + // Recompute utf16_len for the new string (may differ for non-ASCII) + let new_utf16_len = mutf8_len(new_bytes, new_bytes.len())? 
as u32; + let new_leb = encode_uleb128(new_utf16_len); + + if new_leb.len() != leb_size { + // ULEB128 header size changed; fall back to full rebuild + return self.rename_rebuild(old_bytes, new_bytes); + } + + if new_utf16_len != old_utf16_len { + self.data[str_data_off..str_data_off + leb_size].copy_from_slice(&new_leb); + } + self.data[content_start..content_start + new_bytes.len()].copy_from_slice(new_bytes); + // null terminator already in place (lengths are equal) + + // Re-sort string_ids if the new content is out of lexicographic order + if string_out_of_order(&self.data, &h, string_idx, new_bytes)? { + resort_and_remap(&mut self.data, &h)?; + } + + Ok(()) + } + + /// Different MUTF-8 byte length: full string-pool rebuild. + fn rename_rebuild(&mut self, old_bytes: &[u8], new_bytes: &[u8]) -> Result<()> { + let h = CachedHeader::from_data(&self.data)?; + let count = h.string_ids_size as usize; + + // Collect all strings (indexed by current string_ids position = sorted order) + let mut strings: Vec> = collect_strings(&self.data, &h)?; + + let old_string_idx = strings + .iter() + .position(|s| s.as_slice() == old_bytes) + .ok_or_else(|| { + DexError::DexFileError("class descriptor not in string pool".into()) + })?; + strings[old_string_idx] = new_bytes.to_vec(); + + // Compute new sorted order + let mut sorted_indices: Vec = (0..count).collect(); + sorted_indices.sort_by(|&a, &b| strings[a].cmp(&strings[b])); + // sorted_indices[new_pos] = orig_pos + + // old_to_new[orig_pos] = new_pos + let mut old_to_new = vec![0u32; count]; + for (new_pos, &orig_pos) in sorted_indices.iter().enumerate() { + old_to_new[orig_pos] = new_pos as u32; + } + + // Find the string_data section bounds + let (old_section_start, old_section_end) = + find_string_data_section_bounds(&self.data, &h)?; + + // Build the new string_data section; new_offsets[new_pos] = file offset + let mut new_section: Vec = Vec::new(); + let mut new_offsets = vec![0u32; count]; + + for (new_pos, &orig_pos) 
in sorted_indices.iter().enumerate() { + new_offsets[new_pos] = (old_section_start + new_section.len()) as u32; + let bytes = &strings[orig_pos]; + let utf16_len = mutf8_len(bytes, bytes.len())? as u32; + new_section.extend_from_slice(&encode_uleb128(utf16_len)); + new_section.extend_from_slice(bytes); + new_section.push(0); + } + + let delta = new_section.len() as i64 - (old_section_end - old_section_start) as i64; + + // Splice in the new string_data bytes + self.data + .splice(old_section_start..old_section_end, new_section); + + // Shift all file offsets that were past the old section end + adjust_offsets(&mut self.data, old_section_end, delta); + + // Rewrite string_ids with new file offsets + let ids_off = CachedHeader::from_data(&self.data)?.string_ids_off as usize; + for (new_pos, &file_off) in new_offsets.iter().enumerate() { + write_u32(&mut self.data, ids_off + new_pos * 4, file_off); + } + + // Remap all string_idx cross-references + let h2 = CachedHeader::from_data(&self.data)?; + remap_string_refs(&mut self.data, &h2, &old_to_new); + + // Update file_size and data_size in the header + let new_file_size = self.data.len() as u32; + write_u32(&mut self.data, 32, new_file_size); // header offset 32 = file_size + let data_off = read_u32(&self.data, 108); // header offset 108 = data_off + if data_off != 0 && new_file_size >= data_off { + write_u32(&mut self.data, 104, new_file_size - data_off); // offset 104 = data_size + } + + Ok(()) + } +} + +// -- free helpers -------------------------------------------------------------- + +/// Normalise a class name to its full DEX descriptor form (`Lcom/example/Foo;`). +fn to_descriptor(name: &str) -> String { + if name.starts_with('L') && name.ends_with(';') { + name.to_string() + } else { + format!("L{};", name.replace('.', "/")) + } +} + +/// Return the MUTF-8 encoding (with null terminator) of a class descriptor. 
+fn descriptor_mutf8(name: &str) -> Vec { + str_to_mutf8(&to_descriptor(name)) +} + +/// Find the position in `string_ids` whose string content equals `target` (without null). +fn find_string_idx(data: &[u8], h: &CachedHeader, target: &[u8]) -> Option { + let ids_off = h.string_ids_off as usize; + (0..h.string_ids_size).find(|&i| { + let data_off = read_u32(data, ids_off + i as usize * 4) as usize; + read_string_bytes(data, data_off).as_deref() == Some(target) + }) +} + +/// Read the MUTF-8 bytes (without null) for the string_data_item at `data_off`. +fn read_string_bytes(data: &[u8], data_off: usize) -> Option> { + let mut pos = data_off; + skip_uleb128(data, &mut pos).ok()?; + let start = pos; + while pos < data.len() && data[pos] != 0 { + pos += 1; + } + Some(data[start..pos].to_vec()) +} + +/// Find the type_ids index whose `descriptor_idx` equals `string_idx`. +fn find_type_idx(data: &[u8], h: &CachedHeader, string_idx: u32) -> Option { + let ids_off = h.type_ids_off as usize; + (0..h.type_ids_size as usize).find_map(|i| { + let sidx = read_u32(data, ids_off + i * 4); + (sidx == string_idx).then_some(i as u16) + }) +} + +/// Return the byte offset of the `ClassDef` for `type_idx`, or `None`. +fn find_class_def_off(data: &[u8], h: &CachedHeader, type_idx: u16) -> Option { + const CLASS_DEF_SIZE: usize = 32; + let defs_off = h.class_defs_off as usize; + (0..h.class_defs_size as usize).find_map(|i| { + let off = defs_off + i * CLASS_DEF_SIZE; + (read_u16(data, off) == type_idx).then_some(off) + }) +} + +/// Resolve a class descriptor -> byte offset of its `ClassDef`. 
+fn resolve_class_def_off(data: &[u8], h: &CachedHeader, class_desc: &str) -> Result { + let mutf8 = descriptor_mutf8(class_desc); + let content = &mutf8[..mutf8.len() - 1]; // strip null for comparison + + let string_idx = find_string_idx(data, h, content) + .ok_or_else(|| DexError::DexFileError(format!("string not found: {class_desc}")))?; + let type_idx = find_type_idx(data, h, string_idx) + .ok_or_else(|| DexError::DexFileError(format!("type not found: {class_desc}")))?; + find_class_def_off(data, h, type_idx) + .ok_or_else(|| DexError::DexFileError(format!("class not found: {class_desc}"))) +} + +/// Return `true` if method `method_idx` has the MUTF-8 name `target_mutf8` (with null). +fn method_name_matches_mutf8( + data: &[u8], + h: &CachedHeader, + method_idx: u32, + target_mutf8: &[u8], +) -> Result { + if method_idx >= h.method_ids_size { + return Ok(false); + } + // MethodId layout: class_idx(2) + proto_idx(2) + name_idx(4); name_idx at offset 4 + let mid_off = h.method_ids_off as usize + method_idx as usize * 8; + let name_idx = read_u32(data, mid_off + 4); + if name_idx >= h.string_ids_size { + return Ok(false); + } + let str_data_off = read_u32(data, h.string_ids_off as usize + name_idx as usize * 4) as usize; + let mut pos = str_data_off; + skip_uleb128(data, &mut pos)?; + Ok(data.get(pos..pos + target_mutf8.len()) == Some(target_mutf8)) +} + +/// Collect all string content bytes (no null) indexed by `string_ids` position. +fn collect_strings(data: &[u8], h: &CachedHeader) -> Result>> { + (0..h.string_ids_size as usize) + .map(|i| { + let id_off = h.string_ids_off as usize + i * 4; + let str_data_off = read_u32(data, id_off) as usize; + read_string_bytes(data, str_data_off) + .ok_or_else(|| DexError::DexFileError("truncated string data".into())) + }) + .collect() +} + +/// Find the [start, end) byte range of the string_data section. 
+fn find_string_data_section_bounds(data: &[u8], h: &CachedHeader) -> Result<(usize, usize)> { + if h.string_ids_size == 0 { + return Err(DexError::DexFileError("DEX has no strings".into())); + } + let mut min_off = usize::MAX; + let mut max_off: usize = 0; + for i in 0..h.string_ids_size as usize { + let id_off = h.string_ids_off as usize + i * 4; + let off = read_u32(data, id_off) as usize; + min_off = min_off.min(off); + max_off = max_off.max(off); + } + // Compute the end of the last string_data_item + let end = { + let mut pos = max_off; + skip_uleb128(data, &mut pos) + .map_err(|_| DexError::DexFileError("truncated string data".into()))?; + while pos < data.len() && data[pos] != 0 { + pos += 1; + } + if pos >= data.len() { + return Err(DexError::DexFileError("unterminated string".into())); + } + pos + 1 // include null terminator + }; + Ok((min_off, end)) +} + +/// Check whether `string_idx`'s new content is out of lexicographic order. +fn string_out_of_order( + data: &[u8], + h: &CachedHeader, + string_idx: u32, + new_bytes: &[u8], +) -> Result { + let idx = string_idx as usize; + let n = h.string_ids_size as usize; + let ids_off = h.string_ids_off as usize; + + if idx > 0 { + let prev_off = read_u32(data, ids_off + (idx - 1) * 4) as usize; + let prev = read_string_bytes(data, prev_off) + .ok_or_else(|| DexError::DexFileError("bad string data".into()))?; + if new_bytes < prev.as_slice() { + return Ok(true); + } + } + if idx + 1 < n { + let next_off = read_u32(data, ids_off + (idx + 1) * 4) as usize; + let next = read_string_bytes(data, next_off) + .ok_or_else(|| DexError::DexFileError("bad string data".into()))?; + if new_bytes > next.as_slice() { + return Ok(true); + } + } + Ok(false) +} + +/// Re-sort the `string_ids` array by string content and remap all cross-references. 
+fn resort_and_remap(data: &mut [u8], h: &CachedHeader) -> Result<()> { + let n = h.string_ids_size as usize; + let ids_off = h.string_ids_off as usize; + + // Collect (original_pos, file_off, content_bytes) + let mut entries: Vec<(usize, u32, Vec)> = (0..n) + .map(|i| { + let file_off = read_u32(data, ids_off + i * 4); + let bytes = read_string_bytes(data, file_off as usize).unwrap_or_default(); + (i, file_off, bytes) + }) + .collect(); + + entries.sort_by(|(_, _, a), (_, _, b)| a.cmp(b)); + // entries[new_pos] = (orig_pos, file_off, bytes) + + let mut old_to_new = vec![0u32; n]; + for (new_pos, (orig_pos, _, _)) in entries.iter().enumerate() { + old_to_new[*orig_pos] = new_pos as u32; + } + + for (new_pos, (_, file_off, _)) in entries.iter().enumerate() { + write_u32(data, ids_off + new_pos * 4, *file_off); + } + + remap_string_refs(data, h, &old_to_new); + Ok(()) +} + +/// Update all u32 string_idx references using `old_to_new[old_idx] = new_idx`. +/// +/// Tables: type_ids (descriptor_idx), proto_ids (shorty_idx), +/// method_ids (name_idx), field_ids (name_idx), class_defs (source_file_idx). 
+fn remap_string_refs(data: &mut [u8], h: &CachedHeader, old_to_new: &[u32]) { + let remap = |v: u32| old_to_new.get(v as usize).copied().unwrap_or(v); + + // type_ids: descriptor_idx (u32) at offset 0, 4 bytes per entry + for i in 0..h.type_ids_size as usize { + let off = h.type_ids_off as usize + i * 4; + let v = read_u32(data, off); + write_u32(data, off, remap(v)); + } + // proto_ids: shorty_idx (u32) at offset 0, 12 bytes per entry + for i in 0..h.proto_ids_size as usize { + let off = h.proto_ids_off as usize + i * 12; + let v = read_u32(data, off); + write_u32(data, off, remap(v)); + } + // method_ids: name_idx (u32) at offset 4, 8 bytes per entry + for i in 0..h.method_ids_size as usize { + let off = h.method_ids_off as usize + i * 8 + 4; + let v = read_u32(data, off); + write_u32(data, off, remap(v)); + } + // field_ids: name_idx (u32) at offset 4, 8 bytes per entry + for i in 0..h.field_ids_size as usize { + let off = h.field_ids_off as usize + i * 8 + 4; + let v = read_u32(data, off); + write_u32(data, off, remap(v)); + } + // class_defs: source_file_idx (u32) at offset 16, 32 bytes per entry + for i in 0..h.class_defs_size as usize { + let off = h.class_defs_off as usize + i * 32 + 16; + let v = read_u32(data, off); + if v != u32::MAX { + write_u32(data, off, remap(v)); + } + } +} + +/// Shift all stored file offsets >= `threshold` by `delta`. +/// +/// Adjusts: header offset fields, map list entries, ClassDef offset fields, +/// and CodeItem debug_info_off fields. Call after splicing bytes so that the +/// threshold equals the byte position after the last unchanged byte. 
+fn adjust_offsets(data: &mut [u8], threshold: usize, delta: i64) { + if delta == 0 { + return; + } + let threshold = threshold as u32; + let adjust = |v: u32| -> u32 { + if v != 0 && v >= threshold { + (v as i64 + delta) as u32 + } else { + v + } + }; + + // Header offset fields (byte offset -> field): + // 48=link_off, 52=map_off, 60=string_ids_off, 68=type_ids_off, + // 76=proto_ids_off, 84=field_ids_off, 92=method_ids_off, 100=class_defs_off, 108=data_off + const OFFSET_FIELDS: &[usize] = &[48, 52, 60, 68, 76, 84, 92, 100, 108]; + for &byte_off in OFFSET_FIELDS { + if byte_off + 4 <= data.len() { + let v = read_u32(data, byte_off); + write_u32(data, byte_off, adjust(v)); + } + } + + // Fix file_size at byte 32 + write_u32(data, 32, data.len() as u32); + + // Adjust map list entries (read map_off after the header adjustment above) + let map_off = read_u32(data, 52) as usize; + if map_off + 4 > data.len() { + return; + } + let count = read_u32(data, map_off) as usize; + const MAP_ITEM_SIZE: usize = mem::size_of::(); // 12 + for i in 0..count { + // MapItem: type_(2) + unused_(2) + size(4) + off(4); off field at byte 8 within item + let off_field = map_off + 4 + i * MAP_ITEM_SIZE + 8; + if off_field + 4 <= data.len() { + let v = read_u32(data, off_field); + write_u32(data, off_field, adjust(v)); + } + } + + // Adjust ClassDef offset fields within each ClassDef (32 bytes each): + // interfaces_off @ +12, annotations_off @ +20, class_data_off @ +24, static_values_off @ +28 + let class_defs_off = read_u32(data, 100) as usize; + let class_defs_size = read_u32(data, 96) as usize; + const CLASS_DEF_SIZE: usize = 32; + const CLASS_DEF_OFFSET_FIELDS: &[usize] = &[12, 20, 24, 28]; + for i in 0..class_defs_size { + let def_base = class_defs_off + i * CLASS_DEF_SIZE; + for &rel_off in CLASS_DEF_OFFSET_FIELDS { + let abs_off = def_base + rel_off; + if abs_off + 4 <= data.len() { + let v = read_u32(data, abs_off); + write_u32(data, abs_off, adjust(v)); + } + } + } + + // 
Adjust debug_info_off in CodeItems (variable-size, located via map list). + // CodeItem layout: registers(2)+ins(2)+outs(2)+tries(2)+debug_info_off(4)+insns_size(4)+insns[...] + // We need to find code items via the map list entry (type 0x2001 = CODE_ITEM). + const CODE_ITEM_TYPE: u16 = 0x2001; + // Re-read map after adjustments above + if map_off + 4 > data.len() { + return; + } + let map_count = read_u32(data, map_off) as usize; + for i in 0..map_count { + let item_off = map_off + 4 + i * MAP_ITEM_SIZE; + if item_off + MAP_ITEM_SIZE > data.len() { + break; + } + let item_type = u16::from_le_bytes([data[item_off], data[item_off + 1]]); + if item_type != CODE_ITEM_TYPE { + continue; + } + let code_items_count = read_u32(data, item_off + 4) as usize; + let mut code_item_off = read_u32(data, item_off + 8) as usize; + for _ in 0..code_items_count { + if code_item_off + 16 > data.len() { + break; + } + // debug_info_off at byte 8 within CodeItem + let dbg_field = code_item_off + 8; + let v = read_u32(data, dbg_field); + write_u32(data, dbg_field, adjust(v)); + + // Advance to next CodeItem: header(16) + insns_size*2 bytes, aligned to 4 + let insns_size = read_u32(data, code_item_off + 12) as usize; + let raw_next = code_item_off + 16 + insns_size * 2; + // align up to 4 + code_item_off = (raw_next + 3) & !3; + } + break; // only one CODE_ITEM map entry + } +} + +// --- Tests ------------------------------------------------------------------- + +// --- Tests ------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::file::{verifier::VerifyPreset, DexFile, DexLocation}; + + const PRIME: &[u8] = include_bytes!("../../tests/prime/prime.dex"); + const FIB: &[u8] = include_bytes!("../../tests/fibonacci/fib.dex"); + + fn prime() -> DexEditor { DexEditor::from_bytes(PRIME.to_vec()).unwrap() } + + // -- from_bytes ----------------------------------------------------------- + + #[test] + fn from_bytes_valid() { 
let _ = prime(); } + + #[test] + fn from_bytes_invalid_magic_errors() { + assert!(DexEditor::from_bytes(b"not a dex file".to_vec()).is_err()); + } + + #[test] + fn from_bytes_too_short_errors() { + assert!(DexEditor::from_bytes(vec![0u8; 10]).is_err()); + } + + // -- set_class_access_flags ----------------------------------------------- + + #[test] + fn set_class_flags_roundtrip() { + let mut ed = prime(); + ed.set_class_access_flags("Lprime/prime;", 0x0011).unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + assert_eq!(dex.get_class_def(0).unwrap().access_flags, 0x0011); + } + + #[test] + fn set_class_flags_dotted_name() { + let mut ed = prime(); + ed.set_class_access_flags("prime.prime", 0x0001).unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + assert_eq!(dex.get_class_def(0).unwrap().access_flags, 0x0001); + } + + #[test] + fn set_class_flags_unknown_class_errors() { + let mut ed = prime(); + assert!(ed.set_class_access_flags("Lno/such/Class;", 0x0001).is_err()); + } + + // -- set_method_access_flags ---------------------------------------------- + + #[test] + fn set_method_flags_main() { + let mut ed = prime(); + ed.set_method_access_flags("Lprime/prime;", "main", 0x0009).unwrap(); + let bytes = ed.build().unwrap(); + assert!(!bytes.is_empty()); + } + + #[test] + fn set_method_flags_unknown_method_errors() { + let mut ed = prime(); + assert!(ed.set_method_access_flags("Lprime/prime;", "noSuch", 0x0001).is_err()); + } + + // -- rename_class --------------------------------------------------------- + + #[test] + fn rename_same_length_roundtrip() { + let mut ed = prime(); + ed.rename_class("Lprime/prime;", "Lprime/other;").unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + let desc = 
dex.get_type_desc_utf16_at(dex.get_class_def(0).unwrap().class_idx).unwrap(); + assert_eq!(desc, "Lprime/other;"); + } + + #[test] + fn rename_different_length_roundtrip() { + let mut ed = prime(); + ed.rename_class("Lprime/prime;", "Lprime/renamed;").unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + let desc = dex.get_type_desc_utf16_at(dex.get_class_def(0).unwrap().class_idx).unwrap(); + assert_eq!(desc, "Lprime/renamed;"); + } + + #[test] + fn rename_verifies_checksum() { + let mut ed = prime(); + ed.rename_class("Lprime/prime;", "Lprime/renamed;").unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + DexFile::verify(&dex, VerifyPreset::ChecksumOnly).unwrap(); + } + + #[test] + fn rename_unknown_class_errors() { + let mut ed = prime(); + assert!(ed.rename_class("Lno/such/Class;", "Lnew/name;").is_err()); + } + + // -- clear_hiddenapi ------------------------------------------------------ + + #[test] + fn clear_hiddenapi_noop_on_plain_dex() { + let mut ed = prime(); + let _ = ed.clear_hiddenapi_flags(); + ed.build().unwrap(); + } + + // -- build ---------------------------------------------------------------- + + #[test] + fn build_preserves_size_for_no_op() { + let bytes = prime().build().unwrap(); + assert_eq!(bytes.len(), PRIME.len()); + } + + #[test] + fn multiple_mutations_chained() { + let mut ed = prime(); + ed.set_class_access_flags("Lprime/prime;", 0x0011).unwrap(); + ed.set_method_access_flags("Lprime/prime;", "main", 0x0009).unwrap(); + ed.rename_class("Lprime/prime;", "Lprime/renamed;").unwrap(); + let bytes = ed.build().unwrap(); + let dex = DexFile::from_raw_parts(&bytes, DexLocation::InMemory).unwrap(); + let cd = dex.get_class_def(0).unwrap(); + assert_eq!(cd.access_flags, 0x0011); + assert_eq!(dex.get_type_desc_utf16_at(cd.class_idx).unwrap(), "Lprime/renamed;"); + } + + #[test] + fn fib_roundtrip() { + 
let mut ed = DexEditor::from_bytes(FIB.to_vec()).unwrap(); + ed.set_class_access_flags("Lfibonacci/fib;", 0x0001).unwrap(); + DexFile::from_raw_parts(&ed.build().unwrap(), DexLocation::InMemory).unwrap(); + } +} diff --git a/src/file/instruction.rs b/src/file/instruction.rs index f33b8c2..bc8efeb 100644 --- a/src/file/instruction.rs +++ b/src/file/instruction.rs @@ -298,6 +298,7 @@ define_formats!( // ---------------------------------------------------------------------------- macro_rules! define_index_types { ($($index_ty:tt|)*) => { + #[derive(Debug)] pub enum IndexType { $($index_ty,)* } @@ -1138,10 +1139,10 @@ macro_rules! insn_desc_table { } #[cfg(feature = "python")] - impl Into for PyDexCode { + impl From for Code { #[inline] - fn into(self) -> Code { - Instruction::opcode_of(self as u8 as u16) + fn from(val: PyDexCode) -> Self { + Instruction::opcode_of(val as u8 as u16) } } diff --git a/src/file/ir.rs b/src/file/ir.rs new file mode 100644 index 0000000..6f5e060 --- /dev/null +++ b/src/file/ir.rs @@ -0,0 +1,528 @@ +//! Intermediate representation (IR) for DEX files. +//! +//! The IR stores everything symbolically: class/type/field/method names are kept +//! as plain `String`s, and integer pool indices are assigned only at write time by +//! [`crate::file::writer::DexWriter`]. This makes the representation trivially +//! composable — add a class, add a method, splice in instructions — without having +//! to maintain cross-references by hand. +//! +//! # Quick start +//! +//! ```rust +//! use dexrs::file::ir::{DexIr, ClassDef, MethodDef, ProtoKey, CodeDef}; +//! use dexrs::file::builder::CodeBuilder; +//! use dexrs::file::writer::DexWriter; +//! use dexrs::file::modifiers::{ACC_PUBLIC, ACC_STATIC}; +//! +//! let mut ir = DexIr::new(35); +//! let mut class = ClassDef::new("Lhello/World;") +//! .access(ACC_PUBLIC) +//! .superclass("Ljava/lang/Object;"); +//! +//! let mut code = CodeBuilder::new(3, 1, 2); +//! 
code.emit(r#"sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;"#).unwrap(); +//! code.emit(r#"const-string v1, "Hello, World!""#).unwrap(); +//! code.emit(r#"invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V"#).unwrap(); +//! code.emit("return-void").unwrap(); +//! +//! class.add_direct_method( +//! MethodDef::new("main", ProtoKey::new("V", ["[Ljava/lang/String;"])) +//! .access(ACC_PUBLIC | ACC_STATIC) +//! .code(code.build().unwrap()), +//! ); +//! +//! ir.add_class(class); +//! let bytes = DexWriter::write(ir).unwrap(); +//! ``` + +use crate::file::instruction::Code; + +// -- Proto key ----------------------------------------------------------------- + +/// Key identifying a method prototype. +/// +/// Sorted by return type first, then parameter types lexicographically — the +/// same ordering the DEX spec requires for `proto_ids`. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct ProtoKey { + /// Return type descriptor, e.g. `"V"`, `"I"`, `"Ljava/lang/String;"`. + pub return_type: String, + /// Parameter type descriptors in order. + pub params: Vec, +} + +impl PartialOrd for ProtoKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ProtoKey { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let rt = self.return_type.cmp(&other.return_type); + if rt != std::cmp::Ordering::Equal { + return rt; + } + for (a, b) in self.params.iter().zip(other.params.iter()) { + let c = a.cmp(b); + if c != std::cmp::Ordering::Equal { + return c; + } + } + self.params.len().cmp(&other.params.len()) + } +} + +impl ProtoKey { + /// Construct a prototype from a return type and zero or more parameter types. + pub fn new( + return_type: impl Into, + params: impl IntoIterator>, + ) -> Self { + Self { + return_type: return_type.into(), + params: params.into_iter().map(|s| s.into()).collect(), + } + } + + /// Compute the shorty descriptor string, e.g. `"VI"` for `(I)V`. 
+ /// + /// Array types (`[...`) and object types (`L...;`) both map to `'L'`. + pub fn shorty(&self) -> String { + let mut s = String::with_capacity(1 + self.params.len()); + s.push(shorty_char(&self.return_type)); + for p in &self.params { + s.push(shorty_char(p)); + } + s + } + + /// Parse a JVM-style method descriptor `"(Ljava/lang/String;I)V"` into a `ProtoKey`. + pub fn from_descriptor(desc: &str) -> Option { + let start = desc.find('(')?; + let end = desc.find(')')?; + let params_str = &desc[start + 1..end]; + let return_str = &desc[end + 1..]; + let params = parse_type_list(params_str); + Some(Self::new(return_str, params)) + } +} + +/// Map a single DEX type descriptor to its shorty character. +pub(crate) fn shorty_char(desc: &str) -> char { + match desc.as_bytes().first() { + Some(b'[') => 'L', // array -> object shorty + Some(&c) => c as char, + None => 'V', + } +} + +/// Parse a sequence of DEX type descriptors (as found between `(` and `)` in a method +/// descriptor) into individual descriptors. +pub(crate) fn parse_type_list(mut s: &str) -> Vec { + let mut result = Vec::new(); + while !s.is_empty() { + let (desc, rest) = consume_one_type(s); + result.push(desc.to_string()); + s = rest; + } + result +} + +/// Consume exactly one DEX type descriptor from the front of `s`. +/// Returns `(descriptor, remainder)`. 
+pub(crate) fn consume_one_type(s: &str) -> (&str, &str) { + let bytes = s.as_bytes(); + match bytes.first() { + Some(b'[') => { + // Array: skip all leading '[' and then the element type + let mut i = 0; + while i < bytes.len() && bytes[i] == b'[' { + i += 1; + } + if i < bytes.len() && bytes[i] == b'L' { + // object array element + let end = s[i..].find(';').map(|p| i + p + 1).unwrap_or(s.len()); + (&s[..end], &s[end..]) + } else if i < bytes.len() { + // primitive array element + (&s[..i + 1], &s[i + 1..]) + } else { + (s, "") + } + } + Some(b'L') => { + // Object: read up to and including ';' + let end = s.find(';').map(|p| p + 1).unwrap_or(s.len()); + (&s[..end], &s[end..]) + } + Some(_) => (&s[..1], &s[1..]), // primitive + None => ("", ""), + } +} + +// -- Reference types ----------------------------------------------------------- + +/// A symbolic DEX reference used in instruction operands. +#[derive(Clone, Debug)] +pub enum DexRef { + /// A string literal (`const-string`). + String(String), + /// A type descriptor (`new-instance`, `check-cast`, `const-class`, etc.). + Type(String), + /// A field reference (`iget`, `iput`, `sget`, `sput`, etc.). + Field { + class: String, + name: String, + field_type: String, + }, + /// A method reference (`invoke-*`). + Method { + class: String, + name: String, + proto: ProtoKey, + }, + /// A method prototype reference (`invoke-polymorphic` second index). + Proto(ProtoKey), +} + +/// A branch target in a code item. +#[derive(Clone, Debug)] +pub enum BranchTarget { + /// A named label placed with [`CodeBuilder::label`]. + Label(String), + /// A raw PC-relative offset in code units. + Offset(i32), +} + +// -- Instruction node ---------------------------------------------------------- + +/// A single instruction in symbolic form (before index assignment or offset +/// resolution). +#[derive(Clone, Debug)] +pub struct InsnNode { + pub opcode: Code, + /// Register operands (vA, vB, …). Up to 5 for `k35c`. 
+ pub regs: Vec, + /// Literal value for `const-*`, `add-int/lit*`, etc. + pub literal: i64, + /// Reference operand (string, type, field, method, or proto). + pub reference: Option, + /// Branch target for `goto`, `if-*`, etc. + pub target: Option, +} + +impl InsnNode { + pub fn new(opcode: Code) -> Self { + Self { + opcode, + regs: Vec::new(), + literal: 0, + reference: None, + target: None, + } + } + + pub fn with_regs(mut self, regs: impl IntoIterator) -> Self { + self.regs = regs.into_iter().collect(); + self + } + + pub fn with_literal(mut self, lit: i64) -> Self { + self.literal = lit; + self + } + + pub fn with_reference(mut self, r: DexRef) -> Self { + self.reference = Some(r); + self + } + + pub fn with_target(mut self, t: BranchTarget) -> Self { + self.target = Some(t); + self + } +} + +// -- Try/catch IR -------------------------------------------------------------- + +/// A single catch handler entry. +#[derive(Clone, Debug)] +pub struct CatchHandlerIr { + /// Exception type descriptor (None = catch-all). + pub type_desc: Option, + /// Handler address in code units. + pub address: u32, +} + +/// A try block. +#[derive(Clone, Debug)] +pub struct TryDef { + /// Start address in code units. + pub start: u32, + /// Number of instructions covered. + pub count: u16, + /// The catch handlers for this try block. + pub handlers: Vec, +} + +// -- Code item IR -------------------------------------------------------------- + +/// A code item in symbolic form: instructions are kept as [`InsnNode`]s with +/// unresolved pool references. [`crate::file::writer::DexWriter`] resolves +/// those references against the pool and encodes the instructions to `u16` +/// words during serialization. +/// +/// Produced by [`crate::file::builder::CodeBuilder::build`]. +#[derive(Clone, Debug)] +pub struct CodeDef { + pub registers: u16, + pub ins: u16, + pub outs: u16, + /// Symbolic instruction nodes. 
Branch offsets are already resolved to + /// [`BranchTarget::Offset`] by the builder; pool references remain as + /// symbolic [`DexRef`]s until the writer serializes them. + pub insns: Vec, + pub tries: Vec, +} + +impl CodeDef { + /// A trivial empty code body (a single `return-void`). + pub fn empty(registers: u16, ins: u16) -> Self { + Self { + registers, + ins, + outs: 0, + insns: vec![InsnNode::new(crate::file::instruction::Code::RETURN_VOID)], + tries: vec![], + } + } +} + +// -- Static field value IR ----------------------------------------------------- + +/// A static field initialiser value for `` encoded arrays. +#[derive(Clone, Debug)] +pub enum EncodedValueIr { + Byte(i8), + Short(i16), + Char(u16), + Int(i32), + Long(i64), + Float(f32), + Double(f64), + Boolean(bool), + String(String), + Type(String), + Null, +} + +// -- Field / Method definition ------------------------------------------------- + +/// A field declaration inside a class. +#[derive(Clone, Debug)] +pub struct FieldDef { + pub name: String, + pub field_type: String, + pub access_flags: u32, +} + +impl FieldDef { + pub fn new(name: impl Into, field_type: impl Into) -> Self { + Self { name: name.into(), field_type: field_type.into(), access_flags: 0 } + } + + pub fn access(mut self, flags: u32) -> Self { + self.access_flags = flags; + self + } +} + +/// A method declaration (possibly with a body) inside a class. 
+#[derive(Clone, Debug)] +pub struct MethodDef { + pub name: String, + pub proto: ProtoKey, + pub access_flags: u32, + pub code: Option, +} + +impl MethodDef { + pub fn new(name: impl Into, proto: ProtoKey) -> Self { + Self { name: name.into(), proto, access_flags: 0, code: None } + } + + pub fn access(mut self, flags: u32) -> Self { + self.access_flags = flags; + self + } + + pub fn code(mut self, code: CodeDef) -> Self { + self.code = Some(code); + self + } +} + +// -- Class definition ---------------------------------------------------------- + +/// A complete class definition, including all fields and methods. +#[derive(Clone, Debug)] +pub struct ClassDef { + /// Full DEX descriptor, e.g. `"Lcom/example/Foo;"`. + pub descriptor: String, + pub access_flags: u32, + /// Superclass descriptor (`None` means no explicit superclass). + pub superclass: Option, + /// Implemented interfaces (type descriptors). + pub interfaces: Vec, + /// Source file name (for debug info), e.g. `"Foo.java"`. + pub source_file: Option, + pub static_fields: Vec, + pub instance_fields: Vec, + /// Direct methods: ``, ``, and `static` / `private` methods. + pub direct_methods: Vec, + /// Virtual (overridable) methods. + pub virtual_methods: Vec, + /// Initial values for `static` fields (in field declaration order). + pub static_values: Vec, +} + +impl ClassDef { + /// Create a minimal class with no superclass, no methods, no fields. 
+ pub fn new(descriptor: impl Into) -> Self { + Self { + descriptor: descriptor.into(), + access_flags: 0, + superclass: None, + interfaces: Vec::new(), + source_file: None, + static_fields: Vec::new(), + instance_fields: Vec::new(), + direct_methods: Vec::new(), + virtual_methods: Vec::new(), + static_values: Vec::new(), + } + } + + pub fn access(mut self, flags: u32) -> Self { + self.access_flags = flags; + self + } + + pub fn superclass(mut self, desc: impl Into) -> Self { + self.superclass = Some(desc.into()); + self + } + + pub fn interface(mut self, desc: impl Into) -> Self { + self.interfaces.push(desc.into()); + self + } + + pub fn source_file(mut self, name: impl Into) -> Self { + self.source_file = Some(name.into()); + self + } + + pub fn add_static_field(&mut self, f: FieldDef) -> &mut Self { + self.static_fields.push(f); + self + } + + pub fn add_instance_field(&mut self, f: FieldDef) -> &mut Self { + self.instance_fields.push(f); + self + } + + pub fn add_direct_method(&mut self, m: MethodDef) -> &mut Self { + self.direct_methods.push(m); + self + } + + pub fn add_virtual_method(&mut self, m: MethodDef) -> &mut Self { + self.virtual_methods.push(m); + self + } +} + +// -- Top-level DEX IR ---------------------------------------------------------- + +/// The complete DEX intermediate representation. +/// +/// Build it up with [`DexIr::add_class`], then hand it to +/// [`crate::file::writer::DexWriter::write`] to produce valid DEX bytes. +#[derive(Clone, Debug, Default)] +pub struct DexIr { + /// DEX version integer (e.g. `35` for `"035\0"`). + pub version: u32, + /// All class definitions. The writer will sort them and assign + /// indices in the required order. + pub classes: Vec, +} + +impl DexIr { + /// Create an empty DEX IR targeting the given version (typically `35`). + pub fn new(version: u32) -> Self { + Self { version, classes: Vec::new() } + } + + /// Add a class definition. 
+ pub fn add_class(&mut self, class: ClassDef) { + self.classes.push(class); + } + + /// Return a mutable reference to the class with the given descriptor, if present. + pub fn get_class_mut(&mut self, descriptor: &str) -> Option<&mut ClassDef> { + self.classes.iter_mut().find(|c| c.descriptor == descriptor) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn proto_key_shorty_primitives() { + let p = ProtoKey::new("V", ["I", "J"]); + assert_eq!(p.shorty(), "VIJ"); + } + + #[test] + fn proto_key_shorty_objects_and_arrays() { + let p = ProtoKey::new("Ljava/lang/String;", ["Ljava/lang/Object;", "[I"]); + assert_eq!(p.shorty(), "LLL"); // return=L, Object->L, [I->L + } + + #[test] + fn proto_key_ord() { + let a = ProtoKey::new("V", [] as [&str; 0]); + let b = ProtoKey::new("V", ["I"]); + assert!(a < b); + } + + #[test] + fn proto_key_from_descriptor() { + let p = ProtoKey::from_descriptor("(Ljava/lang/String;I)V").unwrap(); + assert_eq!(p.return_type, "V"); + assert_eq!(p.params, vec!["Ljava/lang/String;", "I"]); + } + + #[test] + fn parse_type_list_mixed() { + let types = parse_type_list("[ILjava/lang/String;B"); + assert_eq!(types, vec!["[I", "Ljava/lang/String;", "B"]); + } + + #[test] + fn class_def_builder() { + let mut c = ClassDef::new("Lcom/example/Foo;") + .access(0x0001) + .superclass("Ljava/lang/Object;"); + c.add_direct_method( + MethodDef::new("", ProtoKey::new("V", [] as [&str; 0])) + .access(0x0001), + ); + assert_eq!(c.direct_methods.len(), 1); + assert_eq!(c.descriptor, "Lcom/example/Foo;"); + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 272912a..a86bfa9 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -1,7 +1,6 @@ use std::fmt::Display; use memmap2::{Mmap, MmapMut}; -use plain::Plain; pub mod structs; pub use structs::*; @@ -23,8 +22,19 @@ pub mod annotations; pub use annotations::*; pub mod debug; pub use debug::*; - -use crate::{dex_err, error::DexError, leb128::decode_leb128, utf, Result}; +pub mod patch; +pub use 
patch::{patch_class_access_flags, patch_instruction_word, update_checksum}; +pub mod editor; +pub use editor::DexEditor; +pub mod ir; +pub use ir::{ + BranchTarget, ClassDef as IrClassDef, CodeDef, DexIr, DexRef, EncodedValueIr, FieldDef as IrFieldDef, + InsnNode, MethodDef as IrMethodDef, ProtoKey, TryDef, +}; +pub mod writer; +pub use writer::DexWriter; +pub mod builder; +pub use builder::{CodeBuilder, DexIrBuilder}; pub const DEX_MAGIC: &[u8] = b"dex\n"; pub const DEX_MAGIC_VERSIONS: &[&[u8]] = &[ @@ -50,6 +60,12 @@ impl From<&'static str> for DexLocation { } } +impl From for DexLocation { + fn from(s: String) -> Self { + DexLocation::Path(s) + } +} + impl Display for DexLocation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -59,819 +75,25 @@ impl Display for DexLocation { } } -pub type InMemoryDexFile<'a> = DexFile<'a, InMemoryDexContainer<'a>>; +pub type InMemoryDexFile<'a> = DexFile<'a, &'a [u8]>; pub type MmapDexFile<'a> = DexFile<'a, Mmap>; pub type MmapMutDexFile<'a> = DexFile<'a, MmapMut>; -pub struct DexFile<'a, T: DexContainer<'a> = Mmap> { - mmap: &'a T, - header: &'a Header, - - string_ids: &'a [StringId], - type_ids: &'a [TypeId], - field_ids: &'a [FieldId], - proto_ids: &'a [ProtoId], - method_ids: &'a [MethodId], - class_defs: &'a [ClassDef], - method_handles: &'a [MethodHandleItem], - call_site_ids: &'a [CallSiteIdItem], - - hiddenapi_data: Option<&'a HiddenapiClassData<'a>>, - - location: DexLocation, -} +pub mod dex_file; +pub use dex_file::*; -macro_rules! check_lt_result { - ($idx:expr, $count:expr, $item_ty:tt) => { - if ($idx as usize) >= ($count as usize) { - return dex_err!(DexIndexError { - index: $idx as u32, - item_ty: stringify!($item_ty), - max: $count as usize, - }); - } - }; -} +pub mod compact_dex; +pub use compact_dex::{CDEX_MAGIC, CDEX_MAGIC_VERSIONS}; -// writer -impl<'a, C: DexContainerMut<'a>> DexFile<'a, C> { - //TODO -} - -macro_rules! 
fn_id { - ($name:ident, $attr:ident, $ret_ty:ty, $idx_ty:ty, $(#[$meta:meta])* ) => { - $(#[$meta])* - #[inline(always)] - pub fn $name(&self, idx: $idx_ty) -> Result<&'a $ret_ty> { - check_lt_result!(idx, self.$attr.len(), $ret_ty); - Ok(&self.$attr[idx as usize]) - } - }; - ($name:ident, $attr:ident, Option: $ret_ty:ty, $fallback:ident, $idx_ty:ident, $(#[$meta:meta])*) => { - $(#[$meta])* - #[inline(always)] - pub fn $name(&'a self, idx: $idx_ty) -> Result> { - match idx { - $idx_ty::MAX => Ok(None), - _=> Ok(Some(self.$fallback(idx)?)), - } - } - }; - ($name:ident, $attr:ident, $ret_ty:ty[], $(#[$meta:meta])* ) => { - $(#[$meta])* - #[inline(always)] - pub fn $name(&'a self) -> &'a [$ret_ty] { - &self.$attr - } - }; - ($name:ident, $attr:ident, Idx: $ref_ty:ty, $(#[$meta:meta])* ) => { - $(#[$meta])* - #[inline(always)] - pub fn $name(&'a self, item: &'a $ref_ty) -> Result { - self.offset_of(self.$attr, item) - } - } -} - -impl<'a, C: DexContainer<'a>> DexFile<'a, C> { - #[inline] - fn header_available(base: &'a C) -> bool { - let size = base.len(); - size >= std::mem::size_of::
() && plain::is_aligned::
(base) - } - - pub fn get_section(base: &'a C, offset: u32, len: u32) -> &'a [T] { - if len == 0 { - return &[]; - } - // sanity checks so that this funtion will always return a valid slice - let size = base.len(); - let section_size = len as usize * std::mem::size_of::(); - if (offset as usize + section_size) >= size || offset as usize >= size { - return &[]; - } - - let data = &base[offset as usize..]; - T::slice_from_bytes_len(data, len as usize).unwrap_or_default() - } - - pub fn from_raw_parts(base: &'a C, location: DexLocation) -> Result> { - if !DexFile::header_available(base) { - return dex_err!(TruncatedFile); - } +pub mod signature; +pub use signature::Signature; - let header = match Header::from_bytes(base) { - Ok(header) => header, - // REVISIT: we already checked the header - Err(_) => return dex_err!(TruncatedFile), - }; - let mut dex = Self { - mmap: base, - header, - string_ids: DexFile::get_section(base, header.string_ids_off, header.string_ids_size), - type_ids: DexFile::get_section(base, header.type_ids_off, header.type_ids_size), - field_ids: DexFile::get_section(base, header.field_ids_off, header.field_ids_size), - proto_ids: DexFile::get_section(base, header.proto_ids_off, header.proto_ids_size), - method_ids: DexFile::get_section(base, header.method_ids_off, header.method_ids_size), - class_defs: DexFile::get_section(base, header.class_defs_off, header.class_defs_size), - method_handles: &[], - call_site_ids: &[], - hiddenapi_data: None, - location, - }; - - dex.init_sections_from_maplist(); - Ok(dex) - } - - pub fn open_file(container: &'a DexFileContainer) -> Result> { - let loc = container.get_location(); - let size = container.data().len(); - if size < std::mem::size_of::
() { - return dex_err!(DexFileError, "Invalid or truncated file {:?}", loc); - } - - DexFile::open( - container.data(), - DexLocation::Path(loc.to_string()), - if container.verify_checksum { - // currenlty supports only checksum - verifier::VerifyPreset::ChecksumOnly - } else { - verifier::VerifyPreset::None - }, - ) - } - - pub fn open( - container: &'a C, - location: DexLocation, - verify_preset: verifier::VerifyPreset, - ) -> Result> { - let dex = DexFile::from_raw_parts(container, location)?; - dex.init()?; - if verify_preset != verifier::VerifyPreset::None { - DexFile::verify(&dex, verify_preset)?; - } - Ok(dex) - } - - pub fn expected_header_size(&self) -> u32 { - let version = self.header.get_version(); - if version != 0 { - if version < 41 { - std::mem::size_of::
() as u32 - } else { - std::mem::size_of::() as u32 - } - } else { - 0 - } - } - - pub fn get_location(&self) -> &DexLocation { - &self.location - } - - #[inline(always)] - pub fn file_size(&self) -> usize { - self.mmap.len() - } - - #[inline(always)] - pub fn get_header(&self) -> &'a Header { - self.header - } - - // ------------------------------------------------------------------------------ - // strings - // ------------------------------------------------------------------------------ - - // TODO: add docs - fn_id!(get_string_id, string_ids, StringId, u32,); - fn_id!(get_string_ids, string_ids, StringId[],); - fn_id! {get_string_id_opt, string_ids, Option: StringId, get_string_id, u32,} - fn_id! {string_id_idx, string_ids, Idx: StringId, } - - #[inline(always)] - pub fn num_string_ids(&self) -> u32 { - self.header.string_ids_size - } - - #[inline] - pub fn get_string_data(&self, string_id: &StringId) -> Result<(u32, &'a [u8])> { - check_lt_result!(string_id.offset(), self.file_size(), "string-id"); - let (utf16_len, size) = match decode_leb128(&self.mmap[string_id.offset()..]) { - Ok((utf16_len, size)) => (utf16_len, size), - Err(DexError::VarIntError(e)) => { - return dex_err!(BadStringData { - offset: string_id.offset(), - kind: e - }); - } - _ => unreachable!(), - }; - - let start = string_id.offset() + size; - check_lt_result!(start, self.file_size(), "string-data"); - match &self.mmap[start..].iter().position(|x| *x == 0) { - Some(pos) => Ok((utf16_len, &self.mmap[start..start + pos + 1])), - None => dex_err!(BadStringDataMissingNullByte, start), - } - } +pub mod type_lookup_table; +pub use type_lookup_table::TypeLookupTable; - #[inline] - pub unsafe fn fast_get_utf8_str(&self, string_id: &StringId) -> Result { - let (size, data) = self.get_string_data(string_id)?; - Ok(String::from_utf8_unchecked(data[0..size as usize].to_vec())) - } - - #[inline] - pub unsafe fn fast_get_utf8_str_at(&self, idx: u32) -> Result { - let string_id = 
self.get_string_id(idx)?; - self.fast_get_utf8_str(string_id) - } - - #[inline(always)] - pub fn get_utf16_str_lossy(&self, string_id: &StringId) -> Result { - let (_, data) = self.get_string_data(string_id)?; - utf::mutf8_to_str_lossy(data) - } - - #[inline(always)] - pub fn get_utf16_str_lossy_at(&self, idx: u32) -> Result { - let string_id = self.get_string_id(idx)?; - self.get_utf16_str_lossy(string_id) - } - - #[inline(always)] - pub fn get_utf16_str(&self, string_id: &StringId) -> Result { - let (_, data) = self.get_string_data(string_id)?; - crate::utf::mutf8_to_str(data) - } - - #[inline(always)] - pub fn get_utf16_str_at(&self, idx: StringIndex) -> Result { - let string_id = self.get_string_id(idx)?; - self.get_utf16_str(string_id) - } - - #[inline(always)] - pub fn get_utf16_str_opt_at(&self, idx: StringIndex) -> Result> { - match idx { - StringIndex::MAX => Ok(None), - _ => Ok(Some(self.get_utf16_str_at(idx)?)), - } - } - - // ------------------------------------------------------------------------------ - // types - // ------------------------------------------------------------------------------ - fn_id!(get_type_id, type_ids, TypeId, TypeIndex,); - fn_id!(get_type_ids, type_ids, TypeId[],); - fn_id! {type_id_idx, type_ids, Idx: TypeId, } - fn_id! 
{get_type_id_opt, type_ids, Option: TypeId, get_type_id, TypeIndex,} - - #[inline(always)] - pub fn num_type_ids(&self) -> u32 { - self.header.type_ids_size - } - - #[inline(always)] - pub fn get_type_desc_utf16_lossy_at(&self, idx: TypeIndex) -> Result { - let type_id = self.get_type_id(idx)?; - self.get_utf16_str_lossy_at(type_id.descriptor_idx) - } - - #[inline(always)] - pub fn get_type_desc_utf16_lossy(&self, type_id: &TypeId) -> Result { - self.get_utf16_str_lossy_at(type_id.descriptor_idx) - } - - #[inline(always)] - pub fn get_type_desc_utf16(&self, type_id: &TypeId) -> Result { - self.get_utf16_str_at(type_id.descriptor_idx) - } - - #[inline(always)] - pub fn get_type_desc_utf16_at(&self, idx: TypeIndex) -> Result { - let type_id = self.get_type_id(idx)?; - self.get_utf16_str_at(type_id.descriptor_idx) - } - - // -- code item - #[inline(always)] - pub fn get_code_item(&self, offset: u32) -> Result> { - check_lt_result!(offset, self.file_size(), "code item offset"); - self.data_ptr(offset) - } - - #[inline(always)] - pub fn get_code_item_accessor(&self, offset: u32) -> Result> { - check_lt_result!(offset, self.file_size(), "code item offset"); - let code_item = self.non_null_data_ptr(offset)?; - CodeItemAccessor::from_code_item( - self, - code_item, - offset + std::mem::size_of::() as u32, - ) - } - - #[inline(always)] - pub fn get_insns_raw(&self, code_off: u32, size_in_code_units: u32) -> Result<&'a [u16]> { - check_lt_result!(code_off, self.file_size(), "code stream offset"); - self.non_null_array_data_ptr(code_off, size_in_code_units as usize) - } - - // ------------------------------------------------------------------------------ - // Debug Info - // ------------------------------------------------------------------------------ - #[inline(always)] - pub fn get_debug_info_accessor(&'a self, offset: u32) -> Result> { - check_lt_result!(offset, self.file_size(), "debug info offset"); - Ok(CodeItemDebugInfoAccessor::new( - &self.mmap[offset as usize..], - 
)) - } - - #[inline(always)] - pub fn get_debug_info_accessor_opt( - &'a self, - offset: u32, - ) -> Result>> { - match offset { - // WHY?: It seems that some applications incorrectly set the debug info offset to 0 - 0 | u32::MAX => Ok(None), - _ => Ok(Some(self.get_debug_info_accessor(offset)?)), - } - } - - // ------------------------------------------------------------------------------ - // field ids - // ------------------------------------------------------------------------------ - fn_id!(get_field_id, field_ids, FieldId, FieldIndex,); - fn_id!(get_field_ids, field_ids, FieldId[],); - fn_id! {field_id_idx, field_ids, Idx: FieldId, } - fn_id! {get_field_id_opt, field_ids, Option: FieldId, get_field_id, FieldIndex,} - - #[inline(always)] - pub fn num_field_ids(&self) -> u32 { - self.header.field_ids_size - } - - #[inline(always)] - pub fn get_field_name(&self, field_id: &FieldId) -> Result { - self.get_utf16_str_lossy_at(field_id.name_idx) - } - - #[inline(always)] - pub fn get_field_name_at(&self, idx: FieldIndex) -> Result { - let field_id = self.get_field_id(idx)?; - self.get_utf16_str_lossy_at(field_id.name_idx) - } - - // ------------------------------------------------------------------------------ - // proto ids - // ------------------------------------------------------------------------------ - fn_id!(get_proto_id, proto_ids, ProtoId, ProtoIndex,); - fn_id!(get_proto_ids, proto_ids, ProtoId[],); - fn_id! {proto_id_idx, proto_ids, Idx: ProtoId, } - fn_id! 
{get_proto_id_opt, proto_ids, Option: ProtoId, get_proto_id, ProtoIndex,} - - pub fn num_proto_ids(&self) -> u32 { - self.header.proto_ids_size - } - - pub fn get_shorty_at(&self, idx: ProtoIndex) -> Result { - let proto_id = self.get_proto_id(idx)?; - self.get_shorty(proto_id) - } - - pub fn get_shorty_lossy_at(&self, idx: ProtoIndex) -> Result { - let proto_id = self.get_proto_id(idx)?; - self.get_shorty_lossy(proto_id) - } - - pub fn get_shorty(&self, proto_id: &ProtoId) -> Result { - self.get_utf16_str_at(proto_id.shorty_idx) - } - - pub fn get_shorty_lossy(&self, proto_id: &ProtoId) -> Result { - self.get_utf16_str_lossy_at(proto_id.shorty_idx) - } - - //------------------------------------------------------------------------------ - // EncodedValue - //------------------------------------------------------------------------------ - pub fn get_encoded_value(&self, off: u32) -> Result { - check_lt_result!(off, self.file_size(), EncodedValue); - EncodedValue::new(&self.mmap[off as usize..]) - } - - //------------------------------------------------------------------------------ - // Method Ids - //------------------------------------------------------------------------------ - fn_id!(get_method_id, method_ids, MethodId, u32,); - fn_id!(get_method_ids, method_ids, MethodId[],); - fn_id! {method_id_idx, method_ids, Idx: MethodId, } - fn_id! {get_method_id_opt, method_ids, Option: MethodId, get_method_id, u32,} - - #[inline(always)] - pub fn num_method_ids(&self) -> u32 { - self.header.method_ids_size - } - - // classdef related methods - //------------------------------------------------------------------------------ - // ClassDefs - //------------------------------------------------------------------------------ - fn_id!(get_class_def, class_defs, ClassDef, u32,); - fn_id!(get_class_defs, class_defs, ClassDef[],); - fn_id! {class_def_idx, class_defs, Idx: ClassDef, } - fn_id! 
{get_class_def_opt, class_defs, Option: ClassDef, get_class_def, u32,} - - #[inline(always)] - pub fn num_class_defs(&self) -> u32 { - self.header.class_defs_size - } - - #[inline] - pub fn get_class_desc_utf16_lossy(&self, class_def: &ClassDef) -> Result { - self.get_type_desc_utf16_lossy_at(class_def.class_idx) - } - - #[inline] - pub fn get_class_desc_utf16(&self, class_def: &ClassDef) -> Result { - self.get_type_desc_utf16_at(class_def.class_idx) - } - - #[inline] - pub fn get_interfaces_list(&self, class_def: &ClassDef) -> Result>> { - self.get_type_list(class_def.interfaces_off) - } - - //------------------------------------------------------------------------------ - // Method Handles - //------------------------------------------------------------------------------ - #[inline(always)] - pub fn get_method_handle(&self, idx: u32) -> Result<&'a MethodHandleItem> { - check_lt_result!(idx, self.method_handles.len(), MethodHandleItem); - Ok(&self.method_handles[idx as usize]) - } - - #[inline(always)] - pub fn num_method_handles(&self) -> u32 { - self.method_handles.len() as u32 - } - - #[inline(always)] - pub fn get_method_handles(&self) -> &'a [MethodHandleItem] { - self.method_handles - } - - //------------------------------------------------------------------------------ - // CallSites - //------------------------------------------------------------------------------ - #[inline(always)] - pub fn get_call_site_id(&self, idx: u32) -> Result<&'a CallSiteIdItem> { - check_lt_result!(idx, self.call_site_ids.len(), CallSiteIdItem); - Ok(&self.call_site_ids[idx as usize]) - } - - #[inline(always)] - pub fn num_call_site_ids(&self) -> u32 { - self.call_site_ids.len() as u32 - } - - #[inline(always)] - pub fn get_call_site_ids(&self) -> &'a [CallSiteIdItem] { - self.call_site_ids - } - - //------------------------------------------------------------------------------ - // TryItem - //------------------------------------------------------------------------------ - pub 
fn get_try_items(&'a self, ca: &CodeItemAccessor<'_>) -> Result<&'a [TryItem]> { - // skip heavy work if there are no try items - match ca.get_tries_abs_off() { - None => Ok(&[]), - Some(tries_off) => self.get_try_items_raw(tries_off, ca.tries_size()), - } - } - - #[inline] - pub fn get_try_items_raw(&'a self, tries_off: u32, tries_size: u16) -> Result<&'a [TryItem]> { - check_lt_result!(tries_off, self.file_size(), TryItem); - self.non_null_array_data_ptr(tries_off, tries_size as usize) - } - - //------------------------------------------------------------------------------ - // EncodedCatchHandler - //------------------------------------------------------------------------------ - #[inline] - pub fn get_catch_handler_data( - &self, - ca: &CodeItemAccessor<'_>, - offset: usize, - ) -> Result> { - match ca.get_catch_handler_data_abs_off() { - None => Ok(None), - Some(data_offset) => { - let offset = data_offset as usize + offset; - check_lt_result!(offset, self.file_size(), CatchHandlerData); - - // TODO: handle values greater than u16 since u16::MAX is maximum offset - Ok(Some(&self.mmap[offset..])) - } - } - } - - #[inline] - pub fn iter_catch_handlers_at( - &self, - ca: &CodeItemAccessor<'_>, - offset: usize, - ) -> Result>> { - match self.get_catch_handler_data(ca, offset)? { - None => Ok(None), - Some(data) => Ok(Some(EncodedCatchHandlerIterator::new(data)?)), - } - } - - #[inline] - pub fn iter_catch_handlers( - &self, - ca: &CodeItemAccessor<'_>, - try_item: &TryItem, - ) -> Result>> { - self.iter_catch_handlers_at(ca, try_item.handler_off as usize) - } - - //------------------------------------------------------------------------------ - // Annotations - //------------------------------------------------------------------------------ - // see implementation in annotations.rs for accessor - pub fn get_annotation_set(&self, off: u32) -> Result> { - // this will not panic if offset is zero - match self.data_ptr::(off)? 
{ - None => Ok(&[]), - Some(size) => { - let off = off as usize + std::mem::size_of::(); - check_lt_result!(off, self.file_size(), AnnotationSetItem); - self.non_null_array_data_ptr(off as u32, *size as usize) - } - } - } - - #[inline(always)] - pub fn get_field_annotation_set( - &'a self, - anno_item: &FieldAnnotationsItem, - ) -> Result> { - self.get_annotation_set(anno_item.annotations_off) - } - - #[inline(always)] - pub fn get_method_annotation_set( - &'a self, - anno_item: &MethodAnnotationsItem, - ) -> Result> { - self.get_annotation_set(anno_item.annotations_off) - } - - #[inline(always)] - pub fn get_parameter_annotation_set( - &'a self, - anno_item: &ParameterAnnotationsItem, - ) -> Result> { - self.get_annotation_set(anno_item.annotations_off) - } - - #[inline] - pub fn get_annotation(&self, annotation_off: u32) -> Result { - check_lt_result!(annotation_off, self.file_size(), Annotation); - AnnotationItem::from_raw_parts(&self.mmap[annotation_off as usize..]) - } - - //------------------------------------------------------------------------------ - // internal helpers - //------------------------------------------------------------------------------ - #[inline] - fn offset_of(&self, buf: &[U], o: &T) -> Result { - let start = buf.as_ptr() as usize; - let target = o as *const _ as usize; - let end = buf.as_ptr() as usize + self.file_size(); - - if target < start || target > end { - return dex_err!(UnknownObjectRef { - offset: target, - start, - end - }); - } - - Ok(((target - start) / std::mem::size_of::()) as u32) - } - - #[inline(always)] - pub fn get_type_list(&self, offset: u32) -> Result>> { - if offset == 0 { - return Ok(None); - } - - check_lt_result!(offset, self.file_size(), TypeList); - let length = u32::from_bytes(&self.mmap[offset as usize..]).unwrap(); - let data_off = offset + std::mem::size_of::() as u32; - - self.array_data_ptr(data_off, *length as usize) - } - - // private methods - #[inline] - pub fn data_ptr(&self, offset: u32) -> 
Result> { - match offset { - 0 => Ok(None), - _ => Ok(Some(self.non_null_data_ptr(offset)?)), - } - } - - pub fn non_null_data_ptr(&self, offset: u32) -> Result<&'a T> { - if offset == 0 { - panic!( - "Attempted to read a null pointer for data type {:?}.", - std::any::type_name::() - ); - } - match T::from_bytes(&self.mmap[offset as usize..]) { - Ok(v) => Ok(v), - Err(plain::Error::TooShort) => { - dex_err!(DexLayoutError, self, offset, std::any::type_name::(), 0) - } - Err(err) => panic!( - "Error decoding data type {:?}: {:?}", - std::any::type_name::(), - err - ), - } - } - - #[inline] - pub fn array_data_ptr(&self, offset: u32, len: usize) -> Result> { - match offset { - 0 => Ok(None), - _ => Ok(Some(self.non_null_array_data_ptr(offset, len)?)), - } - } - - pub fn non_null_array_data_ptr(&self, offset: u32, len: usize) -> Result<&'a [T]> { - if offset == 0 { - panic!( - "Attempted to read a null pointer for data type {:?}.", - std::any::type_name::() - ); - } - match T::slice_from_bytes_len(&self.mmap[offset as usize..], len) { - Ok(v) => Ok(v), - Err(plain::Error::TooShort) => dex_err!( - DexLayoutError, - self, - offset, - std::any::type_name::(), - len - ), - Err(plain::Error::BadAlignment) => todo!(), - } - } - - //------------------------------------------------------------------------------ - // Initialization - //------------------------------------------------------------------------------ - fn init(&self) -> Result<()> { - let container_size = self.file_size(); - if container_size < std::mem::size_of::
() { - return dex_err!( - DexFileError, - "Unable to open {:?}: File size is too small to fit dex header", - self.location - ); - } - - self.check_magic_and_version()?; - - let expected_header_size = self.expected_header_size(); - if expected_header_size < self.header.header_size { - return dex_err!( - DexFileError, - "Unable to open {:?}: Header size is {} but {} was expected", - self.location, - expected_header_size, - self.header.header_size - ); - } - - if container_size < self.header.file_size as usize { - return dex_err!( - DexFileError, - "Unable to open {:?}: File size is {} but the header expects {}", - self.location, - container_size, - self.header.file_size - ); - } - Ok(()) - } - - fn check_magic_and_version(&self) -> Result<()> { - if !self.is_magic_valid() { - return dex_err!( - DexFileError, - "Unrecognized magic number in {:?}: {:?}", - self.location, - &self.header.get_magic()[..4] - ); - } - - if !self.is_version_valid() { - return dex_err!( - DexFileError, - "Unrecognized dex version in {:?}: {:?}", - self.location, - &self.header.get_magic()[4..] 
- ); - } - Ok(()) - } - - #[inline] - fn maplist_available(&self) -> bool { - if self.header.map_off == 0x00 { - return false; - } - - let size = self.file_size(); - let end = (self.header.map_off as usize) + std::mem::size_of::(); - end > size || !plain::is_aligned::(&self.mmap[0..end]) - } - - fn init_sections_from_maplist(&mut self) { - if !self.maplist_available() { - // bad offset - return; - } - - let map_list_size_off = self.header.map_off; - let map_list_off = (self.header.map_off as usize) + std::mem::size_of::(); - if map_list_off >= self.file_size() { - // bad offset - return; - } - - let count: &u32 = match self.non_null_data_ptr(map_list_size_off) { - Ok(v) => v, - Err(_) => { - // bad file will be reported through verifier - return; - } - }; - let map_limit = - (self.file_size() - std::mem::size_of::() - map_list_size_off as usize) - / std::mem::size_of::(); - - if *count as usize > map_limit { - // bad file - return; - } - - // we should unwrap this here - let items = - match self.non_null_array_data_ptr::(map_list_off as u32, *count as usize) { - Ok(v) => v, - Err(_) => { - // bad file will be reported through verifier - return; - } - }; - for map_item in items { - match map_item.type_ { - MapItemType::MethodHandleItem => { - self.method_handles = - DexFile::get_section(self.mmap, map_item.off, map_item.size) - } - MapItemType::CallSiteIdItem => { - self.call_site_ids = - DexFile::get_section(self.mmap, map_item.off, map_item.size) - } - MapItemType::HiddenapiClassData => { - let item_off = map_item.off as usize; - self.hiddenapi_data = Some( - HiddenapiClassData::from_bytes( - &self.mmap[item_off..item_off + map_item.size as usize], - ) - .unwrap(), - ); - } - _ => {} - } - } - } +/// Whether a DEX file uses the standard (`dex\n`) or compact (`cdex`) format. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DexFormat { + Standard, + Compact, } diff --git a/src/file/patch.rs b/src/file/patch.rs new file mode 100644 index 0000000..45ddb71 --- /dev/null +++ b/src/file/patch.rs @@ -0,0 +1,268 @@ +use std::mem; + +use crate::{ + dex_err, + error::DexError, + file::{ClassDef, Header, MapItem}, + leb128::decode_leb128_off, + Result, +}; + +// --- Checksum ---------------------------------------------------------------- + +/// Recalculate the Adler32 checksum (over bytes `[12..]`) and write it to `data[8..12]`. +pub fn update_checksum(data: &mut [u8]) { + if data.len() < 12 { + return; + } + let sum = adler32::adler32(&data[12..]).unwrap_or(0); + data[8..12].copy_from_slice(&sum.to_le_bytes()); +} + +// --- ClassDef ---------------------------------------------------------------- + +const CLASS_DEF_SIZE: usize = mem::size_of::(); +/// Byte offset of `ClassDef.access_flags` within the `#[repr(C)]` struct. +/// Layout: class_idx(2) + pad(2) + access_flags(4) -> offset 4. +pub(crate) const CLASS_DEF_FLAGS_OFF: usize = 4; + +/// Overwrite `ClassDef.access_flags` for the class at `class_def_idx`. +/// +/// Returns an error if `class_def_idx` is out of range or the write would exceed the file. +pub fn patch_class_access_flags( + data: &mut [u8], + header: &Header, + class_def_idx: u32, + flags: u32, +) -> Result<()> { + if class_def_idx >= header.class_defs_size { + return dex_err!(DexIndexError { + index: class_def_idx, + max: header.class_defs_size as usize, + item_ty: "ClassDef" + }); + } + let off = + header.class_defs_off as usize + class_def_idx as usize * CLASS_DEF_SIZE + CLASS_DEF_FLAGS_OFF; + data.get_mut(off..off + 4) + .ok_or_else(|| { + DexError::DexFileError(format!("class_def[{class_def_idx}] out of bounds")) + })? 
+ .copy_from_slice(&flags.to_le_bytes()); + Ok(()) +} + +// --- Instructions ------------------------------------------------------------ + +/// Byte offset of `CodeItem.insns[0]` from the start of the code item. +/// Layout: registers(2)+ins(2)+outs(2)+tries(2)+debug_off(4)+insns_size(4) = 16 bytes. +const CODE_ITEM_INSNS_OFF: usize = 16; + +/// Overwrite a single instruction word (`u16`) at code-unit offset `pc` inside a code item. +pub fn patch_instruction_word( + data: &mut [u8], + code_off: u32, + pc: u32, + word: u16, +) -> Result<()> { + if code_off == 0 { + return Err(DexError::TruncatedFile); + } + let item_start = code_off as usize; + let insns_size_off = item_start + 12; + + let insns_size = u32::from_le_bytes( + data.get(insns_size_off..insns_size_off + 4) + .ok_or(DexError::TruncatedFile)? + .try_into() + .unwrap(), + ); + if pc >= insns_size { + return dex_err!(BadInstructionOffset { + opcode: "patch", + offset: pc as usize, + size: insns_size as usize + }); + } + + let word_off = item_start + CODE_ITEM_INSNS_OFF + pc as usize * 2; + data.get_mut(word_off..word_off + 2) + .ok_or_else(|| DexError::DexFileError(format!("instruction word at {word_off} out of bounds")))? + .copy_from_slice(&word.to_le_bytes()); + Ok(()) +} + +// --- Internal helpers (used by editor.rs) ------------------------------------ + +/// Read the DEX header from the start of `data`, or `None` if too short / misaligned. +pub(crate) fn read_header(data: &[u8]) -> Option<&Header> { + plain::from_bytes::
(data).ok() +} + +/// Return the map item slice, or `None` if unavailable. +#[allow(dead_code)] +pub(crate) fn map_list(data: &[u8]) -> Option<&[MapItem]> { + let h = read_header(data)?; + let size_off = h.map_off as usize; + let count = u32::from_le_bytes(data.get(size_off..size_off + 4)?.try_into().ok()?) as usize; + let items_off = size_off + 4; + let items_end = items_off + count * mem::size_of::(); + plain::slice_from_bytes::(data.get(items_off..items_end)?).ok() +} + +/// Encode `value` as unsigned LEB128. +pub(crate) fn encode_uleb128(mut value: u32) -> Vec { + let mut out = Vec::with_capacity(5); + loop { + let mut byte = (value & 0x7F) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + out.push(byte); + if value == 0 { + break; + } + } + out +} + +/// Decode a ULEB128 at `*pos` and advance `pos`. +pub(crate) fn skip_uleb128(data: &[u8], pos: &mut usize) -> Result { + decode_leb128_off::(data, pos) +} + +/// Byte-length of the ULEB128 encoding of `value`. +#[allow(dead_code)] +pub(crate) fn uleb128_len(value: u32) -> usize { + encode_uleb128(value).len() +} + +/// Read a `u16` at `off` (little-endian). +#[allow(dead_code)] +#[inline] +pub(crate) fn read_u16(data: &[u8], off: usize) -> u16 { + u16::from_le_bytes(data[off..off + 2].try_into().unwrap()) +} + +/// Read a `u32` at `off` (little-endian). +#[allow(dead_code)] +#[inline] +pub(crate) fn read_u32(data: &[u8], off: usize) -> u32 { + u32::from_le_bytes(data[off..off + 4].try_into().unwrap()) +} + +/// Write a `u32` at `off` (little-endian). 
+#[allow(dead_code)] +#[inline] +pub(crate) fn write_u32(data: &mut [u8], off: usize, v: u32) { + data[off..off + 4].copy_from_slice(&v.to_le_bytes()); +} + + +// --- Tests ------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::file::{DexFile, DexLocation}; + + const PRIME: &[u8] = include_bytes!("../../tests/prime/prime.dex"); + + // -- update_checksum ------------------------------------------------------ + + #[test] + fn checksum_roundtrip() { + let mut data = PRIME.to_vec(); + data[8] = 0xAA; + data[9] = 0xBB; + update_checksum(&mut data); + assert_eq!(&data[8..12], &PRIME[8..12]); + } + + #[test] + fn checksum_too_short_is_noop() { + let mut tiny = vec![0u8; 10]; + update_checksum(&mut tiny); // must not panic + } + + #[test] + fn checksum_exact_boundary() { + let mut data = vec![0u8; 12]; + update_checksum(&mut data); + let expected = adler32::adler32(&[][..]).unwrap_or(0); + assert_eq!(u32::from_le_bytes(data[8..12].try_into().unwrap()), expected); + } + + // -- patch_class_access_flags --------------------------------------------- + + #[test] + fn patch_flags_changes_value() { + let mut data = PRIME.to_vec(); + // Copy header bytes so we can release the borrow before mutating. 
+ let header_copy = data[..112].to_vec(); + let header = read_header(&header_copy).expect("valid header"); + + let flags_off = header.class_defs_off as usize + CLASS_DEF_FLAGS_OFF; + let original = u32::from_le_bytes(data[flags_off..flags_off + 4].try_into().unwrap()); + + patch_class_access_flags(&mut data, header, 0, 0x0011).unwrap(); + + let patched = u32::from_le_bytes(data[flags_off..flags_off + 4].try_into().unwrap()); + assert_eq!(patched, 0x0011); + assert_ne!(patched, original); + } + + #[test] + fn patch_flags_out_of_bounds_errors() { + let mut data = PRIME.to_vec(); + let header_copy = data[..112].to_vec(); + let header = read_header(&header_copy).expect("valid header"); + assert!(patch_class_access_flags(&mut data, header, 9999, 0x0001).is_err()); + } + + #[test] + fn patch_flags_then_reparseable() { + let mut data = PRIME.to_vec(); + let header_copy = data[..112].to_vec(); + let header = read_header(&header_copy).expect("valid header"); + patch_class_access_flags(&mut data, header, 0, 0x0011).unwrap(); + update_checksum(&mut data); + DexFile::from_raw_parts(&data, DexLocation::InMemory) + .expect("re-parse after patch must succeed"); + } + + // -- patch_instruction_word ----------------------------------------------- + + #[test] + fn patch_insn_word_zero_offset_errors() { + let mut data = PRIME.to_vec(); + // code_off=0 is never valid + assert!(patch_instruction_word(&mut data, 0, 0, 0xFFFF).is_err()); + } + + #[test] + fn patch_insn_word_out_of_range_pc_errors() { + // Find a real code_off from the parsed DEX, then use an out-of-range PC. 
+ let first_code_off = { + let buf = PRIME.to_vec(); + let dex = DexFile::from_raw_parts(&buf, DexLocation::InMemory).unwrap(); + let cd = dex.get_class_def(0).unwrap(); + if let Some(acc) = dex.get_class_accessor(cd).unwrap() { + acc.get_methods() + .unwrap() + .find(|m| m.code_offset != 0) + .map(|m| m.code_offset) + .unwrap_or(0) + } else { 0 } + }; + + if first_code_off != 0 { + let mut data = PRIME.to_vec(); + assert!( + patch_instruction_word(&mut data, first_code_off, 99999, 0xFFFF).is_err(), + "out-of-range PC must error" + ); + } + } +} diff --git a/src/file/signature.rs b/src/file/signature.rs new file mode 100644 index 0000000..54f9b3a --- /dev/null +++ b/src/file/signature.rs @@ -0,0 +1,47 @@ +use std::fmt; + +/// A decoded method signature in standard DEX format: `"(param1param2...)return_type"`. +/// +/// Matches ART's `Signature` class. Created via [`DexFile::get_method_signature`]. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct Signature { + inner: String, + num_params: u32, + is_void: bool, +} + +impl Signature { + pub(super) fn new(inner: String, num_params: u32, is_void: bool) -> Self { + Self { + inner, + num_params, + is_void, + } + } + + /// Returns a no-signature sentinel (empty string, 0 params, not void). + pub fn no_signature() -> Self { + Self::default() + } + + /// Returns `true` if the return type is `void`. + pub fn is_void(&self) -> bool { + self.is_void + } + + /// Returns the number of explicit parameters. + pub fn num_params(&self) -> u32 { + self.num_params + } + + /// Returns the signature string in DEX format: `"(params)return_type"`. 
+ pub fn as_str(&self) -> &str { + &self.inner + } +} + +impl fmt::Display for Signature { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.inner) + } +} diff --git a/src/file/structs.rs b/src/file/structs.rs index 16d3a30..e3281d3 100644 --- a/src/file/structs.rs +++ b/src/file/structs.rs @@ -653,25 +653,3 @@ pub fn annotation(&self) -> PyDexEncodedAnnotation { ); // <<< end python export -// -------------------------------------------------------------------- -// Python API -// -------------------------------------------------------------------- -// >>> begin python module export -#[cfg(feature = "python")] -#[pyo3::pymodule(name = "structs")] -pub(crate) mod py_structs { - - #[pymodule_export] - use super::{ - PyDexAnnotationElement, PyDexAnnotationItem, PyDexAnnotationsDirectoryItem, - PyDexCallSiteIdItem, PyDexCatchHandlerData, PyDexClassDef, PyDexCodeItem, - PyDexEncodedAnnotation, PyDexEncodedValue, PyDexFieldAnnotationsItem, PyDexFieldId, - PyDexMethodAnnotationsItem, PyDexMethodHandleItem, PyDexMethodId, - PyDexParameterAnnotationsItem, PyDexProtoId, PyDexStringId, PyDexTryItem, PyDexTypeId, - PyDexTypeItem, - }; - - #[pymodule_export] - use crate::file::header::PyDexHeader; -} -// <<< end python module export diff --git a/src/file/type_lookup_table.rs b/src/file/type_lookup_table.rs new file mode 100644 index 0000000..46a085c --- /dev/null +++ b/src/file/type_lookup_table.rs @@ -0,0 +1,42 @@ +use std::collections::HashMap; + +use super::{DexContainer, DexFile}; + +/// Fast O(1) lookup of `class_def_idx` by type descriptor. +/// +/// Equivalent to ART's `TypeLookupTable`. Built on demand from a [`DexFile`] +/// using a `HashMap` internally. The table is owned and does not borrow from +/// the DEX file, so it can outlive it. +pub struct TypeLookupTable { + table: HashMap, +} + +impl TypeLookupTable { + /// Builds a lookup table from all class definitions in `dex`. 
+ pub fn new<'a, C: DexContainer<'a>>(dex: &'a DexFile<'a, C>) -> Self { + let mut table = HashMap::with_capacity(dex.num_class_defs() as usize); + for (idx, class_def) in dex.get_class_defs().iter().enumerate() { + if let Ok(desc) = dex.get_type_desc_utf16_at(class_def.class_idx) { + table.insert(desc, idx as u32); + } + } + TypeLookupTable { table } + } + + /// Returns the `class_def_idx` for the given type descriptor, or `None` if not found. + /// + /// `descriptor` must be in DEX format, e.g. `"Ljava/lang/String;"`. + pub fn lookup(&self, descriptor: &str) -> Option { + self.table.get(descriptor).copied() + } + + /// Returns the number of entries in the table. + pub fn len(&self) -> usize { + self.table.len() + } + + /// Returns `true` if the table contains no entries. + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } +} diff --git a/src/file/verifier.rs b/src/file/verifier.rs index 458f267..bf3e78c 100644 --- a/src/file/verifier.rs +++ b/src/file/verifier.rs @@ -3,7 +3,8 @@ use adler32; use crate::{dex_err, error::DexError, Result}; use super::{ - DexContainer, DexFile, Header, HeaderV41, DEX_ENDIAN_CONSTANT, DEX_MAGIC, DEX_MAGIC_VERSIONS, + DexContainer, DexFile, Header, CDEX_MAGIC, CDEX_MAGIC_VERSIONS, DEX_ENDIAN_CONSTANT, + DEX_MAGIC, DEX_MAGIC_VERSIONS, }; #[derive(Debug, PartialEq, Eq)] @@ -15,12 +16,13 @@ pub enum VerifyPreset { impl<'a, C: DexContainer<'a>> DexFile<'a, C> { pub fn is_magic_valid(&self) -> bool { - &self.header.get_magic()[..4] == DEX_MAGIC + let magic4 = &self.header.get_magic()[..4]; + magic4 == DEX_MAGIC || magic4 == CDEX_MAGIC } pub fn is_version_valid(&self) -> bool { let version_raw = &self.header.get_magic()[4..]; - DEX_MAGIC_VERSIONS.contains(&version_raw) + DEX_MAGIC_VERSIONS.contains(&version_raw) || CDEX_MAGIC_VERSIONS.contains(&version_raw) } // TODO: can be changed into enum @@ -41,50 +43,9 @@ fn check_header<'a, C>(dex: &DexFile<'a, C>, preset: VerifyPreset) -> Result<()> where C: DexContainer<'a>, { - let size 
= dex.file_size(); - if size < std::mem::size_of::
() { - return dex_err!(TruncatedFile); - } - - if !dex.is_magic_valid() { - return dex_err!(BadFileMagic); - } - - if !dex.is_version_valid() { - return dex_err!(UnknownDexVersion { - version: dex.header.get_version() - }); - } - - // check file size from header - let version = dex.header.get_version(); - let file_size = dex.header.file_size as usize; - let header_size = if version >= 41 { - std::mem::size_of::() - } else { - std::mem::size_of::
() - }; - - if file_size < header_size { - return dex_err!(FileSizeAtLeast { - actual: file_size, - expected: header_size - }); - } - if file_size > size { - return dex_err!(FileSizeAtMost { - actual: file_size, - expected: size - }); - } - - // check header size - if dex.header.header_size as usize != header_size { - return dex_err!(BadHeaderSize { - size: dex.header.header_size, - expected: header_size as u32 - }); - } + // Structural checks (truncation, magic, version, file/header size) are + // already enforced by DexFile::init(), which runs before verify(). Here + // we only handle the checks that init() intentionally defers. // check endian if dex.header.endian_tag != DEX_ENDIAN_CONSTANT { @@ -104,7 +65,7 @@ where _ => {} }; - let header = dex.header; + let header = &dex.header; check_valid_offset_and_size(dex, header.link_off, header.link_size, "link")?; check_valid_offset_and_size( dex, diff --git a/src/file/writer.rs b/src/file/writer.rs new file mode 100644 index 0000000..e4f821f --- /dev/null +++ b/src/file/writer.rs @@ -0,0 +1,1113 @@ +//! Serialize a [`DexIr`] to valid standard-DEX bytes. +//! +//! [`DexWriter::write`] is the single entry point. It sorts all pools, assigns +//! integer indices, and writes the binary representation in a single forward pass +//! with a backpatch table for forward references. +//! +//! # Performance +//! * String deduplication uses `HashMap` for O(1) insert + one O(M log M) sort. +//! * All output is written into a single pre-allocated `Vec`. +//! * Backpatching fills in cross-references after the referenced items are written. 
+ +use std::collections::HashMap; + +use crate::{ + file::{ + builder::encode_insn, + ir::{ + BranchTarget, ClassDef, CodeDef, DexIr, DexRef, EncodedValueIr, + FieldDef, MethodDef, ProtoKey, TryDef, + }, + patch::{encode_uleb128, update_checksum}, + }, + utf::str_to_mutf8, + Result, +}; + +// -- Output buffer with backpatch support -------------------------------------- + +struct Out { + data: Vec, +} + +#[allow(dead_code)] +impl Out { + fn new(capacity: usize) -> Self { + Self { data: Vec::with_capacity(capacity) } + } + + fn len(&self) -> usize { + self.data.len() + } + + fn write_u8(&mut self, v: u8) { + self.data.push(v); + } + + fn write_u16(&mut self, v: u16) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + fn write_u32(&mut self, v: u32) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + fn write_i32(&mut self, v: i32) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + fn write_bytes(&mut self, b: &[u8]) { + self.data.extend_from_slice(b); + } + + fn write_uleb128(&mut self, v: u32) { + self.data.extend(encode_uleb128(v)); + } + + fn write_sleb128(&mut self, mut v: i32) { + loop { + let mut byte = (v & 0x7F) as u8; + v >>= 7; + let done = v == 0 && (byte & 0x40 == 0) || v == -1 && (byte & 0x40 != 0); + if !done { + byte |= 0x80; + } + self.data.push(byte); + if done { + break; + } + } + } + + /// Reserve 4 bytes (initialised to zero) for a later backpatch. + /// Returns the position of the reserved slot. + fn reserve_u32(&mut self) -> usize { + let pos = self.data.len(); + self.data.extend_from_slice(&[0, 0, 0, 0]); + pos + } + + /// Fill in a previously reserved u32 slot. + fn patch_u32(&mut self, pos: usize, v: u32) { + self.data[pos..pos + 4].copy_from_slice(&v.to_le_bytes()); + } + + /// Pad to a 4-byte boundary. 
+ fn align4(&mut self) { + while !self.data.len().is_multiple_of(4) { + self.data.push(0); + } + } +} + +// -- Sorted pools -------------------------------------------------------------- + +type FieldKey = (Vec, Vec, Vec); +type FieldMap = BTreeMap; +type FieldHashMap = HashMap; + +/// Collect + sort all strings, types, protos, fields, methods from `ir`. +struct Pools { + /// Sorted MUTF-8 string contents (without null terminator). + strings: Vec>, + string_idx: HashMap, u32>, + + /// Sorted type descriptors (as MUTF-8 bytes). + types: Vec>, + type_idx: HashMap, u32>, + + /// Sorted protos. + protos: Vec, + proto_idx: HashMap, + + /// Sorted field keys: (class_desc, name, field_type) — all MUTF-8. + fields: Vec, + field_idx: FieldHashMap, + + /// Sorted method keys: (class_desc, name, proto) — class+name as MUTF-8. + methods: Vec<(Vec, Vec, ProtoKey)>, + method_idx: HashMap<(Vec, Vec, ProtoKey), u32>, +} + +#[allow(dead_code)] +impl Pools { + fn build(ir: &DexIr) -> Self { + use std::collections::BTreeMap; + + // Use BTreeMap to auto-sort on insertion. 
+ let mut strings: BTreeMap, ()> = BTreeMap::new(); + let mut types: BTreeMap, ()> = BTreeMap::new(); + let mut protos: BTreeMap = BTreeMap::new(); + let mut fields: FieldMap = BTreeMap::new(); + let mut methods: BTreeMap<(Vec, Vec, ProtoKey), ()> = BTreeMap::new(); + + let add_str = |s: &str, strings: &mut BTreeMap, ()>| { + strings.insert(str_to_mutf8_no_null(s), ()); + }; + let add_type = |s: &str, + strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>| { + let m = str_to_mutf8_no_null(s); + strings.insert(m.clone(), ()); + types.insert(m, ()); + }; + let add_proto = |p: &ProtoKey, + strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>, + protos: &mut BTreeMap| { + let shorty = p.shorty(); + add_str(&shorty, strings); + add_type(&p.return_type, strings, types); + for param in &p.params { + add_type(param, strings, types); + } + protos.insert(p.clone(), ()); + }; + let _ = add_proto; + + for class in &ir.classes { + // class descriptor + add_type(&class.descriptor, &mut strings, &mut types); + // superclass + if let Some(ref s) = class.superclass { + add_type(s, &mut strings, &mut types); + } + // interfaces + for iface in &class.interfaces { + add_type(iface, &mut strings, &mut types); + } + // source file + if let Some(ref sf) = class.source_file { + add_str(sf, &mut strings); + } + // fields + for f in class.static_fields.iter().chain(class.instance_fields.iter()) { + collect_field(f, &class.descriptor, &mut strings, &mut types, &mut fields); + } + // methods + for m in class.direct_methods.iter().chain(class.virtual_methods.iter()) { + collect_method( + m, + &class.descriptor, + &mut strings, + &mut types, + &mut protos, + &mut fields, + &mut methods, + ); + } + } + + // Materialise sorted vecs + index maps. 
+ let strings: Vec> = strings.into_keys().collect(); + let string_idx: HashMap, u32> = + strings.iter().enumerate().map(|(i, k)| (k.clone(), i as u32)).collect(); + + let types: Vec> = types.into_keys().collect(); + let type_idx: HashMap, u32> = + types.iter().enumerate().map(|(i, k)| (k.clone(), i as u32)).collect(); + + let protos: Vec = protos.into_keys().collect(); + let proto_idx: HashMap = + protos.iter().enumerate().map(|(i, k)| (k.clone(), i as u32)).collect(); + + let fields: Vec = fields.into_keys().collect(); + let field_idx: FieldHashMap = + fields.iter().enumerate().map(|(i, k)| (k.clone(), i as u32)).collect(); + + let methods: Vec<(Vec, Vec, ProtoKey)> = methods.into_keys().collect(); + let method_idx: HashMap<(Vec, Vec, ProtoKey), u32> = + methods.iter().enumerate().map(|(i, k)| (k.clone(), i as u32)).collect(); + + Pools { strings, string_idx, types, type_idx, protos, proto_idx, fields, field_idx, methods, method_idx } + } + + fn string_idx_of(&self, s: &str) -> u32 { + let m = str_to_mutf8_no_null(s); + *self.string_idx.get(&m).unwrap_or_else(|| panic!("string not in pool: {s:?}")) + } + + fn type_idx_of(&self, s: &str) -> u32 { + let m = str_to_mutf8_no_null(s); + *self.type_idx.get(&m).unwrap_or_else(|| panic!("type not in pool: {s:?}")) + } + + fn proto_idx_of(&self, p: &ProtoKey) -> u32 { + *self.proto_idx.get(p).unwrap_or_else(|| panic!("proto not in pool: {p:?}")) + } + + fn field_idx_of(&self, class: &str, name: &str, ty: &str) -> u32 { + let key = (str_to_mutf8_no_null(class), str_to_mutf8_no_null(name), str_to_mutf8_no_null(ty)); + *self.field_idx.get(&key).unwrap_or_else(|| panic!("field not in pool: {class}.{name}:{ty}")) + } + + fn method_idx_of(&self, class: &str, name: &str, proto: &ProtoKey) -> u32 { + let key = (str_to_mutf8_no_null(class), str_to_mutf8_no_null(name), proto.clone()); + *self.method_idx.get(&key).unwrap_or_else(|| panic!("method not in pool: {class}.{name}")) + } + + fn ref_idx(&self, r: &DexRef) -> u32 { + match r 
{ + DexRef::String(s) => self.string_idx_of(s), + DexRef::Type(s) => self.type_idx_of(s), + DexRef::Field { class, name, field_type } => { + self.field_idx_of(class, name, field_type) + } + DexRef::Method { class, name, proto } => self.method_idx_of(class, name, proto), + DexRef::Proto(p) => self.proto_idx_of(p), + } + } +} + +// -- Pool collector helpers ---------------------------------------------------- + +use std::collections::BTreeMap; + +fn collect_field( + f: &FieldDef, + class_desc: &str, + strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>, + fields: &mut FieldMap, +) { + let c = str_to_mutf8_no_null(class_desc); + let n = str_to_mutf8_no_null(&f.name); + let t = str_to_mutf8_no_null(&f.field_type); + strings.insert(n.clone(), ()); + let ft = str_to_mutf8_no_null(&f.field_type); + strings.insert(ft, ()); + types.insert(t.clone(), ()); + let type_desc = str_to_mutf8_no_null(&f.field_type); + types.insert(type_desc, ()); + fields.insert((c, n, t), ()); +} + +fn collect_method( + m: &MethodDef, + class_desc: &str, + strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>, + protos: &mut BTreeMap, + fields: &mut FieldMap, + methods: &mut BTreeMap<(Vec, Vec, ProtoKey), ()>, +) { + let c = str_to_mutf8_no_null(class_desc); + let n = str_to_mutf8_no_null(&m.name); + strings.insert(n.clone(), ()); + + // Proto: shorty + return type + params + let shorty = m.proto.shorty(); + strings.insert(str_to_mutf8_no_null(&shorty), ()); + types.insert(str_to_mutf8_no_null(&m.proto.return_type), ()); + strings.insert(str_to_mutf8_no_null(&m.proto.return_type), ()); + for p in &m.proto.params { + types.insert(str_to_mutf8_no_null(p), ()); + strings.insert(str_to_mutf8_no_null(p), ()); + } + protos.insert(m.proto.clone(), ()); + methods.insert((c, n, m.proto.clone()), ()); + + // Collect references from instructions + if let Some(code) = &m.code { + collect_code_refs(code, strings, types, protos, fields, methods); + } +} + +fn collect_code_refs( + code: &CodeDef, + 
strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>, + protos: &mut BTreeMap, + fields: &mut FieldMap, + methods: &mut BTreeMap<(Vec, Vec, ProtoKey), ()>, +) { + for node in &code.insns { + if let Some(r) = &node.reference { + collect_dexref(r, strings, types, protos, fields, methods); + } + } +} + +fn collect_dexref( + r: &DexRef, + strings: &mut BTreeMap, ()>, + types: &mut BTreeMap, ()>, + protos: &mut BTreeMap, + fields: &mut FieldMap, + methods: &mut BTreeMap<(Vec, Vec, ProtoKey), ()>, +) { + match r { + DexRef::String(s) => { + strings.insert(str_to_mutf8_no_null(s), ()); + } + DexRef::Type(t) => { + strings.insert(str_to_mutf8_no_null(t), ()); + types.insert(str_to_mutf8_no_null(t), ()); + } + DexRef::Field { class, name, field_type } => { + let c = str_to_mutf8_no_null(class); + let n = str_to_mutf8_no_null(name); + let ft = str_to_mutf8_no_null(field_type); + strings.insert(c.clone(), ()); + types.insert(c.clone(), ()); + strings.insert(n.clone(), ()); + strings.insert(ft.clone(), ()); + types.insert(ft.clone(), ()); + fields.insert((c, n, ft), ()); + } + DexRef::Method { class, name, proto } => { + let c = str_to_mutf8_no_null(class); + let n = str_to_mutf8_no_null(name); + strings.insert(c.clone(), ()); + types.insert(c.clone(), ()); + strings.insert(n.clone(), ()); + let shorty = proto.shorty(); + strings.insert(str_to_mutf8_no_null(&shorty), ()); + strings.insert(str_to_mutf8_no_null(&proto.return_type), ()); + types.insert(str_to_mutf8_no_null(&proto.return_type), ()); + for p in &proto.params { + strings.insert(str_to_mutf8_no_null(p), ()); + types.insert(str_to_mutf8_no_null(p), ()); + } + protos.insert(proto.clone(), ()); + methods.insert((c, n, proto.clone()), ()); + } + DexRef::Proto(p) => { + let shorty = p.shorty(); + strings.insert(str_to_mutf8_no_null(&shorty), ()); + strings.insert(str_to_mutf8_no_null(&p.return_type), ()); + types.insert(str_to_mutf8_no_null(&p.return_type), ()); + for param in &p.params { + 
strings.insert(str_to_mutf8_no_null(param), ()); + types.insert(str_to_mutf8_no_null(param), ()); + } + protos.insert(p.clone(), ()); + } + } +} + +// -- MUTF-8 helper ------------------------------------------------------------- + +/// Encode a Rust `&str` to MUTF-8 **without** the null terminator. +fn str_to_mutf8_no_null(s: &str) -> Vec { + let mut v = str_to_mutf8(s); + if v.last() == Some(&0) { + v.pop(); + } + v +} + +// -- Map list tracking --------------------------------------------------------- + +#[derive(Clone)] +struct MapEntry { + type_code: u16, + count: u32, + offset: u32, +} + +// -- DEX Writer ---------------------------------------------------------------- + +/// Serialise a [`DexIr`] to a valid standard-DEX byte vector. +pub struct DexWriter; + +impl DexWriter { + /// Build a complete DEX file from the IR and return the raw bytes. + /// + /// The output has a recalculated Adler32 checksum and correct `file_size`. + pub fn write(ir: DexIr) -> Result> { + let pools = Pools::build(&ir); + let mut out = Out::new(1 << 16); + let mut map: Vec = Vec::new(); + + // -- Header placeholder (112 bytes) ----------------------------------- + let header_start = out.len(); + // magic: "dex\n" + version + "\0" + let version_str = format!("{:03}\0", ir.version); + out.write_bytes(b"dex\n"); + out.write_bytes(version_str.as_bytes()); + // checksum placeholder (4 bytes) + let checksum_pos = out.len(); + out.write_u32(0); + // SHA-1 signature (20 bytes — we leave it as zeros; most tools don't verify) + for _ in 0..20 { + out.write_u8(0); + } + // file_size placeholder + let file_size_pos = out.len(); + out.write_u32(0); + // header_size = 0x70 + out.write_u32(0x70); + // endian_tag + out.write_u32(0x12345678); + // link_size + link_off (unused) + out.write_u32(0); + out.write_u32(0); + // map_off placeholder + let map_off_pos = out.len(); + out.write_u32(0); + // string_ids_size + string_ids_off + out.write_u32(pools.strings.len() as u32); + let string_ids_off_pos = 
out.len(); + out.write_u32(0); + // type_ids_size + type_ids_off + out.write_u32(pools.types.len() as u32); + let type_ids_off_pos = out.len(); + out.write_u32(0); + // proto_ids_size + proto_ids_off + out.write_u32(pools.protos.len() as u32); + let proto_ids_off_pos = out.len(); + out.write_u32(0); + // field_ids_size + field_ids_off + out.write_u32(pools.fields.len() as u32); + let field_ids_off_pos = out.len(); + out.write_u32(0); + // method_ids_size + method_ids_off + out.write_u32(pools.methods.len() as u32); + let method_ids_off_pos = out.len(); + out.write_u32(0); + // class_defs_size + class_defs_off + out.write_u32(ir.classes.len() as u32); + let class_defs_off_pos = out.len(); + out.write_u32(0); + // data_size + data_off (filled in at end) + let data_size_pos = out.len(); + out.write_u32(0); + let data_off_pos = out.len(); + out.write_u32(0); + + assert_eq!(out.len() - header_start, 0x70, "header must be exactly 112 bytes"); + map.push(MapEntry { type_code: 0x0000, count: 1, offset: header_start as u32 }); + + // -- string_ids -------------------------------------------------------- + let string_ids_off = out.len() as u32; + out.patch_u32(string_ids_off_pos, string_ids_off); + // One u32 per string: placeholder, patched when string_data is written. 
+ let mut string_data_off_positions: Vec = Vec::with_capacity(pools.strings.len()); + for _ in &pools.strings { + string_data_off_positions.push(out.reserve_u32()); + } + if !pools.strings.is_empty() { + map.push(MapEntry { + type_code: 0x0001, + count: pools.strings.len() as u32, + offset: string_ids_off, + }); + } + + // -- type_ids ---------------------------------------------------------- + let type_ids_off = out.len() as u32; + out.patch_u32(type_ids_off_pos, type_ids_off); + for type_mutf8 in &pools.types { + let s_idx = pools.string_idx[type_mutf8]; + out.write_u32(s_idx); + } + if !pools.types.is_empty() { + map.push(MapEntry { + type_code: 0x0002, + count: pools.types.len() as u32, + offset: type_ids_off, + }); + } + + // -- proto_ids --------------------------------------------------------- + let proto_ids_off = out.len() as u32; + out.patch_u32(proto_ids_off_pos, proto_ids_off); + let mut proto_params_off_positions: Vec = Vec::with_capacity(pools.protos.len()); + for proto in &pools.protos { + let shorty_s = proto.shorty(); + let shorty_idx = pools.string_idx_of(&shorty_s); + let return_idx = pools.type_idx_of(&proto.return_type) as u16; + out.write_u32(shorty_idx); + out.write_u16(return_idx); + out.write_u16(0); // pad + proto_params_off_positions.push(out.reserve_u32()); // parameters_off placeholder + } + if !pools.protos.is_empty() { + map.push(MapEntry { + type_code: 0x0003, + count: pools.protos.len() as u32, + offset: proto_ids_off, + }); + } + + // -- field_ids --------------------------------------------------------- + let field_ids_off = out.len() as u32; + out.patch_u32(field_ids_off_pos, field_ids_off); + for (class_m, name_m, type_m) in &pools.fields { + let class_tidx = pools.type_idx[class_m] as u16; + let type_tidx = pools.type_idx[type_m] as u16; + let name_sidx = pools.string_idx[name_m]; + out.write_u16(class_tidx); + out.write_u16(type_tidx); + out.write_u32(name_sidx); + } + if !pools.fields.is_empty() { + map.push(MapEntry { + 
type_code: 0x0004, + count: pools.fields.len() as u32, + offset: field_ids_off, + }); + } + + // -- method_ids -------------------------------------------------------- + let method_ids_off = out.len() as u32; + out.patch_u32(method_ids_off_pos, method_ids_off); + for (class_m, name_m, proto_key) in &pools.methods { + let class_tidx = pools.type_idx[class_m] as u16; + let proto_pidx = pools.proto_idx[proto_key] as u16; + let name_sidx = pools.string_idx[name_m]; + out.write_u16(class_tidx); + out.write_u16(proto_pidx); + out.write_u32(name_sidx); + } + if !pools.methods.is_empty() { + map.push(MapEntry { + type_code: 0x0005, + count: pools.methods.len() as u32, + offset: method_ids_off, + }); + } + + // -- class_defs -------------------------------------------------------- + // Sort class_defs: topological ordering by descriptor (alphabetical gives + // a valid ordering for typical class hierarchies; a proper topological + // sort would be needed for full correctness when superclasses appear later). 
+ let mut sorted_classes = ir.classes; + sorted_classes.sort_by(|a, b| a.descriptor.cmp(&b.descriptor)); + + let class_defs_off = out.len() as u32; + out.patch_u32(class_defs_off_pos, class_defs_off); + + // For each class we'll need to backpatch: + // interfaces_off (word at class_def + 12) + // class_data_off (word at class_def + 24) + struct ClassSlots { + interfaces_off_pos: usize, + class_data_off_pos: usize, + } + let mut class_slots: Vec = Vec::with_capacity(sorted_classes.len()); + + for class in &sorted_classes { + let class_tidx = pools.type_idx_of(&class.descriptor) as u16; + let super_tidx: u16 = class + .superclass + .as_deref() + .map(|s| pools.type_idx_of(s) as u16) + .unwrap_or(0xFFFF); + let source_sidx: u32 = class + .source_file + .as_deref() + .map(|s| pools.string_idx_of(s)) + .unwrap_or(0xFFFF_FFFF); + + // class_idx (u16) + pad (u16) + out.write_u16(class_tidx); + out.write_u16(0); + // access_flags (u32) + out.write_u32(class.access_flags); + // superclass_idx (u16) + pad (u16) + out.write_u16(super_tidx); + out.write_u16(0); + // interfaces_off placeholder + let interfaces_off_pos = out.reserve_u32(); + // source_file_idx + out.write_u32(source_sidx); + // annotations_off = 0 + out.write_u32(0); + // class_data_off placeholder + let class_data_off_pos = out.reserve_u32(); + // static_values_off = 0 (TODO: encode static values) + out.write_u32(0); + + class_slots.push(ClassSlots { interfaces_off_pos, class_data_off_pos }); + } + if !sorted_classes.is_empty() { + map.push(MapEntry { + type_code: 0x0006, + count: sorted_classes.len() as u32, + offset: class_defs_off, + }); + } + + // ═══ DATA SECTION begins here ═══════════════════════════════════════ + let data_off = out.len() as u32; + out.patch_u32(data_off_pos, data_off); + + // -- type_lists (proto parameters + class interfaces) ------------------ + // Build a deduplication map so identical type lists share one entry. 
+ let mut type_list_cache: HashMap, u32> = HashMap::new(); + + let write_type_list = |out: &mut Out, + _map: &mut Vec, + type_list_cache: &mut HashMap, u32>, + type_indices: Vec| -> u32 { + if type_indices.is_empty() { + return 0; + } + if let Some(&off) = type_list_cache.get(&type_indices) { + return off; + } + out.align4(); + let off = out.len() as u32; + out.write_u32(type_indices.len() as u32); + for tidx in &type_indices { + out.write_u16(*tidx); + } + type_list_cache.insert(type_indices, off); + // map entry updated after all lists are written + off + }; + + // Write proto type lists and patch proto_ids. + let mut type_list_offsets: Vec = Vec::with_capacity(pools.protos.len()); + for proto in &pools.protos { + let type_indices: Vec = + proto.params.iter().map(|p| pools.type_idx_of(p) as u16).collect(); + let off = write_type_list( + &mut out, + &mut map, + &mut type_list_cache, + type_indices, + ); + type_list_offsets.push(off); + } + for (i, off) in type_list_offsets.iter().enumerate() { + out.patch_u32(proto_params_off_positions[i], *off); + } + // Write class interface lists and patch class_defs. + for (i, class) in sorted_classes.iter().enumerate() { + let type_indices: Vec = + class.interfaces.iter().map(|iface| pools.type_idx_of(iface) as u16).collect(); + let off = write_type_list( + &mut out, + &mut map, + &mut type_list_cache, + type_indices, + ); + out.patch_u32(class_slots[i].interfaces_off_pos, off); + } + // Count type list entries for the map. + let type_list_count = type_list_cache.len() as u32; + if type_list_count > 0 { + // Find the earliest type list offset for the map entry. + let min_off = *type_list_cache.values().min().unwrap(); + map.push(MapEntry { type_code: 0x1001, count: type_list_count, offset: min_off }); + } + + // -- code_items -------------------------------------------------------- + // Build method -> code_off map; used later when writing class_data. 
+ let mut code_offsets: HashMap<(usize, bool, usize), u32> = HashMap::new(); + // Key: (class_index, is_virtual, method_index_in_list) + let mut code_item_count = 0u32; + let mut code_items_first_off = 0u32; + + for (ci, class) in sorted_classes.iter().enumerate() { + for (is_virtual, methods) in + [(false, &class.direct_methods), (true, &class.virtual_methods)] + { + for (mi, method) in methods.iter().enumerate() { + if let Some(code) = &method.code { + out.align4(); + let off = out.len() as u32; + if code_item_count == 0 { + code_items_first_off = off; + } + code_offsets.insert((ci, is_virtual, mi), off); + write_code_item(&mut out, code, &pools); + code_item_count += 1; + } + } + } + } + if code_item_count > 0 { + map.push(MapEntry { + type_code: 0x2001, + count: code_item_count, + offset: code_items_first_off, + }); + } + + // -- class_data_items -------------------------------------------------- + let mut class_data_count = 0u32; + let mut class_data_first_off = 0u32; + + for (ci, class) in sorted_classes.iter().enumerate() { + let has_data = !class.static_fields.is_empty() + || !class.instance_fields.is_empty() + || !class.direct_methods.is_empty() + || !class.virtual_methods.is_empty(); + if !has_data { + continue; + } + let cdi_off = out.len() as u32; + if class_data_count == 0 { + class_data_first_off = cdi_off; + } + out.patch_u32(class_slots[ci].class_data_off_pos, cdi_off); + class_data_count += 1; + + write_class_data( + &mut out, + class, + &pools, + &code_offsets, + ci, + ); + } + if class_data_count > 0 { + map.push(MapEntry { + type_code: 0x2000, + count: class_data_count, + offset: class_data_first_off, + }); + } + + // -- string_data_items ------------------------------------------------- + let string_data_first_off = out.len() as u32; + for (i, mutf8) in pools.strings.iter().enumerate() { + let sdi_off = out.len() as u32; + out.patch_u32(string_data_off_positions[i], sdi_off); + // ULEB128 UTF-16 length + let utf16_len = 
mutf8_to_utf16_len(mutf8); + out.write_uleb128(utf16_len as u32); + // MUTF-8 bytes + out.write_bytes(mutf8); + // null terminator + out.write_u8(0); + } + if !pools.strings.is_empty() { + map.push(MapEntry { + type_code: 0x2002, + count: pools.strings.len() as u32, + offset: string_data_first_off, + }); + } + + // -- map_list ---------------------------------------------------------- + out.align4(); + let map_off = out.len() as u32; + out.patch_u32(map_off_pos, map_off); + + // Sort map by type code (required by DEX spec). + // Add the map_list entry itself. + map.push(MapEntry { type_code: 0x1000, count: 1, offset: map_off }); + map.sort_by_key(|e| e.type_code); + + out.write_u32(map.len() as u32); + for entry in &map { + out.write_u16(entry.type_code); + out.write_u16(0); // unused + out.write_u32(entry.count); + out.write_u32(entry.offset); + } + + // -- Finalise header --------------------------------------------------- + let file_size = out.len() as u32; + out.patch_u32(file_size_pos, file_size); + + let data_size = file_size - data_off; + out.patch_u32(data_size_pos, data_size); + + // Patch remaining header offset fields (already patched above, but + // the ones that might have been left as 0 for empty sections). 
+ if pools.strings.is_empty() { + out.patch_u32(string_ids_off_pos, 0); + } + if pools.types.is_empty() { + out.patch_u32(type_ids_off_pos, 0); + } + if pools.protos.is_empty() { + out.patch_u32(proto_ids_off_pos, 0); + } + if pools.fields.is_empty() { + out.patch_u32(field_ids_off_pos, 0); + } + if pools.methods.is_empty() { + out.patch_u32(method_ids_off_pos, 0); + } + if sorted_classes.is_empty() { + out.patch_u32(class_defs_off_pos, 0); + } + + // Checksum + update_checksum(&mut out.data); + + let _ = checksum_pos; // Written by update_checksum + + Ok(out.data) + } +} + +// -- Code item serialisation --------------------------------------------------- + +fn write_code_item(out: &mut Out, code: &CodeDef, pools: &Pools) { + // Encode all InsnNodes, resolving pool references now. + let mut encoded: Vec = Vec::with_capacity(code.insns.len() * 2); + for node in &code.insns { + let ref_idx = node.reference.as_ref().map(|r| pools.ref_idx(r)); + let branch_offset = node.target.as_ref().and_then(|t| match t { + BranchTarget::Offset(o) => Some(*o), + BranchTarget::Label(_) => None, // should not reach here after build() + }); + let words = encode_insn(node.opcode, &node.regs, node.literal, ref_idx, branch_offset) + .expect("instruction encoding failed after successful build()"); + encoded.extend(words); + } + + out.write_u16(code.registers); + out.write_u16(code.ins); + out.write_u16(code.outs); + out.write_u16(code.tries.len() as u16); + out.write_u32(0); // debug_info_off = 0 + out.write_u32(encoded.len() as u32); + for w in &encoded { + out.write_u16(*w); + } + if !code.tries.is_empty() { + if !encoded.len().is_multiple_of(2) { + out.write_u16(0); + } + write_try_items(out, &code.tries); + } +} + +fn write_try_items(out: &mut Out, tries: &[TryDef]) { + // Write try_item array + for t in tries { + out.write_u32(t.start); + out.write_u16(t.count); + // handler_off filled after handler list is written; for now write 0 and skip + out.write_u16(0); + } + // Encoded catch 
handler list (1 byte: handler count per try block) + // This is a simplified implementation; full handler encoding is complex. + out.write_uleb128(tries.len() as u32); + for t in tries { + let n = t.handlers.len(); + let has_catch_all = t.handlers.iter().any(|h| h.type_desc.is_none()); + let typed = t.handlers.iter().filter(|h| h.type_desc.is_some()).count(); + let sleb_val = if has_catch_all { -(typed as i32) } else { typed as i32 }; + out.write_sleb128(sleb_val); + for h in &t.handlers { + if h.type_desc.is_some() { + out.write_uleb128(0); // type_idx placeholder + out.write_uleb128(h.address); + } + } + if has_catch_all { + let catch_all = t.handlers.iter().find(|h| h.type_desc.is_none()).unwrap(); + out.write_uleb128(catch_all.address); + } + let _ = n; + } +} + +// -- Class data item serialisation --------------------------------------------- + +fn write_class_data( + out: &mut Out, + class: &ClassDef, + pools: &Pools, + code_offsets: &HashMap<(usize, bool, usize), u32>, + ci: usize, +) { + out.write_uleb128(class.static_fields.len() as u32); + out.write_uleb128(class.instance_fields.len() as u32); + out.write_uleb128(class.direct_methods.len() as u32); + out.write_uleb128(class.virtual_methods.len() as u32); + + // Fields use delta-encoding of field_idx + let mut prev_idx = 0u32; + for f in &class.static_fields { + let fidx = pools.field_idx_of(&class.descriptor, &f.name, &f.field_type); + out.write_uleb128(fidx - prev_idx); + out.write_uleb128(f.access_flags); + prev_idx = fidx; + } + prev_idx = 0; + for f in &class.instance_fields { + let fidx = pools.field_idx_of(&class.descriptor, &f.name, &f.field_type); + out.write_uleb128(fidx - prev_idx); + out.write_uleb128(f.access_flags); + prev_idx = fidx; + } + + // Methods use delta-encoding of method_idx + prev_idx = 0; + for (mi, m) in class.direct_methods.iter().enumerate() { + let midx = pools.method_idx_of(&class.descriptor, &m.name, &m.proto); + out.write_uleb128(midx - prev_idx); + 
out.write_uleb128(m.access_flags); + let code_off = code_offsets.get(&(ci, false, mi)).copied().unwrap_or(0); + out.write_uleb128(code_off); + prev_idx = midx; + } + prev_idx = 0; + for (mi, m) in class.virtual_methods.iter().enumerate() { + let midx = pools.method_idx_of(&class.descriptor, &m.name, &m.proto); + out.write_uleb128(midx - prev_idx); + out.write_uleb128(m.access_flags); + let code_off = code_offsets.get(&(ci, true, mi)).copied().unwrap_or(0); + out.write_uleb128(code_off); + prev_idx = midx; + } +} + +// -- EncodedValue serialisation ----------------------------------------------- + +#[allow(dead_code)] +fn _write_encoded_value(out: &mut Out, v: &EncodedValueIr, pools: &Pools) { + match v { + EncodedValueIr::Byte(b) => { + out.write_u8(0x00); // VALUE_BYTE, arg=0 + out.write_u8(*b as u8); + } + EncodedValueIr::Int(i) => { + let bytes = i.to_le_bytes(); + let size = value_size(*i as i64); + out.write_u8(0x04 | ((size - 1) << 5)); // VALUE_INT + out.write_bytes(&bytes[..size as usize]); + } + EncodedValueIr::Long(l) => { + let bytes = l.to_le_bytes(); + let size = value_size(*l); + out.write_u8(0x06 | ((size - 1) << 5)); // VALUE_LONG + out.write_bytes(&bytes[..size as usize]); + } + EncodedValueIr::Boolean(b) => { + out.write_u8(0x1f | ((*b as u8) << 5)); // VALUE_BOOLEAN + } + EncodedValueIr::String(s) => { + let idx = pools.string_idx_of(s); + let bytes = idx.to_le_bytes(); + let size = unsigned_value_size(idx as i64); + out.write_u8(0x17 | ((size - 1) << 5)); + out.write_bytes(&bytes[..size as usize]); + } + EncodedValueIr::Null => { + out.write_u8(0x1e); // VALUE_NULL, arg=0 + } + _ => { + // Fallback: write null for unsupported types + out.write_u8(0x1e); + } + } +} + +#[allow(dead_code)] +fn value_size(v: i64) -> u8 { + if v >= i8::MIN as i64 && v <= i8::MAX as i64 { + 1 + } else if v >= i16::MIN as i64 && v <= i16::MAX as i64 { + 2 + } else if v >= i32::MIN as i64 && v <= i32::MAX as i64 { + 4 + } else { + 8 + } +} + +#[allow(dead_code)] +fn 
unsigned_value_size(v: i64) -> u8 { + if v <= 0xFF { + 1 + } else if v <= 0xFFFF { + 2 + } else if v <= 0xFF_FFFF { + 3 + } else { + 4 + } +} + +// -- UTF-16 length computation ------------------------------------------------- + +/// Count UTF-16 code units for a MUTF-8 byte sequence (without null terminator). +fn mutf8_to_utf16_len(mutf8: &[u8]) -> usize { + let mut i = 0; + let mut count = 0; + while i < mutf8.len() { + let b = mutf8[i]; + if b & 0x80 == 0 { + i += 1; + } else if b & 0xE0 == 0xC0 { + // 2-byte sequence (includes MUTF-8 null 0xC0 0x80) + i += 2; + } else if b & 0xF0 == 0xE0 { + // 3-byte: single UTF-16 unit (including surrogate pairs from MUTF-8) + i += 3; + } else { + i += 1; // fallback + } + count += 1; + } + count +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::file::{ + ir::{ClassDef, MethodDef, ProtoKey}, + modifiers::{ACC_PUBLIC, ACC_STATIC}, + builder::CodeBuilder, + DexFile, DexLocation, + }; + + #[test] + fn write_empty_dex() { + let ir = DexIr::new(35); + let bytes = DexWriter::write(ir).unwrap(); + let slice = bytes.as_slice(); + let dex = DexFile::from_raw_parts(&slice, DexLocation::InMemory).unwrap(); + assert_eq!(dex.get_header().get_version(), 35); + } + + #[test] + fn write_class_no_methods() { + let mut ir = DexIr::new(35); + ir.add_class( + ClassDef::new("Lcom/example/Empty;") + .access(ACC_PUBLIC) + .superclass("Ljava/lang/Object;"), + ); + let bytes = DexWriter::write(ir).unwrap(); + let slice = bytes.as_slice(); + let dex = DexFile::from_raw_parts(&slice, DexLocation::InMemory).unwrap(); + assert_eq!(dex.num_class_defs(), 1); + } + + #[test] + fn write_class_with_method_and_string_ref() { + // Builds a class with a method that references a string constant. + // This exercises DexRef resolution through collect_code_refs + write_code_item. 
+ let mut ir = DexIr::new(35); + let mut class = ClassDef::new("Ltest/Hello;") + .access(ACC_PUBLIC) + .superclass("Ljava/lang/Object;"); + + let mut code = CodeBuilder::new(3, 1, 2); + code.emit(r#"sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;"#).unwrap(); + code.emit(r#"const-string v1, "Hello, DEX!""#).unwrap(); + code.emit(r#"invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V"#).unwrap(); + code.emit("return-void").unwrap(); + + class.add_direct_method( + MethodDef::new("main", ProtoKey::new("V", ["[Ljava/lang/String;"])) + .access(ACC_PUBLIC | ACC_STATIC) + .code(code.build().unwrap()), + ); + + ir.add_class(class); + let bytes = DexWriter::write(ir).unwrap(); + let slice = bytes.as_slice(); + let dex = DexFile::from_raw_parts(&slice, DexLocation::InMemory).unwrap(); + + assert_eq!(dex.num_class_defs(), 1); + // "Hello, DEX!" must be in the string pool + let found = (0..dex.num_string_ids()) + .any(|i| dex.get_str_at(i).ok().as_deref() == Some("Hello, DEX!")); + assert!(found, "string 'Hello, DEX!' not found in pool"); + } +} diff --git a/src/leb128.rs b/src/leb128.rs index 35cc240..91a5f6b 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -36,26 +36,3 @@ pub fn decode_leb128p1_off(data_in: &[u8], ptr_pos: &mut usize) -> Result { pub fn decode_sleb128(data_in: &[u8], ptr_pos: &mut usize) -> Result { Ok(leb128fmt::decode_sint_slice::(data_in, ptr_pos)?) } - -// python exports -#[cfg(feature = "python")] -#[pyo3::pymodule(name = "leb128")] -pub(crate) mod py_leb128 { - use pyo3::PyResult; - - #[pyo3::pyfunction] - pub fn decode_uleb128(data_in: &[u8]) -> PyResult<(u32, usize)> { - Ok(super::decode_leb128::(data_in)?) - } - - #[pyo3::pyfunction] - pub fn decode_sleb128(data_in: &[u8]) -> PyResult { - Ok(super::decode_sleb128(data_in, &mut 0)?) - } - - #[pyo3::pyfunction] - pub fn decode_leb128p1(data_in: &[u8]) -> PyResult<(i32, usize)> { - Ok(super::decode_leb128p1(data_in)?) 
- } -} -// end python exports diff --git a/src/lib.rs b/src/lib.rs index eddb7d9..de213d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,11 @@ pub mod error; pub mod file; pub mod leb128; pub mod utf; +#[cfg(feature = "vdex")] +pub mod vdex; pub mod desc_names; +pub mod primitive; pub type Result = result::Result; @@ -17,26 +20,38 @@ pub(crate) mod py; mod _internal { #[pymodule_export] - use crate::file::container::py_container; + use crate::py::container::py_container; #[pymodule_export] use crate::py::file::py_file; #[pymodule_export] - use crate::error::py_error; + use crate::py::error::py_error; #[pymodule_export] - use crate::file::structs::py_structs; + use crate::py::structs::py_structs; #[pymodule_export] - use crate::utf::py_utf; + use crate::py::utf::py_utf; #[pymodule_export] - use crate::leb128::py_leb128; + use crate::py::leb128::py_leb128; #[pymodule_export] - use crate::file::class_accessor::py_class_accessor; + use crate::py::class_accessor::py_class_accessor; #[pymodule_export] use crate::file::instruction::py_code; + + #[pymodule_export] + use crate::py::primitive::py_primitive; + + #[pymodule_export] + use crate::py::type_lookup_table::py_type_lookup_table; + + #[pymodule_export] + use crate::py::editor::py_editor; + + #[pymodule_export] + use crate::py::builder::py_builder; } diff --git a/src/primitive.rs b/src/primitive.rs new file mode 100644 index 0000000..6c34537 --- /dev/null +++ b/src/primitive.rs @@ -0,0 +1,151 @@ +/// Java primitive type classification, matching ART's `Primitive` class. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum PrimitiveType { + /// Reference (non-primitive) type. + Not = 0, + Boolean = 1, + Byte = 2, + Char = 3, + Short = 4, + Int = 5, + Long = 6, + Float = 7, + Double = 8, + Void = 9, +} + +impl PrimitiveType { + /// Returns the `PrimitiveType` for the given JVM descriptor character. 
+ pub fn from_char(c: char) -> Self { + match c { + 'Z' => Self::Boolean, + 'B' => Self::Byte, + 'C' => Self::Char, + 'S' => Self::Short, + 'I' => Self::Int, + 'J' => Self::Long, + 'F' => Self::Float, + 'D' => Self::Double, + 'V' => Self::Void, + _ => Self::Not, + } + } + + /// Returns the single-char DEX type descriptor, or `None` for `Not`. + pub fn descriptor(self) -> Option<&'static str> { + match self { + Self::Boolean => Some("Z"), + Self::Byte => Some("B"), + Self::Char => Some("C"), + Self::Short => Some("S"), + Self::Int => Some("I"), + Self::Long => Some("J"), + Self::Float => Some("F"), + Self::Double => Some("D"), + Self::Void => Some("V"), + Self::Not => None, + } + } + + /// Returns the fully-qualified descriptor for the boxed version of this type, or `None` for `Not`. + pub fn boxed_descriptor(self) -> Option<&'static str> { + match self { + Self::Boolean => Some("Ljava/lang/Boolean;"), + Self::Byte => Some("Ljava/lang/Byte;"), + Self::Char => Some("Ljava/lang/Character;"), + Self::Short => Some("Ljava/lang/Short;"), + Self::Int => Some("Ljava/lang/Integer;"), + Self::Long => Some("Ljava/lang/Long;"), + Self::Float => Some("Ljava/lang/Float;"), + Self::Double => Some("Ljava/lang/Double;"), + Self::Void => Some("Ljava/lang/Void;"), + Self::Not => None, + } + } + + /// Returns the storage size in bytes (0 for `Void`, 4 for object references). + pub fn component_size(self) -> usize { + match self { + Self::Void => 0, + Self::Boolean | Self::Byte => 1, + Self::Char | Self::Short => 2, + Self::Int | Self::Float | Self::Not => 4, + Self::Long | Self::Double => 8, + } + } + + /// Returns `log2(component_size())`. + pub fn component_size_shift(self) -> u32 { + match self { + Self::Void | Self::Boolean | Self::Byte => 0, + Self::Char | Self::Short => 1, + Self::Int | Self::Float | Self::Not => 2, + Self::Long | Self::Double => 3, + } + } + + /// Returns `true` for numeric primitive types (byte/char/short/int/long/float/double). 
+ pub fn is_numeric(self) -> bool { + matches!( + self, + Self::Byte | Self::Char | Self::Short | Self::Int | Self::Long | Self::Float | Self::Double + ) + } + + /// Returns `true` for 64-bit types (`long` or `double`). + pub fn is_64bit(self) -> bool { + matches!(self, Self::Long | Self::Double) + } + + /// Returns `true` if this is any primitive type (not `Not`). + pub fn is_primitive(self) -> bool { + !matches!(self, Self::Not) + } + + /// Returns the human-readable Java type name (e.g. `"int"`, `"boolean"`, `"Object"`). + pub fn pretty_name(self) -> &'static str { + match self { + Self::Not => "Object", + Self::Boolean => "boolean", + Self::Byte => "byte", + Self::Char => "char", + Self::Short => "short", + Self::Int => "int", + Self::Long => "long", + Self::Float => "float", + Self::Double => "double", + Self::Void => "void", + } + } +} + +impl std::fmt::Display for PrimitiveType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.pretty_name()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn roundtrip_descriptors() { + for pt in [ + PrimitiveType::Boolean, + PrimitiveType::Byte, + PrimitiveType::Char, + PrimitiveType::Short, + PrimitiveType::Int, + PrimitiveType::Long, + PrimitiveType::Float, + PrimitiveType::Double, + PrimitiveType::Void, + ] { + let desc = pt.descriptor().unwrap(); + let back = PrimitiveType::from_char(desc.chars().next().unwrap()); + assert_eq!(pt, back); + } + assert_eq!(PrimitiveType::from_char('X'), PrimitiveType::Not); + } +} diff --git a/src/py.rs b/src/py.rs index 44aff67..6b8451c 100644 --- a/src/py.rs +++ b/src/py.rs @@ -1,4 +1,14 @@ pub(crate) mod file; +pub(crate) mod leb128; +pub(crate) mod utf; +pub(crate) mod error; +pub(crate) mod container; +pub(crate) mod class_accessor; +pub(crate) mod structs; +pub(crate) mod primitive; +pub(crate) mod type_lookup_table; +pub(crate) mod editor; +pub(crate) mod builder; macro_rules! 
rs_type_wrapper { ($src_type:ty, $py_type:ident, $rs_type:ident, name: $name:literal, module: $module:literal) => { diff --git a/src/py/builder.rs b/src/py/builder.rs new file mode 100644 index 0000000..63ecce5 --- /dev/null +++ b/src/py/builder.rs @@ -0,0 +1,439 @@ +//! Python bindings for the DEX mutation system. +//! +//! Exposes [`DexIrBuilder`], [`IrClassDef`], [`IrMethodDef`], [`IrFieldDef`], +//! [`CodeBuilder`], [`CodeDef`] and [`ProtoKey`] to Python under +//! `dexrs._internal.builder`. +//! +//! # Python Quick-start +//! +//! ```python +//! from dexrs.builder import DexIrBuilder, IrClassDef, IrMethodDef, CodeBuilder +//! +//! # 1. Build a class +//! cls = IrClassDef("Lhello/World;") +//! cls.set_superclass("Ljava/lang/Object;") +//! cls.set_access(0x0001) # ACC_PUBLIC +//! +//! # 2. Assemble a method body +//! code = CodeBuilder(registers=3, ins=1, outs=2) +//! code.emit('sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;') +//! code.emit('const-string v1, "Hello!"') +//! code.emit('invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V') +//! code.emit('return-void') +//! +//! # 3. Attach method to class +//! method = IrMethodDef("main", "([Ljava/lang/String;)V", 0x0009) +//! method.set_code(code.build()) +//! cls.add_direct_method(method) +//! +//! # 4. Assemble the DEX +//! builder = DexIrBuilder(version=35) +//! builder.add_class(cls) +//! dex_bytes = builder.write() # -> bytes +//! ``` + +use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes}; + +use crate::file::{ + builder::CodeBuilder, + ir::{ClassDef as IrClassDef, CodeDef, DexIr, FieldDef as IrFieldDef, MethodDef as IrMethodDef, ProtoKey}, + writer::DexWriter, +}; + +// -- ProtoKey ----------------------------------------------------------------- + +/// A method prototype: return type + parameter types. 
+/// +/// ```python +/// from dexrs._internal.builder import ProtoKey +/// p = ProtoKey("V", ["I", "Ljava/lang/String;"]) +/// assert p.shorty() == "VIL" +/// ``` +#[pyclass(name = "ProtoKey", module = "dexrs._internal.builder")] +pub struct PyProtoKey { + pub(crate) inner: ProtoKey, +} + +#[pymethods] +impl PyProtoKey { + /// ``ProtoKey(return_type, params)`` + #[new] + pub fn new(return_type: &str, params: Vec) -> Self { + Self { inner: ProtoKey::new(return_type, params) } + } + + /// Parse a JVM method descriptor such as ``"([Ljava/lang/String;)V"`` into + /// a :class:`ProtoKey`. Returns ``None`` if the descriptor is malformed. + #[staticmethod] + pub fn from_descriptor(desc: &str) -> Option { + ProtoKey::from_descriptor(desc).map(|p| Self { inner: p }) + } + + /// The return type descriptor, e.g. ``"V"`` or ``"Ljava/lang/String;"``. + #[getter] + pub fn return_type(&self) -> &str { + &self.inner.return_type + } + + /// List of parameter type descriptors. + #[getter] + pub fn params(&self) -> Vec { + self.inner.params.clone() + } + + /// Compute the shorty descriptor (e.g. ``"VIL"`` for ``(I Ljava/lang/String;)V``). + pub fn shorty(&self) -> String { + self.inner.shorty() + } + + pub fn __repr__(&self) -> String { + format!("ProtoKey({:?}, {:?})", self.inner.return_type, self.inner.params) + } +} + +// -- CodeDef ------------------------------------------------------------------ + +/// A resolved code item produced by :meth:`CodeBuilder.build`. +/// +/// Attach to a method with :meth:`IrMethodDef.set_code`. 
+#[pyclass(name = "CodeDef", module = "dexrs._internal.builder")] +pub struct PyCodeDef { + pub(crate) inner: CodeDef, +} + +#[pymethods] +impl PyCodeDef { + #[getter] + pub fn registers(&self) -> u16 { + self.inner.registers + } + #[getter] + pub fn ins(&self) -> u16 { + self.inner.ins + } + #[getter] + pub fn outs(&self) -> u16 { + self.inner.outs + } + #[getter] + pub fn insns_count(&self) -> usize { + self.inner.insns.len() + } + + pub fn __repr__(&self) -> String { + format!( + "CodeDef(registers={}, ins={}, outs={}, insns={})", + self.inner.registers, + self.inner.ins, + self.inner.outs, + self.inner.insns.len() + ) + } +} + +// -- CodeBuilder -------------------------------------------------------------- + +/// Assembles DEX bytecode from disassembly text. +/// +/// Usage:: +/// +/// code = CodeBuilder(registers=2, ins=1, outs=0) +/// code.emit("const/4 v0, #1") +/// code.emit("return v0") +/// code_def = code.build() +/// +/// The builder is consumed by :meth:`build`; any further calls raise +/// :exc:`ValueError`. +#[pyclass(name = "CodeBuilder", module = "dexrs._internal.builder")] +pub struct PyCodeBuilder { + inner: Option, +} + +#[pymethods] +impl PyCodeBuilder { + /// ``CodeBuilder(registers, ins, outs)`` + /// + /// :param registers: Total number of registers (locals + params). + /// :param ins: Number of incoming parameter registers. + /// :param outs: Number of registers required for outgoing calls. + #[new] + pub fn new(registers: u16, ins: u16, outs: u16) -> Self { + Self { inner: Some(CodeBuilder::new(registers, ins, outs)) } + } + + /// Parse and emit one disassembly line. + /// + /// :param line: A single Dalvik disassembly line such as + /// ``"invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V"``. + /// :raises ValueError: If the line cannot be parsed. + pub fn emit(&mut self, line: &str) -> PyResult<()> { + self.inner + .as_mut() + .ok_or_else(|| PyValueError::new_err("CodeBuilder already consumed by build()"))? 
+ .emit(line) + .map_err(|e| PyValueError::new_err(e.to_string())) + } + + /// Place a named label at the current instruction position. + /// + /// Reference the label in branch instructions as ``:label``, e.g. + /// ``"if-eqz v0, :end"``. + pub fn label(&mut self, name: &str) -> PyResult<()> { + self.inner + .as_mut() + .ok_or_else(|| PyValueError::new_err("CodeBuilder already consumed by build()"))? + .label(name); + Ok(()) + } + + /// Resolve branches and return a :class:`CodeDef`. + /// + /// The builder is consumed and cannot be used after this call. + /// + /// :raises ValueError: If a referenced label is undefined. + pub fn build(&mut self) -> PyResult { + let builder = self + .inner + .take() + .ok_or_else(|| PyValueError::new_err("CodeBuilder already consumed by build()"))?; + let code = builder.build().map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(PyCodeDef { inner: code }) + } + + pub fn __repr__(&self) -> &str { + if self.inner.is_some() { "CodeBuilder(active)" } else { "CodeBuilder(consumed)" } + } +} + +// -- IrFieldDef --------------------------------------------------------------- + +/// A field declaration inside a class IR. +/// +/// Normally created via :meth:`IrClassDef.add_static_field` / +/// :meth:`IrClassDef.add_instance_field` rather than directly. 
+#[pyclass(name = "IrFieldDef", module = "dexrs._internal.builder")] +pub struct PyIrFieldDef { + pub(crate) inner: IrFieldDef, +} + +#[pymethods] +impl PyIrFieldDef { + /// ``IrFieldDef(name, field_type, access_flags=0)`` + #[new] + #[pyo3(signature = (name, field_type, access_flags = 0))] + pub fn new(name: &str, field_type: &str, access_flags: u32) -> Self { + Self { inner: IrFieldDef::new(name, field_type).access(access_flags) } + } + + #[getter] + pub fn name(&self) -> &str { &self.inner.name } + #[getter] + pub fn field_type(&self) -> &str { &self.inner.field_type } + #[getter] + pub fn access_flags(&self) -> u32 { self.inner.access_flags } + #[setter] + pub fn set_access_flags(&mut self, v: u32) { self.inner.access_flags = v; } +} + +// -- IrMethodDef -------------------------------------------------------------- + +/// A method declaration (optionally with a body). +/// +/// ```python +/// method = IrMethodDef("", "()V", 0x10001) # constructor, public +/// code = CodeBuilder(registers=1, ins=1, outs=0) +/// code.emit("return-void") +/// method.set_code(code.build()) +/// ``` +#[pyclass(name = "IrMethodDef", module = "dexrs._internal.builder")] +pub struct PyIrMethodDef { + pub(crate) inner: IrMethodDef, +} + +#[pymethods] +impl PyIrMethodDef { + /// ``IrMethodDef(name, descriptor, access_flags)`` + /// + /// :param name: Method name, e.g. ``"main"`` or ``""``. + /// :param descriptor: JVM method descriptor, e.g. ``"([Ljava/lang/String;)V"``. + /// :param access_flags: Access flags (``ACC_PUBLIC`` = 0x0001, etc.). 
+ #[new] + #[pyo3(signature = (name, descriptor, access_flags = 0))] + pub fn new(name: &str, descriptor: &str, access_flags: u32) -> Self { + let proto = ProtoKey::from_descriptor(descriptor) + .unwrap_or_else(|| ProtoKey::new("V", [] as [&str; 0])); + Self { inner: IrMethodDef::new(name, proto).access(access_flags) } + } + + #[getter] + pub fn name(&self) -> &str { &self.inner.name } + #[getter] + pub fn access_flags(&self) -> u32 { self.inner.access_flags } + #[setter] + pub fn set_access_flags(&mut self, v: u32) { self.inner.access_flags = v; } + + /// Attach a :class:`CodeDef` as this method's body. + pub fn set_code(&mut self, code: &PyCodeDef) { + self.inner.code = Some(code.inner.clone()); + } + + pub fn __repr__(&self) -> String { + format!("IrMethodDef({:?}, {})", self.inner.name, self.inner.proto.shorty()) + } +} + +// -- IrClassDef --------------------------------------------------------------- + +/// A complete class definition for the DEX IR. +/// +/// ```python +/// cls = IrClassDef("Lcom/example/Foo;") +/// cls.set_access(0x0001) +/// cls.set_superclass("Ljava/lang/Object;") +/// cls.add_instance_field("mValue", "I", 0x0002) # private int mValue +/// cls.add_direct_method(constructor_method) +/// cls.add_virtual_method(overridden_method) +/// ``` +#[pyclass(name = "IrClassDef", module = "dexrs._internal.builder")] +pub struct PyIrClassDef { + pub(crate) inner: IrClassDef, +} + +#[pymethods] +impl PyIrClassDef { + /// ``IrClassDef(descriptor)`` + /// + /// :param descriptor: Full DEX type descriptor, e.g. ``"Lcom/example/Foo;"``. + #[new] + pub fn new(descriptor: &str) -> Self { + Self { inner: IrClassDef::new(descriptor) } + } + + #[getter] + pub fn descriptor(&self) -> &str { &self.inner.descriptor } + + /// Set the class access flags (e.g. ``0x0001`` for public). + pub fn set_access(&mut self, flags: u32) { + self.inner.access_flags = flags; + } + + /// Set the superclass descriptor (e.g. ``"Ljava/lang/Object;"``). 
+ pub fn set_superclass(&mut self, desc: &str) { + self.inner.superclass = Some(desc.to_string()); + } + + /// Add an implemented interface. + pub fn add_interface(&mut self, desc: &str) { + self.inner.interfaces.push(desc.to_string()); + } + + /// Set the source file name (used in debug info, optional). + pub fn set_source_file(&mut self, name: &str) { + self.inner.source_file = Some(name.to_string()); + } + + /// Add a static field declaration. + #[pyo3(signature = (name, field_type, access_flags = 0))] + pub fn add_static_field(&mut self, name: &str, field_type: &str, access_flags: u32) { + self.inner.static_fields.push(IrFieldDef::new(name, field_type).access(access_flags)); + } + + /// Add an instance field declaration. + #[pyo3(signature = (name, field_type, access_flags = 0))] + pub fn add_instance_field(&mut self, name: &str, field_type: &str, access_flags: u32) { + self.inner.instance_fields.push(IrFieldDef::new(name, field_type).access(access_flags)); + } + + /// Add a direct method (````, ````, or ``static``/``private``). + pub fn add_direct_method(&mut self, method: &PyIrMethodDef) { + self.inner.direct_methods.push(method.inner.clone()); + } + + /// Add a virtual (overridable) method. + pub fn add_virtual_method(&mut self, method: &PyIrMethodDef) { + self.inner.virtual_methods.push(method.inner.clone()); + } + + pub fn __repr__(&self) -> String { + format!("IrClassDef({:?})", self.inner.descriptor) + } +} + +// -- DexIrBuilder ------------------------------------------------------------- + +/// Builds a complete DEX file from scratch. 
+/// +/// ```python +/// builder = DexIrBuilder(version=35) +/// builder.add_class(cls) +/// dex_bytes = builder.write() +/// +/// # Or equivalently: +/// with open("output.dex", "wb") as f: +/// f.write(builder.write()) +/// ``` +#[pyclass(name = "DexIrBuilder", module = "dexrs._internal.builder")] +pub struct PyDexIrBuilder { + inner: DexIr, +} + +#[pymethods] +impl PyDexIrBuilder { + /// ``DexIrBuilder(version=35)`` + /// + /// :param version: DEX version integer. Use ``35`` (Android 5+) unless + /// you specifically need a newer format. + #[new] + #[pyo3(signature = (version = 35))] + pub fn new(version: u32) -> Self { + Self { inner: DexIr::new(version) } + } + + /// Add a class definition to the IR. + /// + /// The class is cloned into the builder; the original :class:`IrClassDef` + /// is still usable after this call. + pub fn add_class(&mut self, cls: &PyIrClassDef) { + self.inner.add_class(cls.inner.clone()); + } + + /// Serialize all classes to a valid DEX byte string. + /// + /// :returns: Raw DEX bytes (``bytes`` object). + /// :raises ValueError: If the IR contains inconsistencies that prevent + /// serialization. + pub fn write<'py>(&self, py: Python<'py>) -> PyResult> { + let bytes = DexWriter::write(self.inner.clone()) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + Ok(PyBytes::new(py, &bytes)) + } + + /// Number of classes currently in the IR. 
+ pub fn class_count(&self) -> usize { + self.inner.classes.len() + } + + pub fn __repr__(&self) -> String { + format!("DexIrBuilder(version={}, classes={})", self.inner.version, self.inner.classes.len()) + } +} + +// -- Module registration ------------------------------------------------------- + +#[pyo3::pymodule(name = "builder")] +pub(crate) mod py_builder { + #[pymodule_export] + use super::PyProtoKey; + #[pymodule_export] + use super::PyCodeDef; + #[pymodule_export] + use super::PyCodeBuilder; + #[pymodule_export] + use super::PyIrFieldDef; + #[pymodule_export] + use super::PyIrMethodDef; + #[pymodule_export] + use super::PyIrClassDef; + #[pymodule_export] + use super::PyDexIrBuilder; +} diff --git a/src/py/class_accessor.rs b/src/py/class_accessor.rs new file mode 100644 index 0000000..d2dc512 --- /dev/null +++ b/src/py/class_accessor.rs @@ -0,0 +1,5 @@ +#[pyo3::pymodule] +pub mod py_class_accessor { + #[pymodule_export] + use crate::file::class_accessor::{PyClassAccessor, PyDexField, PyDexMethod}; +} diff --git a/src/py/container.rs b/src/py/container.rs new file mode 100644 index 0000000..5a6e456 --- /dev/null +++ b/src/py/container.rs @@ -0,0 +1,5 @@ +#[pyo3::pymodule(name = "container")] +pub(crate) mod py_container { + #[pymodule_export] + use crate::file::container::{PyFileDexContainer, PyInMemoryDexContainer}; +} diff --git a/src/py/editor.rs b/src/py/editor.rs new file mode 100644 index 0000000..14c5f0d --- /dev/null +++ b/src/py/editor.rs @@ -0,0 +1,152 @@ +use pyo3::exceptions::PyIOError; +use pyo3::prelude::*; +use std::path::PathBuf; + +use crate::file::DexEditor; + +/// Python-facing wrapper for `DexEditor`. +/// +/// Construct with `DexEditor.from_file(path)` or `DexEditor.from_bytes(data)`. 
+#[pyclass(name = "DexEditor", module = "dexrs._internal.editor")] +pub struct PyDexEditor { + // Option so we can move out on build()/write_to() + inner: Option, +} + +impl PyDexEditor { + fn editor_mut(&mut self) -> PyResult<&mut DexEditor> { + self.inner + .as_mut() + .ok_or_else(|| PyIOError::new_err("DexEditor already consumed by build() or write_to()")) + } + + fn take_editor(&mut self) -> PyResult { + self.inner + .take() + .ok_or_else(|| PyIOError::new_err("DexEditor already consumed by build() or write_to()")) + } +} + +#[pymethods] +impl PyDexEditor { + /// Open a DEX file from disk. + /// + /// ```python + /// editor = DexEditor.from_file("classes.dex") + /// ``` + #[staticmethod] + pub fn from_file(path: &str) -> PyResult { + let editor = DexEditor::from_file(PathBuf::from(path).as_ref()) + .map_err(|e| PyIOError::new_err(e.to_string()))?; + Ok(PyDexEditor { inner: Some(editor) }) + } + + /// Construct a `DexEditor` from raw bytes. + /// + /// ```python + /// with open("classes.dex", "rb") as f: + /// editor = DexEditor.from_bytes(f.read()) + /// ``` + #[staticmethod] + pub fn from_bytes(data: &[u8]) -> PyResult { + let editor = DexEditor::from_bytes(data.to_vec()) + .map_err(|e| PyIOError::new_err(e.to_string()))?; + Ok(PyDexEditor { inner: Some(editor) }) + } + + /// Set the access flags on a class definition. + /// + /// `class_desc` accepts dotted (`com.example.Foo`), slash (`com/example/Foo`), + /// or descriptor (`Lcom/example/Foo;`) form. + /// + /// ```python + /// editor.set_class_access_flags("com.example.Foo", 0x0001) # public + /// ``` + pub fn set_class_access_flags(&mut self, class_desc: &str, flags: u32) -> PyResult<()> { + self.editor_mut()? + .set_class_access_flags(class_desc, flags) + .map_err(|e| PyIOError::new_err(e.to_string())) + } + + /// Set the access flags on a specific method inside a class. + /// + /// LEB128 re-encoding is handled automatically when the flag width changes. 
+ /// + /// ```python + /// editor.set_method_access_flags("LMain;", "run", 0x0001) # public + /// ``` + pub fn set_method_access_flags( + &mut self, + class_desc: &str, + method_name: &str, + flags: u32, + ) -> PyResult<()> { + self.editor_mut()? + .set_method_access_flags(class_desc, method_name, flags) + .map_err(|e| PyIOError::new_err(e.to_string())) + } + + /// Zero out the HiddenapiClassData section and remove its map entry. + /// + /// Useful when the modified DEX is loaded by a runtime that rejects + /// hidden-API annotations. + /// + /// ```python + /// editor.clear_hiddenapi_flags() + /// ``` + pub fn clear_hiddenapi_flags(&mut self) -> PyResult<()> { + self.editor_mut()? + .clear_hiddenapi_flags() + .map_err(|e| PyIOError::new_err(e.to_string())) + } + + /// Rename a class, updating the string pool, type references, and checksum. + /// + /// Both `old_name` and `new_name` accept dotted, slash, or descriptor form. + /// + /// ```python + /// editor.rename_class("LMain;", "LRenamedMain;") + /// ``` + pub fn rename_class(&mut self, old_name: &str, new_name: &str) -> PyResult<()> { + self.editor_mut()? + .rename_class(old_name, new_name) + .map_err(|e| PyIOError::new_err(e.to_string())) + } + + /// Finalise edits: recalculate the Adler32 checksum and return the + /// modified DEX as a `bytes` object. + /// + /// The editor is consumed after this call. + /// + /// ```python + /// data = editor.build() + /// with open("out.dex", "wb") as f: + /// f.write(data) + /// ``` + pub fn build(&mut self) -> PyResult> { + let data = self + .take_editor()? + .build() + .map_err(|e| PyIOError::new_err(e.to_string()))?; + Python::with_gil(|py| Ok(pyo3::types::PyBytes::new(py, &data).into())) + } + + /// Finalise edits and write the modified DEX directly to `path`. + /// + /// The editor is consumed after this call. + /// + /// ```python + /// editor.write_to("out.dex") + /// ``` + pub fn write_to(&mut self, path: &str) -> PyResult<()> { + self.take_editor()? 
+ .write_to(PathBuf::from(path).as_ref()) + .map_err(|e| PyIOError::new_err(e.to_string())) + } +} + +#[pyo3::pymodule(name = "editor")] +pub(crate) mod py_editor { + #[pymodule_export] + use super::PyDexEditor; +} diff --git a/src/py/error.rs b/src/py/error.rs new file mode 100644 index 0000000..52685e3 --- /dev/null +++ b/src/py/error.rs @@ -0,0 +1,28 @@ +use pyo3::exceptions::PyException; + +pyo3::create_exception!(dexrs._internal.error, PyDexError, PyException); + +impl From for pyo3::PyErr { + fn from(err: crate::error::DexError) -> pyo3::PyErr { + PyDexError::new_err(err.to_string()) + } +} + +/// Generic errors not wrapped by dexrs (e.g. IO errors from container opening). +#[derive(Debug, thiserror::Error)] +pub enum GenericError { + #[error(transparent)] + IOError(#[from] std::io::Error), +} + +impl From for pyo3::PyErr { + fn from(err: GenericError) -> pyo3::PyErr { + pyo3::exceptions::PyIOError::new_err(err.to_string()) + } +} + +#[pyo3::pymodule(name = "error")] +pub(crate) mod py_error { + #[pymodule_export] + use super::PyDexError as PyDexErrorExport; +} diff --git a/src/py/file.rs b/src/py/file.rs index cba3caf..2654764 100644 --- a/src/py/file.rs +++ b/src/py/file.rs @@ -13,8 +13,56 @@ use crate::file::{ }; use crate::file::class_accessor::PyClassAccessor; +use crate::file::signature::Signature; +use crate::py::structs::PyLocalInfo; + +// --------------------------------------------------------------------------- +// PySignature +// --------------------------------------------------------------------------- + +/// A decoded method signature: `"(param1param2...)return_type"`. 
+#[pyo3::pyclass(name = "Signature", module = "dexrs._internal.file")] +pub struct PySignature { + inner: String, + num_params: u32, + is_void: bool, +} + +impl From for PySignature { + fn from(s: Signature) -> Self { + PySignature { + is_void: s.is_void(), + num_params: s.num_params(), + inner: s.as_str().to_owned(), + } + } +} + +#[pyo3::pymethods] +impl PySignature { + /// Number of explicit parameters in the signature. + #[getter] + pub fn num_params(&self) -> u32 { + self.num_params + } + + /// `True` if the return type is `void`. + #[getter] + pub fn is_void(&self) -> bool { + self.is_void + } + + pub fn __str__(&self) -> &str { + &self.inner + } + + pub fn __repr__(&self) -> String { + format!("Signature({:?})", self.inner) + } +} #[allow(non_camel_case_types)] +#[allow(clippy::upper_case_acronyms)] #[derive(Clone, Copy, PartialEq, Eq)] #[pyo3::pyclass(name = "VerifyPreset", module = "dexrs._internal.file", eq, eq_int)] pub enum PyVerifyPreset { @@ -23,9 +71,9 @@ pub enum PyVerifyPreset { NONE = 3, } -impl Into for PyVerifyPreset { - fn into(self) -> VerifyPreset { - match self { +impl From for VerifyPreset { + fn from(val: PyVerifyPreset) -> Self { + match val { PyVerifyPreset::ALL => VerifyPreset::All, PyVerifyPreset::CHECKSUM_ONLY => VerifyPreset::ChecksumOnly, PyVerifyPreset::NONE => VerifyPreset::None, @@ -58,6 +106,7 @@ pub struct PyDexFileImpl { macro_rules! bind_dex { ($dex_file:ident, $dex_type:ident, $c:ident, $py:ident) => {{ + #[allow(clippy::missing_transmute_annotations)] let static_dex = unsafe { std::mem::transmute($dex_file) }; let inner = RsDexFile::$dex_type { container: $c.clone_ref($py), @@ -71,68 +120,37 @@ macro_rules! bind_dex { impl PyDexFileImpl {} -macro_rules! dex_container_check { - ($container:ident, $py:ident, $method:expr) => { - if $container.get_refcnt($py) == 0 { - return Err(PyValueError::new_err(concat!( - "Tried to execute DexFile::", - stringify!($method), - " on a dex container that was deleted by Python!" 
- ))); - } - }; +fn check_container_alive(container: &Py, py: Python) -> PyResult<()> { + if container.get_refcnt(py) == 0 { + return Err(PyValueError::new_err( + "DexFile: the backing container was deleted by Python", + )); + } + Ok(()) } -// REVISIT: this can be reduced -macro_rules! dex_action_impl { - ($this:ident, $method:ident, $py:ident) => {{ - match &$this.inner.as_ref() { - RsDexFile::InMemory { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method() - } - RsDexFile::File { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method() - } - } - }}; - ($this:ident, $method:ident($($args:tt)*)?, $py:ident) => {{ - match &$this.inner.as_ref() { - RsDexFile::InMemory { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method($($args)*)? - } - RsDexFile::File { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method($($args)*)? - } - } - }}; - ($this:ident, $method:ident, $arg:expr, $py:ident) => {{ - match &$this.inner.as_ref() { - RsDexFile::InMemory { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method($arg) +/// Dispatch a closure over the inner `DexFile`, checking that the Python +/// container is still alive first. The closure receives a reference to the +/// concrete `DexFile<'_, C>` and should return `PyResult`. +macro_rules! 
with_dex { + ($this:ident, $py:ident, |$dex:ident| $body:expr) => { + match $this.inner.as_ref() { + RsDexFile::InMemory { + dex: $dex, + container, + } => { + check_container_alive(container, $py)?; + $body } - RsDexFile::File { dex, container } => { - dex_container_check!(container, $py, $method); - dex.$method($arg) + RsDexFile::File { + dex: $dex, + container, + } => { + check_container_alive(container, $py)?; + $body } } - }}; - ($this:ident, unsafe { $method:ident }, $arg:expr, $py:ident) => {{ - match &$this.inner.as_ref() { - RsDexFile::InMemory { dex, container } => { - dex_container_check!(container, $py, $method); - unsafe { dex.$method($arg)? } - } - RsDexFile::File { dex, container } => { - dex_container_check!(container, $py, $method); - unsafe { dex.$method($arg)? } - } - } - }}; + }; } #[pyo3::pymethods] @@ -173,7 +191,17 @@ impl PyDexFileImpl { } pub fn get_header<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, get_header, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_header().into())) + } + + /// Returns `True` for CompactDex (`cdex` magic) files. + pub fn is_compact_dex<'py>(&self, py: Python<'py>) -> PyResult { + Ok(with_dex!(self, py, |dex| dex.is_compact_dex())) + } + + /// Returns `True` for standard DEX (`dex\n` magic) files. 
+ pub fn is_standard_dex<'py>(&self, py: Python<'py>) -> PyResult { + Ok(with_dex!(self, py, |dex| dex.is_standard_dex())) } // ---------------------------------------------------------------------------- @@ -184,7 +212,7 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult { - Ok(dex_action_impl!(self, get_string_id(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_string_id(index)?.into())) } pub fn get_string_id_opt<'py>( @@ -192,18 +220,18 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_string_id_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_string_id_opt(index)?.map(Into::into)))?) } pub fn num_string_ids<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_string_ids, py)) + Ok(with_dex!(self, py, |dex| dex.num_string_ids())) } // ---------------------------------------------------------------------------- // Type Ids // ---------------------------------------------------------------------------- pub fn get_type_id<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, get_type_id(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_type_id(index)?.into())) } pub fn get_type_id_opt<'py>( @@ -211,11 +239,11 @@ impl PyDexFileImpl { py: Python<'py>, index: TypeIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_type_id_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_type_id_opt(index)?.map(Into::into)))?) 
} pub fn num_type_ids<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_type_ids, py)) + Ok(with_dex!(self, py, |dex| dex.num_type_ids())) } pub fn get_type_desc<'py>( @@ -224,27 +252,27 @@ impl PyDexFileImpl { type_id: Py, ) -> PyResult { let rs_type_id = &type_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_type_desc_utf16(rs_type_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_type_desc_utf16(rs_type_id)?)) } pub fn get_type_desc_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, get_type_desc_utf16_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_type_desc_utf16_at(index)?)) } pub fn pretty_type_at<'py>(&self, py: Python<'py>, index: TypeIndex) -> PyResult { - Ok(dex_action_impl!(self, pretty_type_at, index, py)) + Ok(with_dex!(self, py, |dex| dex.pretty_type_at(index))) } pub fn pretty_type<'py>(&self, py: Python<'py>, type_id: Py) -> PyResult { let rs_type_id = &type_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, pretty_type, rs_type_id, py)) + Ok(with_dex!(self, py, |dex| dex.pretty_type(rs_type_id))) } // ---------------------------------------------------------------------------- // Field Ids // ---------------------------------------------------------------------------- pub fn get_field_id<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { - Ok(dex_action_impl!(self, get_field_id(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_field_id(index)?.into())) } pub fn get_field_id_opt<'py>( @@ -252,11 +280,11 @@ impl PyDexFileImpl { py: Python<'py>, index: FieldIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_field_id_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_field_id_opt(index)?.map(Into::into)))?) 
} pub fn num_field_ids<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_field_ids, py)) + Ok(with_dex!(self, py, |dex| dex.num_field_ids())) } pub fn get_field_name<'py>( @@ -265,18 +293,18 @@ impl PyDexFileImpl { field_id: Py, ) -> PyResult { let rs_field_id = &field_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_field_name(rs_field_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_field_name(rs_field_id)?)) } pub fn get_field_name_at<'py>(&self, py: Python<'py>, index: FieldIndex) -> PyResult { - Ok(dex_action_impl!(self, get_field_name_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_field_name_at(index)?)) } // ---------------------------------------------------------------------------- // Proto Ids // ---------------------------------------------------------------------------- pub fn get_proto_id<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { - Ok(dex_action_impl!(self, get_proto_id(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_proto_id(index)?.into())) } pub fn get_proto_id_opt<'py>( @@ -284,27 +312,27 @@ impl PyDexFileImpl { py: Python<'py>, index: ProtoIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_proto_id_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_proto_id_opt(index)?.map(Into::into)))?) 
} pub fn num_proto_ids<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_proto_ids, py)) + Ok(with_dex!(self, py, |dex| dex.num_proto_ids())) } pub fn get_shorty<'py>(&self, py: Python<'py>, proto_id: Py) -> PyResult { let rs_proto_id = &proto_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_shorty(rs_proto_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_shorty(rs_proto_id)?)) } pub fn get_shorty_at<'py>(&self, py: Python<'py>, index: ProtoIndex) -> PyResult { - Ok(dex_action_impl!(self, get_shorty_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_shorty_at(index)?)) } // ---------------------------------------------------------------------------- // method ids // ---------------------------------------------------------------------------- pub fn get_method_id<'py>(&self, py: Python<'py>, index: u32) -> PyResult { - Ok(dex_action_impl!(self, get_method_id(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_method_id(index)?.into())) } pub fn get_method_id_opt<'py>( @@ -312,18 +340,18 @@ impl PyDexFileImpl { py: Python<'py>, index: u32, ) -> PyResult> { - Ok(dex_action_impl!(self, get_method_id_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_method_id_opt(index)?.map(Into::into)))?) 
} pub fn num_method_ids<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_method_ids, py)) + Ok(with_dex!(self, py, |dex| dex.num_method_ids())) } //------------------------------------------------------------------------------ // ClassDefs //------------------------------------------------------------------------------ pub fn get_class_def<'py>(&self, py: Python<'py>, index: u32) -> PyResult { - Ok(dex_action_impl!(self, get_class_def(index)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_class_def(index)?.into())) } pub fn get_class_def_opt<'py>( @@ -331,11 +359,11 @@ impl PyDexFileImpl { py: Python<'py>, index: u32, ) -> PyResult> { - Ok(dex_action_impl!(self, get_class_def_opt(index)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| Ok::<_, crate::error::DexError>(dex.get_class_def_opt(index)?.map(Into::into)))?) } pub fn num_class_defs<'py>(&self, py: Python<'py>) -> PyResult { - Ok(dex_action_impl!(self, num_class_defs, py)) + Ok(with_dex!(self, py, |dex| dex.num_class_defs())) } pub fn get_class_desc<'py>( @@ -344,11 +372,7 @@ impl PyDexFileImpl { class_def: Py, ) -> PyResult { let rs_class_def = &class_def.try_borrow(py)?.0; - Ok(dex_action_impl!( - self, - get_class_desc_utf16(rs_class_def)?, - py - )) + Ok(with_dex!(self, py, |dex| dex.get_class_desc_utf16(rs_class_def)?)) } pub fn get_interfaces_list<'py>( @@ -357,10 +381,9 @@ impl PyDexFileImpl { class_def: Py, ) -> PyResult>> { let rs_class_def = &class_def.try_borrow(py)?.0; - Ok( - dex_action_impl!(self, get_interfaces_list(rs_class_def)?, py) - .map(|x| x.iter().map(Into::into).collect()), - ) + Ok(with_dex!(self, py, |dex| dex + .get_interfaces_list(rs_class_def)? 
+ .map(|x| x.iter().map(Into::into).collect()))) } // ---------------------------------------------------------------------------- @@ -372,7 +395,7 @@ impl PyDexFileImpl { class_def: Py, ) -> PyResult> { let rs_class_def = &class_def.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_class_accessor(rs_class_def)?, py).map(Into::into)) + Ok(with_dex!(self, py, |dex| dex.get_class_accessor(rs_class_def)?.map(Into::into))) } // ---------------------------------------------------------------------------- @@ -383,7 +406,7 @@ impl PyDexFileImpl { py: Python<'py>, code_offset: u32, ) -> PyResult { - Ok(dex_action_impl!(self, get_code_item_accessor(code_offset)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_code_item_accessor(code_offset)?.into())) } //------------------------------------------------------------------------------ @@ -395,12 +418,11 @@ impl PyDexFileImpl { ca: Py, ) -> PyResult> { let code_item_accessor = &ca.try_borrow(py)?.inner.0; - Ok( - dex_action_impl!(self, get_try_items(code_item_accessor)?, py) - .into_iter() - .map(Into::into) - .collect::>(), - ) + Ok(with_dex!(self, py, |dex| dex + .get_try_items(code_item_accessor)? 
+ .iter() + .map(Into::into) + .collect::>())) } //------------------------------------------------------------------------------ @@ -414,11 +436,7 @@ impl PyDexFileImpl { ) -> PyResult> { let code_item_accessor = &ca.try_borrow(py)?.inner.0; let rs_try_item = &try_item.try_borrow(py)?.0; - let iterator = dex_action_impl!( - self, - iter_catch_handlers(code_item_accessor, rs_try_item)?, - py - ); + let iterator = with_dex!(self, py, |dex| dex.iter_catch_handlers(code_item_accessor, rs_try_item)?); match iterator { None => Ok(vec![]), Some(iterator) => Ok(iterator @@ -436,7 +454,7 @@ impl PyDexFileImpl { py: Python<'py>, offset: u32, ) -> PyResult> { - Ok(dex_action_impl!(self, get_annotation_set(offset)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_annotation_set(offset)?)) } pub fn get_annotation<'py>( @@ -444,7 +462,7 @@ impl PyDexFileImpl { py: Python<'py>, offset: u32, ) -> PyResult { - Ok(dex_action_impl!(self, get_annotation(offset)?, py).into()) + Ok(with_dex!(self, py, |dex| dex.get_annotation(offset)?.into())) } pub fn get_class_annotation_accessor<'py>( @@ -453,19 +471,14 @@ impl PyDexFileImpl { class_def: Py, ) -> PyResult { let rs_class_def = &class_def.try_borrow(py)?.0; - Ok(dex_action_impl!( - self, - get_class_annotation_accessor(rs_class_def.annotations_off)?, - py - ) - .into()) + Ok(with_dex!(self, py, |dex| dex.get_class_annotation_accessor(rs_class_def.annotations_off)?.into())) } // ---------------------------------------------------------------------------- // string data // ---------------------------------------------------------------------------- pub fn get_utf16_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!(self, get_utf16_str_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_str_at(index)?)) } pub fn get_utf16<'py>( @@ -474,7 +487,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, 
get_utf16_str(string_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_str(string_id)?)) } pub fn get_utf16_opt_at<'py>( @@ -482,11 +495,11 @@ impl PyDexFileImpl { py: Python<'py>, index: StringIndex, ) -> PyResult> { - Ok(dex_action_impl!(self, get_utf16_str_opt_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_str_opt_at(index)?)) } pub fn get_utf16_lossy_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!(self, get_utf16_str_lossy_at(index)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_str_lossy_at(index)?)) } pub fn get_utf16_lossy<'py>( @@ -495,7 +508,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_utf16_str_lossy(string_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_str_lossy(string_id)?)) } pub fn get_string_data<'py>( @@ -504,7 +517,7 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult<(u32, &'py [u8])> { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!(self, get_string_data(string_id)?, py)) + Ok(with_dex!(self, py, |dex| dex.get_string_data(string_id)?)) } // unsafe string API @@ -514,21 +527,109 @@ impl PyDexFileImpl { py_string_id: Py, ) -> PyResult { let string_id = &py_string_id.try_borrow(py)?.0; - Ok(dex_action_impl!( - self, - unsafe { fast_get_utf8_str }, - &string_id, - py - )) + // SAFETY: caller accepts that invalid MUTF-8 may produce garbage output + Ok(with_dex!(self, py, |dex| unsafe { dex.fast_get_utf8_str(string_id)? })) } pub fn fast_get_utf8_at<'py>(&self, py: Python<'py>, index: StringIndex) -> PyResult { - Ok(dex_action_impl!( - self, - unsafe { fast_get_utf8_str_at }, - index, - py - )) + // SAFETY: caller accepts that invalid MUTF-8 may produce garbage output + Ok(with_dex!(self, py, |dex| unsafe { dex.fast_get_utf8_str_at(index)? 
})) + } + + // ---------------------------------------------------------------------------- + // ART-parity additions + // ---------------------------------------------------------------------------- + + /// Returns the Java-visible access flags for a class def (lower 16 bits). + pub fn get_java_access_flags<'py>( + &self, + py: Python<'py>, + class_def: Py, + ) -> PyResult { + let cd = class_def.try_borrow(py)?; + Ok(cd.0.access_flags & 0xFFFF) + } + + /// Returns the parameter type list for a proto_id, or `None` if it has no parameters. + pub fn get_proto_parameters<'py>( + &self, + py: Python<'py>, + proto_id: Py, + ) -> PyResult>> { + let pid = proto_id.try_borrow(py)?; + Ok(with_dex!(self, py, |dex| { + dex.get_proto_parameters(&pid.0)? + .map(|tl| tl.iter().map(Into::into).collect()) + })) + } + + /// Returns the method signature as a rich `Signature` object. + pub fn get_method_signature<'py>( + &self, + py: Python<'py>, + method_idx: u32, + ) -> PyResult { + Ok(with_dex!(self, py, |dex| { + dex.get_method_signature(method_idx)?.into() + })) + } + + /// Builds a `TypeLookupTable` for fast O(1) class lookup by type descriptor. + pub fn build_type_lookup_table<'py>( + &self, + py: Python<'py>, + ) -> PyResult { + Ok(with_dex!(self, py, |dex| { + crate::py::type_lookup_table::PyTypeLookupTable(dex.build_type_lookup_table()) + })) + } + + /// Decodes the hidden-API flags stream for a given class. + /// + /// `class_def_idx` identifies the class; `count` is the total number of + /// fields + methods (determines how many ULEB128 values to decode). + pub fn get_hiddenapi_class_flags<'py>( + &self, + py: Python<'py>, + class_def_idx: u32, + count: usize, + ) -> PyResult>> { + Ok(with_dex!(self, py, |dex| { + dex.get_hiddenapi_class_flags(class_def_idx, count) + })) + } + + /// Returns the source line number for the given DEX program counter. + /// + /// `debug_info_off` is the offset from `CodeItem::debug_info_off`. 
+ /// Returns `None` if no position entry covers `dex_pc`. + pub fn get_line_for_pc<'py>( + &self, + py: Python<'py>, + debug_info_off: u32, + dex_pc: u32, + ) -> PyResult> { + Ok(with_dex!(self, py, |dex| { + dex.get_debug_info_accessor(debug_info_off)?.get_line_for_pc(dex_pc)? + })) + } + + /// Decodes the local variable table from a debug info stream. + /// + /// Returns a list of `LocalInfo` entries (including still-live locals at + /// end of method). `num_regs` should be `CodeItem.registers_size`. + pub fn decode_local_info<'py>( + &self, + py: Python<'py>, + debug_info_off: u32, + num_regs: u16, + ) -> PyResult> { + Ok(with_dex!(self, py, |dex| { + let accessor = dex.get_debug_info_accessor(debug_info_off)?; + let mut locals = Vec::new(); + accessor.decode_local_info(num_regs, |li| locals.push(PyLocalInfo::from(li)))?; + locals + })) } } @@ -537,5 +638,5 @@ impl PyDexFileImpl { pub(crate) mod py_file { #[pymodule_export] - use super::{PyDexFileImpl, PyVerifyPreset}; + use super::{PyDexFileImpl, PySignature, PyVerifyPreset}; } diff --git a/src/py/leb128.rs b/src/py/leb128.rs new file mode 100644 index 0000000..62b0e2d --- /dev/null +++ b/src/py/leb128.rs @@ -0,0 +1,19 @@ +#[pyo3::pymodule(name = "leb128")] +pub(crate) mod py_leb128 { + use pyo3::PyResult; + + #[pyo3::pyfunction] + pub fn decode_uleb128(data_in: &[u8]) -> PyResult<(u32, usize)> { + Ok(crate::leb128::decode_leb128::(data_in)?) + } + + #[pyo3::pyfunction] + pub fn decode_sleb128(data_in: &[u8]) -> PyResult { + Ok(crate::leb128::decode_sleb128(data_in, &mut 0)?) + } + + #[pyo3::pyfunction] + pub fn decode_leb128p1(data_in: &[u8]) -> PyResult<(i32, usize)> { + Ok(crate::leb128::decode_leb128p1(data_in)?) 
+ } +} diff --git a/src/py/primitive.rs b/src/py/primitive.rs new file mode 100644 index 0000000..01572f9 --- /dev/null +++ b/src/py/primitive.rs @@ -0,0 +1,105 @@ +use crate::primitive::PrimitiveType; + +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[pyo3::pyclass( + name = "PrimitiveType", + module = "dexrs._internal.primitive", + eq, + eq_int +)] +pub enum PyPrimitiveType { + Not = 0, + Boolean = 1, + Byte = 2, + Char = 3, + Short = 4, + Int = 5, + Long = 6, + Float = 7, + Double = 8, + Void = 9, +} + +impl From for PyPrimitiveType { + fn from(p: PrimitiveType) -> Self { + match p { + PrimitiveType::Not => PyPrimitiveType::Not, + PrimitiveType::Boolean => PyPrimitiveType::Boolean, + PrimitiveType::Byte => PyPrimitiveType::Byte, + PrimitiveType::Char => PyPrimitiveType::Char, + PrimitiveType::Short => PyPrimitiveType::Short, + PrimitiveType::Int => PyPrimitiveType::Int, + PrimitiveType::Long => PyPrimitiveType::Long, + PrimitiveType::Float => PyPrimitiveType::Float, + PrimitiveType::Double => PyPrimitiveType::Double, + PrimitiveType::Void => PyPrimitiveType::Void, + } + } +} + +impl From for PrimitiveType { + fn from(p: PyPrimitiveType) -> Self { + match p { + PyPrimitiveType::Not => PrimitiveType::Not, + PyPrimitiveType::Boolean => PrimitiveType::Boolean, + PyPrimitiveType::Byte => PrimitiveType::Byte, + PyPrimitiveType::Char => PrimitiveType::Char, + PyPrimitiveType::Short => PrimitiveType::Short, + PyPrimitiveType::Int => PrimitiveType::Int, + PyPrimitiveType::Long => PrimitiveType::Long, + PyPrimitiveType::Float => PrimitiveType::Float, + PyPrimitiveType::Double => PrimitiveType::Double, + PyPrimitiveType::Void => PrimitiveType::Void, + } + } +} + +#[pyo3::pymethods] +impl PyPrimitiveType { + /// Returns the single-char DEX type descriptor, or `None` for `Not`. + pub fn descriptor(&self) -> Option<&'static str> { + PrimitiveType::from(*self).descriptor() + } + + /// Returns the boxed class descriptor, or `None` for `Not`. 
+ pub fn boxed_descriptor(&self) -> Option<&'static str> { + PrimitiveType::from(*self).boxed_descriptor() + } + + /// Returns the storage size in bytes. + pub fn component_size(&self) -> usize { + PrimitiveType::from(*self).component_size() + } + + /// Returns `True` for numeric primitive types. + pub fn is_numeric(&self) -> bool { + PrimitiveType::from(*self).is_numeric() + } + + /// Returns `True` for 64-bit types (long or double). + pub fn is_64bit(&self) -> bool { + PrimitiveType::from(*self).is_64bit() + } + + /// Returns the human-readable Java type name. + pub fn pretty_name(&self) -> &'static str { + PrimitiveType::from(*self).pretty_name() + } + + pub fn __str__(&self) -> &'static str { + PrimitiveType::from(*self).pretty_name() + } + + /// Creates a `PrimitiveType` from a JVM descriptor character. + #[staticmethod] + pub fn from_char(c: char) -> PyPrimitiveType { + PrimitiveType::from_char(c).into() + } +} + +#[pyo3::pymodule(name = "primitive")] +pub(crate) mod py_primitive { + #[pymodule_export] + use super::PyPrimitiveType; +} diff --git a/src/py/structs.rs b/src/py/structs.rs new file mode 100644 index 0000000..30552f3 --- /dev/null +++ b/src/py/structs.rs @@ -0,0 +1,63 @@ +use crate::file::debug::LocalInfo; + +/// Python representation of a local variable decoded from a debug info stream. 
+#[pyo3::pyclass(name = "LocalInfo", module = "dexrs._internal.structs")] +pub struct PyLocalInfo { + #[pyo3(get)] + pub name_idx: Option, + #[pyo3(get)] + pub descriptor_idx: Option, + #[pyo3(get)] + pub signature_idx: Option, + #[pyo3(get)] + pub start_address: u32, + #[pyo3(get)] + pub end_address: u32, + #[pyo3(get)] + pub reg: u16, + #[pyo3(get)] + pub is_live: bool, +} + +impl From<&LocalInfo> for PyLocalInfo { + fn from(li: &LocalInfo) -> Self { + PyLocalInfo { + name_idx: li.name_idx, + descriptor_idx: li.descriptor_idx, + signature_idx: li.signature_idx, + start_address: li.start_address, + end_address: li.end_address, + reg: li.reg, + is_live: li.is_live, + } + } +} + +#[pyo3::pymethods] +impl PyLocalInfo { + pub fn __repr__(&self) -> String { + format!( + "LocalInfo(reg={}, range={}..{}, name={:?})", + self.reg, self.start_address, self.end_address, self.name_idx + ) + } +} + +#[pyo3::pymodule(name = "structs")] +pub(crate) mod py_structs { + #[pymodule_export] + use crate::file::structs::{ + PyDexAnnotationElement, PyDexAnnotationItem, PyDexAnnotationsDirectoryItem, + PyDexCallSiteIdItem, PyDexCatchHandlerData, PyDexClassDef, PyDexCodeItem, + PyDexEncodedAnnotation, PyDexEncodedValue, PyDexFieldAnnotationsItem, PyDexFieldId, + PyDexMethodAnnotationsItem, PyDexMethodHandleItem, PyDexMethodId, + PyDexParameterAnnotationsItem, PyDexProtoId, PyDexStringId, PyDexTryItem, PyDexTypeId, + PyDexTypeItem, + }; + + #[pymodule_export] + use crate::file::header::PyDexHeader; + + #[pymodule_export] + use super::PyLocalInfo; +} diff --git a/src/py/type_lookup_table.rs b/src/py/type_lookup_table.rs new file mode 100644 index 0000000..d99abf9 --- /dev/null +++ b/src/py/type_lookup_table.rs @@ -0,0 +1,35 @@ +use crate::file::TypeLookupTable; + +/// Fast O(1) class lookup table. Build via `DexFile.build_type_lookup_table()`. 
+#[pyo3::pyclass(name = "TypeLookupTable", module = "dexrs._internal.type_lookup_table")] +pub struct PyTypeLookupTable(pub(crate) TypeLookupTable); + +#[pyo3::pymethods] +impl PyTypeLookupTable { + /// Returns the `class_def_idx` for `descriptor`, or `None` if not found. + /// + /// `descriptor` must be in DEX format, e.g. `"Ljava/lang/String;"`. + pub fn lookup(&self, descriptor: &str) -> Option { + self.0.lookup(descriptor) + } + + /// Returns the number of classes in the table. + pub fn __len__(&self) -> usize { + self.0.len() + } + + /// Returns `True` if `descriptor` is in the table. + pub fn __contains__(&self, descriptor: &str) -> bool { + self.0.lookup(descriptor).is_some() + } + + pub fn __repr__(&self) -> String { + format!("TypeLookupTable({} classes)", self.0.len()) + } +} + +#[pyo3::pymodule(name = "type_lookup_table")] +pub(crate) mod py_type_lookup_table { + #[pymodule_export] + use super::PyTypeLookupTable; +} diff --git a/src/py/utf.rs b/src/py/utf.rs new file mode 100644 index 0000000..2c68478 --- /dev/null +++ b/src/py/utf.rs @@ -0,0 +1,33 @@ +#[pyo3::pymodule(name = "mutf8")] +pub(crate) mod py_utf { + use crate::error::DexError; + use pyo3::PyResult; + + #[pyo3::pyfunction] + pub fn mutf8_to_str(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(crate::utf::mutf8_to_str(&utf8_data_in[0..=end])?) + } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } + } + + #[pyo3::pyfunction] + pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> PyResult { + if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { + Ok(crate::utf::mutf8_to_str_lossy(&utf8_data_in[0..=end])?) 
+ } else { + Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) + } + } + + #[pyo3::pyfunction] + pub fn str_to_mutf8(str_data_in: &str) -> Vec { + crate::utf::str_to_mutf8(str_data_in) + } + + #[pyo3::pyfunction] + pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { + crate::utf::str_to_mutf8_lossy(str_data_in) + } +} diff --git a/src/utf.rs b/src/utf.rs index b7a296c..ecd9fe4 100644 --- a/src/utf.rs +++ b/src/utf.rs @@ -24,44 +24,6 @@ pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { utf16_to_mutf8(&utf16_data_in, &options) } -// python exports -#[cfg(feature = "python")] -#[pyo3::pymodule(name = "mutf8")] -pub(crate) mod py_utf { - - use crate::error::DexError; - use pyo3::PyResult; - - #[pyo3::pyfunction] - pub fn mutf8_to_str(utf8_data_in: &[u8]) -> PyResult { - if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { - Ok(super::mutf8_to_str(&utf8_data_in[0..=end])?) - } else { - Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) - } - } - - #[pyo3::pyfunction] - pub fn mutf8_to_str_lossy(utf8_data_in: &[u8]) -> PyResult { - if let Some(end) = utf8_data_in.iter().position(|&x| x == 0) { - Ok(super::mutf8_to_str_lossy(&utf8_data_in[0..=end])?) 
- } else { - Err(DexError::BadStringDataMissingNullByte(utf8_data_in.as_ptr() as usize).into()) - } - } - - #[pyo3::pyfunction] - pub fn str_to_mutf8(str_data_in: &str) -> Vec { - super::str_to_mutf8(str_data_in) - } - - #[pyo3::pyfunction] - pub fn str_to_mutf8_lossy(str_data_in: &str) -> Vec { - super::str_to_mutf8_lossy(str_data_in) - } -} -// end python exports - #[inline] fn utf16_from_utf8(utf8_data_in: &[u8], offset: &mut usize) -> u32 { let one = utf8_data_in[*offset]; @@ -101,7 +63,7 @@ fn trailing_utf16_char(maybe_pair: u32) -> u16 { #[inline(always)] fn leading_utf16_char(maybe_pair: u32) -> u16 { - (maybe_pair & 0x0000FFFFF) as u16 + (maybe_pair & 0x0000FFFF) as u16 } #[inline(always)] @@ -138,12 +100,12 @@ pub fn mutf8_len(utf8_data_in: &[u8], utf8_in_len: usize) -> Result { in_idx += 1; len += 1; if ic & 0x80 == 0 { - continue; // one byze encoding + continue; // one byte encoding } in_idx += 1; if ic & 0x20 == 0 { - // two byze encoding + // two byte encoding continue; } @@ -200,7 +162,7 @@ fn convert_mutf8_to_utf16(utf8_data_in: &[u8], utf8_in_len: usize, out_chars: us } fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { - let mut mutf8_len = 0; // trailing null byte + let mut mutf8_len = 0; convert_utf16_to_mutf8(utf16_in, options, |_| mutf8_len += 1); let mut mutf8_out; @@ -208,7 +170,7 @@ fn utf16_to_mutf8(utf16_in: &[u16], options: &Options) -> Vec { // only ascii chars mutf8_out = utf16_in.iter().map(|ch| *ch as u8).collect(); } else { - mutf8_out = vec![0x00; mutf8_len + 1]; + mutf8_out = Vec::with_capacity(mutf8_len + 1); convert_utf16_to_mutf8(utf16_in, options, |ch| mutf8_out.push(ch)); } diff --git a/src/vdex/mod.rs b/src/vdex/mod.rs new file mode 100644 index 0000000..2f1c501 --- /dev/null +++ b/src/vdex/mod.rs @@ -0,0 +1,695 @@ +//! VDEX file parsing. +//! +//! VDEX files are produced by dex2oat and contain: +//! - Checksums of the embedded DEX files for integrity verification. +//! 
- Optionally, the raw DEX files themselves (kDexFileSection). +//! - Verifier dependency data (kVerifierDepsSection). +//! - Type lookup tables (kTypeLookupTableSection). +//! +//! This module closely follows the layout defined in +//! `art/runtime/vdex_file.h` from the Android Open Source Project. +//! +//! # Container pattern +//! +//! [`VdexFile`] is generic over `C: DexContainer<'a>`, mirroring [`DexFile`]. +//! Common aliases: +//! ```ignore +//! type InMemoryVdexFile<'a> = VdexFile<'a, &'a [u8]>; +//! type MmapVdexFile<'a> = VdexFile<'a, Mmap>; +//! ``` + +use memmap2::MmapAsRawDesc; +use plain::Plain; + +use crate::{ + error::DexError, + file::{DexContainer, Header}, + Result, +}; + +// -- Constants ----------------------------------------------------------------- + +/// Magic bytes at the start of every VDEX file. +pub const VDEX_MAGIC: &[u8; 4] = b"vdex"; + +/// The only VDEX format version currently supported. +/// +/// Matches `kVdexVersion` in `art/runtime/vdex_file.h`. +pub const VDEX_VERSION: &[u8; 4] = b"027\0"; + +/// Total number of sections in a VDEX file. +pub const VDEX_NUM_SECTIONS: usize = 4; + +// -- Section kind -------------------------------------------------------------- + +/// Identifies a section within the VDEX file. +/// +/// The numeric values must match the `VdexSection` enum in ART. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] +pub enum VdexSection { + /// Adler-32 checksums of the embedded DEX files (one `u32` per file). + Checksum = 0, + /// Concatenated DEX file bytes (optional; absent in metadata-only VDEXes). + DexFile = 1, + /// Encoded verifier-dependency data. + VerifierDeps = 2, + /// Type lookup tables for fast class resolution. 
+ TypeLookupTable = 3, +} + +impl VdexSection { + fn name(self) -> &'static str { + match self { + VdexSection::Checksum => "checksum", + VdexSection::DexFile => "dex_file", + VdexSection::VerifierDeps => "verifier_deps", + VdexSection::TypeLookupTable => "type_lookup_table", + } + } +} + +// -- On-disk structures -------------------------------------------------------- + +/// Fixed-size file header at offset 0. +/// +/// Layout (12 bytes): +/// ```text +/// magic_[4] = b"vdex" +/// vdex_version_[4] = b"027\0" +/// number_of_sections_ = 4 (u32 LE) +/// ``` +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct VdexFileHeader { + pub magic: [u8; 4], + pub vdex_version: [u8; 4], + pub number_of_sections: u32, +} + +// SAFETY: VdexFileHeader is a flat C struct with no padding or interior +// mutability, and can be safely reinterpreted from aligned byte sequences. +unsafe impl Plain for VdexFileHeader {} + +impl VdexFileHeader { + /// Returns `true` when the magic bytes are `"vdex"`. + #[inline] + pub fn is_magic_valid(&self) -> bool { + &self.magic == VDEX_MAGIC + } + + /// Returns `true` when the version string matches the supported version. + #[inline] + pub fn is_version_valid(&self) -> bool { + &self.vdex_version == VDEX_VERSION + } +} + +/// Per-section descriptor stored immediately after [`VdexFileHeader`]. +/// +/// Layout (12 bytes): +/// ```text +/// section_kind u32 LE — VdexSection discriminant +/// section_offset u32 LE — byte offset from start of file +/// section_size u32 LE — byte length of section (0 = absent) +/// ``` +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct VdexSectionHeader { + pub section_kind: u32, + pub section_offset: u32, + pub section_size: u32, +} + +// SAFETY: Same reasoning as VdexFileHeader. 
+unsafe impl Plain for VdexSectionHeader {} + +// -- VdexFile ------------------------------------------------------------------ + +// -- Type aliases -------------------------------------------------------------- + +pub type InMemoryVdexFile<'a> = VdexFile<'a, &'a [u8]>; +pub type MmapVdexFile<'a> = VdexFile<'a, memmap2::Mmap>; + +// -- VdexFile ------------------------------------------------------------------ + +/// Parsed view of a VDEX file backed by a [`DexContainer`]. +/// +/// The type parameter `C` is the backing store (mmap, `&[u8]`, `Vec`][crate::file::DexFile] design. +/// The lifetime `'a` ties all data references to the container. +/// +/// # Construction +/// +/// ```ignore +/// // From an in-memory byte slice: +/// let vdex = VdexFile::from_raw_parts(&data, ())?; +/// +/// // From a memory-mapped file: +/// let mmap = unsafe { Mmap::map(&file)? }; +/// let vdex = VdexFile::from_raw_parts(&mmap, ())?; +/// ``` +pub struct VdexFile<'a, C: DexContainer<'a> = memmap2::Mmap> { + /// Reference to the backing container — same field name and semantics as + /// `DexFile::mmap`. + pub(crate) mmap: &'a C, + /// Owned copy of the file-level header, read via `plain::copy_from_bytes` + /// so that the container does not need to be aligned. + header: VdexFileHeader, + /// Section descriptors in VDEX order (Checksum … TypeLookupTable). + sections: Vec, +} + +impl<'a, C: DexContainer<'a>> VdexFile<'a, C> { + // -- Helper: raw slice from container -------------------------------------- + + /// Returns `&'a [u8]` for `container[start..end]`, propagating the + /// container's lifetime — the same pattern used by `DexFile::get_section`. + #[inline] + fn raw_slice(base: &'a C, start: usize, end: usize) -> &'a [u8] { + &base[start..end] + } + + // -- Construction ---------------------------------------------------------- + + /// Parse and validate a VDEX file from a container. + /// + /// Mirrors `DexFile::from_raw_parts(base, location)`. 
+ /// + /// Returns an error when: + /// - The buffer is too short to hold the file header + section headers. + /// - The magic bytes are not `"vdex"`. + /// - The version string is not the supported version. + /// - Any section descriptor places data outside the container. + pub fn from_raw_parts(base: &'a C) -> Result { + let data_len = base.len(); + let header_size = std::mem::size_of::(); + + if data_len < header_size { + return Err(DexError::TruncatedVdexFile { size: data_len }); + } + + // Read the file header into an owned value (alignment-independent). + let mut header = unsafe { std::mem::zeroed::() }; + plain::copy_from_bytes(&mut header, base).map_err(|_| DexError::TruncatedVdexFile { + size: data_len, + })?; + + if !header.is_magic_valid() { + return Err(DexError::BadVdexMagic); + } + if !header.is_version_valid() { + return Err(DexError::UnknownVdexVersion { + version: header.vdex_version, + }); + } + + let n = header.number_of_sections as usize; + let sections_start = header_size; + let section_hdr_size = std::mem::size_of::(); + let sections_end = sections_start + .checked_add(n * section_hdr_size) + .ok_or(DexError::TruncatedVdexFile { size: data_len })?; + + if data_len < sections_end { + return Err(DexError::TruncatedVdexFile { size: data_len }); + } + + // Read section headers into an owned Vec (alignment-independent). + let mut sections = Vec::with_capacity(n); + for i in 0..n { + let off = sections_start + i * section_hdr_size; + let mut sec = unsafe { std::mem::zeroed::() }; + plain::copy_from_bytes(&mut sec, &base[off..]) + .map_err(|_| DexError::TruncatedVdexFile { size: data_len })?; + sections.push(sec); + } + + let vdex = VdexFile { mmap: base, header, sections }; + vdex.validate_sections()?; + Ok(vdex) + } + + // -- Header accessors ------------------------------------------------------ + + /// Raw file header. 
+ #[inline] + pub fn file_header(&self) -> &VdexFileHeader { + &self.header + } + + /// Number of sections declared in the header. + #[inline] + pub fn num_sections(&self) -> u32 { + self.header.number_of_sections + } + + /// Total byte length of the underlying container. + #[inline] + pub fn size(&self) -> usize { + self.mmap.len() + } + + // -- Section accessors ----------------------------------------------------- + + /// Returns the [`VdexSectionHeader`] for `kind`, or `None` if the file + /// does not have that many sections. + #[inline] + pub fn get_section_header(&self, kind: VdexSection) -> Option<&VdexSectionHeader> { + self.sections.get(kind as usize) + } + + /// Returns a raw `&'a [u8]` for the given section (empty when absent). + /// + /// The returned slice borrows directly from the container — no copying. + pub fn get_section_data(&self, kind: VdexSection) -> &'a [u8] { + let Some(hdr) = self.get_section_header(kind) else { + return &[]; + }; + if hdr.section_size == 0 { + return &[]; + } + let start = hdr.section_offset as usize; + let end = start + hdr.section_size as usize; + Self::raw_slice(self.mmap, start, end) + } + + // -- Checksum section ------------------------------------------------------ + + /// Number of DEX files whose checksums are stored in the checksum section. + #[inline] + pub fn num_dex_files(&self) -> u32 { + match self.get_section_header(VdexSection::Checksum) { + Some(hdr) => hdr.section_size / std::mem::size_of::() as u32, + None => 0, + } + } + + /// Returns the full slice of DEX-file checksums (borrows from container). + pub fn dex_checksums(&self) -> &'a [u32] { + let data = self.get_section_data(VdexSection::Checksum); + u32::slice_from_bytes(data).unwrap_or(&[]) + } + + /// Returns the Adler-32 checksum of the DEX file at `index`. 
+ pub fn dex_checksum_at(&self, index: u32) -> Result { + let n = self.num_dex_files(); + if index >= n { + return Err(DexError::VdexDexIndexOutOfRange { + index, + num_dex_files: n, + }); + } + Ok(self.dex_checksums()[index as usize]) + } + + // -- DEX file section ------------------------------------------------------ + + /// Returns `true` when the VDEX contains embedded DEX file bytes. + #[inline] + pub fn has_dex_section(&self) -> bool { + self.get_section_header(VdexSection::DexFile) + .is_some_and(|h| h.section_size != 0) + } + + /// Returns the raw bytes of the DEX file at `index` as a `&'a [u8]` slice + /// that borrows directly from the container — no copying. + /// + /// DEX files inside the section are stored back-to-back with 4-byte + /// alignment (matching `OatWriter::SeekToDexFiles`). + pub fn get_dex_file_data(&self, index: u32) -> Result<&'a [u8]> { + let n = self.num_dex_files(); + if index >= n { + return Err(DexError::VdexDexIndexOutOfRange { + index, + num_dex_files: n, + }); + } + if !self.has_dex_section() { + return Err(DexError::BadVdexSection { + section: "dex_file", + msg: "VDEX does not contain a DEX file section".to_string(), + }); + } + + let sec_hdr = self.get_section_header(VdexSection::DexFile).unwrap(); + let sec_start = sec_hdr.section_offset as usize; + let sec_end = sec_start + sec_hdr.section_size as usize; + + let mut offset = sec_start; + for i in 0..=index { + let dex_header_end = offset + std::mem::size_of::
(); + if dex_header_end > sec_end { + return Err(DexError::BadVdexSection { + section: "dex_file", + msg: format!("DEX header for index {i} extends beyond section bounds"), + }); + } + + let mut dex_hdr = unsafe { std::mem::zeroed::
() }; + plain::copy_from_bytes(&mut dex_hdr, &self.mmap[offset..]) + .map_err(|_| DexError::BadVdexSection { + section: "dex_file", + msg: format!("Cannot read DEX header at offset {offset} for index {i}"), + })?; + + let file_size = dex_hdr.file_size as usize; + let dex_end = offset + file_size; + if dex_end > sec_end { + return Err(DexError::BadVdexSection { + section: "dex_file", + msg: format!( + "DEX file {i} at offset {offset} with size {file_size} overflows section" + ), + }); + } + + if i == index { + return Ok(Self::raw_slice(self.mmap, offset, dex_end)); + } + + offset = align_up(dex_end, 4); + } + + Err(DexError::VdexDexIndexOutOfRange { + index, + num_dex_files: n, + }) + } + + /// Returns an iterator over the raw byte slices of each embedded DEX file. + /// + /// Each item is `Result<&'a [u8]>`. To parse a DEX file call + /// [`DexFile::from_raw_parts`][crate::file::DexFile::from_raw_parts] on + /// the slice: + /// + /// ```ignore + /// for raw in vdex.iter_dex_files() { + /// let dex = raw.and_then(|b| DexFile::from_raw_parts(b, DexLocation::InMemory)); + /// } + /// ``` + pub fn iter_dex_files(&self) -> impl Iterator> + '_ { + (0..self.num_dex_files()).map(|i| self.get_dex_file_data(i)) + } + + // -- Verifier deps section ------------------------------------------------- + + /// Raw bytes of the verifier-dependency section (empty when absent). + #[inline] + pub fn verifier_deps_data(&self) -> &'a [u8] { + self.get_section_data(VdexSection::VerifierDeps) + } + + // -- Type lookup table section --------------------------------------------- + + /// Returns `true` when the VDEX includes a type-lookup-table section. + #[inline] + pub fn has_type_lookup_table_section(&self) -> bool { + self.num_sections() as usize > VdexSection::TypeLookupTable as usize + } + + /// Raw bytes of the type-lookup-table section (empty when absent). 
+ #[inline] + pub fn type_lookup_table_data(&self) -> &'a [u8] { + self.get_section_data(VdexSection::TypeLookupTable) + } + + // -- Checksum matching ----------------------------------------------------- + + /// Returns `true` when the checksums stored in the VDEX match those in + /// `dex_headers` (count and order must both match). + pub fn matches_dex_checksums(&self, dex_headers: &[&Header]) -> bool { + if dex_headers.len() as u32 != self.num_dex_files() { + return false; + } + self.dex_checksums() + .iter() + .zip(dex_headers) + .all(|(stored, hdr)| *stored == hdr.checksum) + } + + // -- Private helpers ------------------------------------------------------- + + fn validate_sections(&self) -> Result<()> { + let data_len = self.mmap.len(); + for sec in &self.sections { + if sec.section_size == 0 { + continue; + } + let end = (sec.section_offset as usize) + .checked_add(sec.section_size as usize) + .ok_or_else(|| DexError::BadVdexSection { + section: section_name_from_kind(sec.section_kind), + msg: "offset+size overflows usize".to_string(), + })?; + if end > data_len { + return Err(DexError::BadVdexSection { + section: section_name_from_kind(sec.section_kind), + msg: format!("section end ({end}) exceeds file size ({data_len})"), + }); + } + } + Ok(()) + } +} + +// -- VdexFileContainer --------------------------------------------------------- + +/// Owning builder for a memory-mapped VDEX file. +/// +/// Mirrors [`DexFileContainer`][crate::file::DexFileContainer]: keeps the +/// `Mmap` alive and exposes an [`open`][VdexFileContainer::open] method that +/// returns a [`MmapVdexFile`] borrowing from it. +pub struct VdexFileContainer { + mmap: memmap2::Mmap, +} + +impl VdexFileContainer { + /// Memory-map `file` and return a container ready for parsing. + /// + /// # Panics + /// Panics if the OS `mmap` call fails (mirrors `DexFileContainer::new`). + pub fn new(file: T) -> Self { + // SAFETY: read-only mmap of a file descriptor supplied by the caller. 
+ Self { mmap: unsafe { memmap2::Mmap::map(file).unwrap() } } + } + + /// Parse the memory-mapped bytes as a VDEX file. + pub fn open(&self) -> Result> { + VdexFile::from_raw_parts(&self.mmap) + } + + /// Raw bytes of the mapped file. + pub fn data(&self) -> &memmap2::Mmap { + &self.mmap + } +} + +// -- Free helpers -------------------------------------------------------------- + +/// Round `value` up to the next multiple of `align` (which must be a power of +/// two). +#[inline] +fn align_up(value: usize, align: usize) -> usize { + debug_assert!(align.is_power_of_two()); + (value + align - 1) & !(align - 1) +} + +/// Map a raw section-kind `u32` to a human-readable name for error messages. +fn section_name_from_kind(kind: u32) -> &'static str { + match kind { + 0 => VdexSection::Checksum.name(), + 1 => VdexSection::DexFile.name(), + 2 => VdexSection::VerifierDeps.name(), + 3 => VdexSection::TypeLookupTable.name(), + _ => "unknown", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // -- Helpers --------------------------------------------------------------- + + /// Build a minimal VDEX file that has no DEX section but carries + /// `checksums` in the checksum section. + fn build_vdex(checksums: &[u32]) -> Vec { + // Compute section offsets. + let header_size = std::mem::size_of::(); + let section_headers_size = + VDEX_NUM_SECTIONS * std::mem::size_of::(); + let checksum_section_offset = header_size + section_headers_size; + let checksum_section_size = checksums.len() * std::mem::size_of::(); + + // Total file size (no verifier deps, no type lookup table, no dex). + let file_size = checksum_section_offset + checksum_section_size; + + let mut out = vec![0u8; file_size]; + + // Write VdexFileHeader. + out[0..4].copy_from_slice(VDEX_MAGIC); + out[4..8].copy_from_slice(VDEX_VERSION); + out[8..12].copy_from_slice(&(VDEX_NUM_SECTIONS as u32).to_le_bytes()); + + // Write section headers. 
+ let write_section = |buf: &mut Vec, idx: usize, kind: u32, offset: u32, size: u32| { + let base = header_size + idx * std::mem::size_of::(); + buf[base..base + 4].copy_from_slice(&kind.to_le_bytes()); + buf[base + 4..base + 8].copy_from_slice(&offset.to_le_bytes()); + buf[base + 8..base + 12].copy_from_slice(&size.to_le_bytes()); + }; + + // kChecksumSection + write_section( + &mut out, + 0, + VdexSection::Checksum as u32, + checksum_section_offset as u32, + checksum_section_size as u32, + ); + // kDexFileSection — absent + write_section(&mut out, 1, VdexSection::DexFile as u32, 0, 0); + // kVerifierDepsSection — absent + write_section(&mut out, 2, VdexSection::VerifierDeps as u32, 0, 0); + // kTypeLookupTableSection — absent + write_section(&mut out, 3, VdexSection::TypeLookupTable as u32, 0, 0); + + // Write checksums. + for (i, c) in checksums.iter().enumerate() { + let base = checksum_section_offset + i * 4; + out[base..base + 4].copy_from_slice(&c.to_le_bytes()); + } + + out + } + + // -- Tests ----------------------------------------------------------------- + + #[test] + fn valid_header_magic_and_version() { + let data = build_vdex(&[]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert!(vdex.file_header().is_magic_valid()); + assert!(vdex.file_header().is_version_valid()); + } + + #[test] + fn num_sections_matches_constant() { + let data = build_vdex(&[]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert_eq!(vdex.num_sections() as usize, VDEX_NUM_SECTIONS); + } + + #[test] + fn num_dex_files_zero_when_no_checksums() { + let data = build_vdex(&[]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert_eq!(vdex.num_dex_files(), 0); + } + + #[test] + fn num_dex_files_matches_checksum_count() { + let data = build_vdex(&[0xdeadbeef, 0xcafef00d]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert_eq!(vdex.num_dex_files(), 2); + } + + #[test] + fn dex_checksum_at_returns_correct_values() { + let checksums = 
[0x11111111u32, 0x22222222, 0x33333333]; + let data = build_vdex(&checksums); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + for (i, expected) in checksums.iter().enumerate() { + assert_eq!(vdex.dex_checksum_at(i as u32).unwrap(), *expected); + } + } + + #[test] + fn dex_checksum_out_of_range_returns_error() { + let data = build_vdex(&[0xdeadbeef]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert!(matches!( + vdex.dex_checksum_at(1), + Err(DexError::VdexDexIndexOutOfRange { index: 1, .. }) + )); + } + + #[test] + fn bad_magic_returns_error() { + let mut data = build_vdex(&[]); + data[0] = b'w'; // corrupt magic + assert!(matches!( + VdexFile::from_raw_parts(&data), + Err(DexError::BadVdexMagic) + )); + } + + #[test] + fn bad_version_returns_error() { + let mut data = build_vdex(&[]); + data[4] = b'0'; + data[5] = b'0'; + data[6] = b'1'; // version "001\0" — not supported + assert!(matches!( + VdexFile::from_raw_parts(&data), + Err(DexError::UnknownVdexVersion { .. }) + )); + } + + #[test] + fn truncated_data_returns_error() { + let data = build_vdex(&[0x12345678]); + // Too short to hold even the header. Use a &[u8] container. + let truncated: &[u8] = &data[..4]; + assert!(matches!( + VdexFile::from_raw_parts(&truncated), + Err(DexError::TruncatedVdexFile { .. 
}) + )); + } + + #[test] + fn has_dex_section_false_for_metadata_only_vdex() { + let data = build_vdex(&[0xdeadbeef]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert!(!vdex.has_dex_section()); + } + + #[test] + fn verifier_deps_data_empty_when_absent() { + let data = build_vdex(&[]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert!(vdex.verifier_deps_data().is_empty()); + } + + #[test] + fn matches_dex_checksums_empty_succeeds() { + let data = build_vdex(&[]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + assert!(vdex.matches_dex_checksums(&[])); + } + + #[test] + fn matches_dex_checksums_mismatch_fails() { + use crate::file::Header; + let data = build_vdex(&[0xdeadbeef]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + + // Build a fake Header with a different checksum. + let mut fake_header = unsafe { std::mem::zeroed::
() }; + // Set the checksum field (offset 8 within Header). + fake_header.checksum = 0x12345678; + assert!(!vdex.matches_dex_checksums(&[&fake_header])); + } + + #[test] + fn matches_dex_checksums_correct_checksum_succeeds() { + use crate::file::Header; + let data = build_vdex(&[0xdeadbeef]); + let vdex = VdexFile::from_raw_parts(&data).unwrap(); + + let mut fake_header = unsafe { std::mem::zeroed::
() }; + fake_header.checksum = 0xdeadbeef; + assert!(vdex.matches_dex_checksums(&[&fake_header])); + } +} From c6fb6614bcf31cf4e3fbbcdad8412815ce94df62 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:26:08 +0200 Subject: [PATCH 42/46] update LICENSE --- LICENSE | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 LICENSE diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 From f560fce83018a340a22a3ec615cd02901f4828ca Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:26:40 +0200 Subject: [PATCH 43/46] feat(python): update stub files --- LICENSE | 0 examples/dex_basic_ops.rs | 4 +- examples/dex_edit.rs | 91 ++ examples/dex_strings.rs | 6 +- python/dexrs/__init__.py | 51 +- python/dexrs/_internal/__init__.pyi | 28 + python/dexrs/_internal/annotation.pyi | 32 +- python/dexrs/_internal/builder.pyi | 122 +++ python/dexrs/_internal/class_accessor.pyi | 72 +- python/dexrs/_internal/code/__init__.pyi | 825 +++++++++++-------- python/dexrs/_internal/code/code_flags.pyi | 13 +- python/dexrs/_internal/code/flags.pyi | 31 +- python/dexrs/_internal/code/signatures.pyi | 16 +- python/dexrs/_internal/code/verify_flags.pyi | 88 +- python/dexrs/_internal/code/vreg.pyi | 107 ++- python/dexrs/_internal/container.pyi | 96 ++- python/dexrs/_internal/editor.pyi | 158 ++++ python/dexrs/_internal/error.pyi | 18 + python/dexrs/_internal/file.pyi | 362 ++++++-- python/dexrs/_internal/leb128.pyi | 54 +- python/dexrs/_internal/mutf8.pyi | 59 +- python/dexrs/_internal/primitive.pyi | 149 ++++ python/dexrs/_internal/structs.pyi | 172 +++- python/dexrs/_internal/type_lookup_table.pyi | 69 ++ python/dexrs/builder.py | 57 ++ python/dexrs/code.py | 31 + python/dexrs/container.py | 21 + python/dexrs/editor.py | 25 + python/dexrs/error.py | 16 + python/dexrs/file.py | 21 + python/dexrs/leb128.py | 24 + python/dexrs/mutf8.py | 
33 + python/dexrs/primitive.py | 23 + python/dexrs/py.typed | 0 python/dexrs/type_lookup_table.py | 23 + python/tests/conftest.py | 29 + python/tests/test_builder.py | 212 +++++ python/tests/test_dex_editor.py | 232 ++++++ src/bin/dexrs/commands/disasm.rs | 2 +- src/bin/dexrs/commands/inspect.rs | 10 +- src/bin/dexrs/commands/patch.rs | 4 +- src/bin/dexrs/commands/vdex.rs | 8 +- src/bin/dexrs/highlight.rs | 4 +- src/bin/dexrs/tui/app.rs | 8 +- src/bin/dexrs/tui/events.rs | 2 +- src/bin/dexrs/tui/ui.rs | 10 +- src/file/builder.rs | 14 +- src/file/ir.rs | 4 +- src/file/writer.rs | 6 +- src/vdex/mod.rs | 22 +- 50 files changed, 2926 insertions(+), 538 deletions(-) mode change 100644 => 100755 LICENSE create mode 100644 examples/dex_edit.rs mode change 100644 => 100755 python/dexrs/__init__.py mode change 100644 => 100755 python/dexrs/_internal/__init__.pyi mode change 100644 => 100755 python/dexrs/_internal/annotation.pyi create mode 100755 python/dexrs/_internal/builder.pyi mode change 100644 => 100755 python/dexrs/_internal/class_accessor.pyi mode change 100644 => 100755 python/dexrs/_internal/code/__init__.pyi mode change 100644 => 100755 python/dexrs/_internal/code/code_flags.pyi mode change 100644 => 100755 python/dexrs/_internal/code/flags.pyi mode change 100644 => 100755 python/dexrs/_internal/code/signatures.pyi mode change 100644 => 100755 python/dexrs/_internal/code/verify_flags.pyi mode change 100644 => 100755 python/dexrs/_internal/code/vreg.pyi mode change 100644 => 100755 python/dexrs/_internal/container.pyi create mode 100755 python/dexrs/_internal/editor.pyi mode change 100644 => 100755 python/dexrs/_internal/error.pyi mode change 100644 => 100755 python/dexrs/_internal/file.pyi mode change 100644 => 100755 python/dexrs/_internal/leb128.pyi mode change 100644 => 100755 python/dexrs/_internal/mutf8.pyi create mode 100755 python/dexrs/_internal/primitive.pyi mode change 100644 => 100755 python/dexrs/_internal/structs.pyi create mode 100755 
python/dexrs/_internal/type_lookup_table.pyi create mode 100755 python/dexrs/builder.py mode change 100644 => 100755 python/dexrs/code.py mode change 100644 => 100755 python/dexrs/container.py create mode 100755 python/dexrs/editor.py mode change 100644 => 100755 python/dexrs/error.py mode change 100644 => 100755 python/dexrs/file.py mode change 100644 => 100755 python/dexrs/leb128.py mode change 100644 => 100755 python/dexrs/mutf8.py create mode 100755 python/dexrs/primitive.py mode change 100644 => 100755 python/dexrs/py.typed create mode 100755 python/dexrs/type_lookup_table.py create mode 100644 python/tests/conftest.py create mode 100644 python/tests/test_builder.py create mode 100644 python/tests/test_dex_editor.py diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/examples/dex_basic_ops.rs b/examples/dex_basic_ops.rs index 4fa6bcb..bfbac89 100644 --- a/examples/dex_basic_ops.rs +++ b/examples/dex_basic_ops.rs @@ -15,7 +15,7 @@ fn dex_get_method(dex: &DexFile<'_>) -> Result<()> { // name is a string. To resolve everything manually, you would need // to fetch the string id first - let name = dex.get_utf16_str_at(method_id.name_idx)?; + let name = dex.get_str_at(method_id.name_idx)?; let proto_id = dex.get_proto_id(method_id.proto_idx)?; // the declaring class name is just a TypeId, which points to a @@ -44,7 +44,7 @@ fn dex_get_field(dex: &DexFile<'_>) -> Result<()> { assert!(dex.field_id_idx(field_id)? == 0); // same as for methods - let name = dex.get_utf16_str_at(field_id.name_idx)?; + let name = dex.get_str_at(field_id.name_idx)?; let type_name = dex.get_type_desc_utf16_at(field_id.type_idx)?; let class_name = dex.get_type_desc_utf16_at(field_id.class_idx)?; diff --git a/examples/dex_edit.rs b/examples/dex_edit.rs new file mode 100644 index 0000000..1050e31 --- /dev/null +++ b/examples/dex_edit.rs @@ -0,0 +1,91 @@ +//! Demonstrates DEX file modification using `DexEditor` (Tier 2 API) and +//! 
the low-level `update_checksum` helper (Tier 1). +//! +//! Run with: +//! ``` +//! cargo run --example dex_edit -- path/to/classes.dex +//! ``` + +use std::{env, fs, path::Path}; + +use dexrs::file::{patch::update_checksum, DexEditor}; + +fn main() -> Result<(), Box> { + let path = env::args() + .nth(1) + .unwrap_or_else(|| "classes.dex".to_string()); + + example_set_flags(&path, "/tmp/out_flags.dex")?; + example_rename_class(&path, "/tmp/out_renamed.dex")?; + example_method_flags(&path, "/tmp/out_method.dex")?; + example_manual_checksum(&path)?; + + Ok(()) +} + +/// Change class access flags via name lookup. +fn example_set_flags(src: &str, out: &str) -> Result<(), Box> { + let mut editor = DexEditor::from_file(Path::new(src))?; + + // Accepts dotted ("com.example.Foo"), slash ("com/example/Foo"), + // or descriptor ("Lcom/example/Foo;") form. + editor.set_class_access_flags("LMain;", 0x0011 /* public final */)?; + + // Strip hidden-API restriction metadata (no-op if section is absent). + let _ = editor.clear_hiddenapi_flags(); + + // build() recalculates the Adler32 checksum and returns the final bytes. + let bytes = editor.build()?; + fs::write(out, &bytes)?; + println!("set_flags -> {out}"); + Ok(()) +} + +/// Rename a class and every cross-reference in the string pool. +fn example_rename_class(src: &str, out: &str) -> Result<(), Box> { + let mut editor = DexEditor::from_file(Path::new(src))?; + + // Same MUTF-8 byte length -> in-place patch + sort fixup. + // Different byte length -> full string-pool rebuild + offset adjustment. + editor.rename_class("LMain;", "LRenamedMain;")?; + + // write_to() combines build() + fs::write in one call. + editor.write_to(Path::new(out))?; + println!("rename -> {out}"); + Ok(()) +} + +/// Change access flags on a specific method inside a class. 
+fn example_method_flags(src: &str, out: &str) -> Result<(), Box> { + let mut editor = DexEditor::from_file(Path::new(src))?; + + // LEB128 re-encoding is handled automatically when the encoded width changes. + editor.set_method_access_flags("LMain;", "main", 0x0009 /* public static */)?; + + editor.write_to(Path::new(out))?; + println!("method flags -> {out}"); + Ok(()) +} + +/// Low-level: manually patch raw bytes, then fix the checksum. +fn example_manual_checksum(src: &str) -> Result<(), Box> { + let mut buf = fs::read(src)?; + + // DEX header layout (bytes): + // 0–7 magic ("dex\n035\0") + // 8–11 Adler32 checksum ← recalculated by update_checksum + // 12–31 SHA-1 signature (not updated here) + // 32–35 file_size + // ... + // 100–103 class_defs_off + + // Example: zero out byte 200 (arbitrary mutation for illustration). + if buf.len() > 200 { + buf[200] = 0; + } + + // Always call update_checksum after raw mutations to keep the file valid. + update_checksum(&mut buf); + println!("manual patch checksum OK, file_size={}", buf.len()); + Ok(()) +} diff --git a/examples/dex_strings.rs b/examples/dex_strings.rs index fe1f65a..90a04af 100644 --- a/examples/dex_strings.rs +++ b/examples/dex_strings.rs @@ -10,10 +10,10 @@ fn dex_strings(dex: &DexFile<'_>) -> Result<()> { // name can be retrieved in various ways: // // 1. modified utf8 -> utf16 with checks - let name = dex.get_utf16_str(string_id)?; + let name = dex.get_str(string_id)?; // // 2. modified utf8 -> utf16 lossy - let name = dex.get_utf16_str_lossy(string_id)?; + let name = dex.get_str_lossy(string_id)?; // // 3. modified utf8 -> utf8 unsafe (but fast) let name = unsafe { dex.fast_get_utf8_str(string_id)? 
}; @@ -24,7 +24,7 @@ fn dex_strings(dex: &DexFile<'_>) -> Result<()> { // all of the operations above can be done with the // index directly - let name = dex.get_utf16_str_lossy_at(0)?; + let name = dex.get_str_lossy_at(0)?; Ok(()) } diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py old mode 100644 new mode 100755 index 9d53bd8..d4c1043 --- a/python/dexrs/__init__.py +++ b/python/dexrs/__init__.py @@ -1,4 +1,53 @@ +"""dexrs — Python bindings for the Rust DEX file parsing library. + +This package exposes the full public API of the ``dexrs`` Rust crate via +PyO3-generated native extensions, re-exported under clean Python names. + +Quickstart:: + + from dexrs import DexFile, InMemoryDexContainer, VerifyPreset + + with open("classes.dex", "rb") as f: + container = InMemoryDexContainer(f.read()) + + dex = DexFile.from_bytes(container, VerifyPreset.ALL) + + for i in range(dex.num_class_defs()): + cls = dex.get_class_def(i) + print(dex.get_class_desc(cls)) + +Submodules +---------- +- :mod:`dexrs.file` — :class:`DexFile` and :class:`VerifyPreset` +- :mod:`dexrs.container` — :class:`InMemoryDexContainer`, :class:`FileDexContainer` +- :mod:`dexrs.editor` — :class:`DexEditor` for mutation +- :mod:`dexrs.code` — Instructions, opcodes, and operand helpers +- :mod:`dexrs.error` — :exc:`PyDexError` +- :mod:`dexrs.leb128` — LEB128 varint decoders +- :mod:`dexrs.mutf8` — MUTF-8 ↔ str conversion +- :mod:`dexrs.primitive` — :class:`PrimitiveType` enum +- :mod:`dexrs.type_lookup_table` — :class:`TypeLookupTable` +""" # some shortcuts from .file import DexFile, VerifyPreset from .container import InMemoryDexContainer, FileDexContainer -from .error import PyDexError \ No newline at end of file +from .error import PyDexError +from .editor import DexEditor +from .builder import DexIrBuilder, IrClassDef, IrMethodDef, CodeBuilder +from .type_lookup_table import TypeLookupTable +from .primitive import PrimitiveType + +__all__ = [ + "DexFile", + "VerifyPreset", + 
"InMemoryDexContainer", + "FileDexContainer", + "PyDexError", + "DexEditor", + "DexIrBuilder", + "IrClassDef", + "IrMethodDef", + "CodeBuilder", + "TypeLookupTable", + "PrimitiveType", +] \ No newline at end of file diff --git a/python/dexrs/_internal/__init__.pyi b/python/dexrs/_internal/__init__.pyi old mode 100644 new mode 100755 index e69de29..1dd3db2 --- a/python/dexrs/_internal/__init__.pyi +++ b/python/dexrs/_internal/__init__.pyi @@ -0,0 +1,28 @@ +"""Type stubs for the ``dexrs._internal`` native extension package. + +Sub-modules exposed by the Rust extension: + +- :mod:`dexrs._internal.annotation` — class annotation accessors +- :mod:`dexrs._internal.class_accessor` — class data iterators +- :mod:`dexrs._internal.code` — instructions, opcodes, and operand accessors +- :mod:`dexrs._internal.container` — DEX container types (memory / file) +- :mod:`dexrs._internal.editor` — mutable DEX editor +- :mod:`dexrs._internal.error` — :exc:`PyDexError` exception type +- :mod:`dexrs._internal.leb128` — LEB128 varint decoders +- :mod:`dexrs._internal.mutf8` — MUTF-8 / UTF-16 conversion utilities +- :mod:`dexrs._internal.primitive` — Java primitive-type enum +- :mod:`dexrs._internal.structs` — plain-data structs mirroring DEX on-disk layout +- :mod:`dexrs._internal.type_lookup_table` — O(1) type-descriptor lookup table +""" + +from . import annotation as annotation +from . import class_accessor as class_accessor +from . import code as code +from . import container as container +from . import editor as editor +from . import error as error +from . import leb128 as leb128 +from . import mutf8 as mutf8 +from . import primitive as primitive +from . import structs as structs +from . 
import type_lookup_table as type_lookup_table diff --git a/python/dexrs/_internal/annotation.pyi b/python/dexrs/_internal/annotation.pyi old mode 100644 new mode 100755 index 1eb707b..6ebe8ef --- a/python/dexrs/_internal/annotation.pyi +++ b/python/dexrs/_internal/annotation.pyi @@ -1,3 +1,10 @@ +"""Type stubs for the ``dexrs._internal.annotation`` native extension module. + +Provides access to class-level annotation data from a DEX file, including +class annotations, field annotations, method annotations, and parameter +annotations. +""" + from typing import List from .structs import ( @@ -7,9 +14,26 @@ from .structs import ( ) AnnotationSetItem = List[int] +"""A list of annotation offsets pointing to :class:`~dexrs._internal.structs.AnnotationItem` records.""" class ClassAnnotationAccessor: - def get_class_annotation_set(self) -> AnnotationSetItem: ... - def get_field_annotations_items(self) -> List[FieldAnnotationsItem]: ... - def get_method_annotations_items(self) -> List[MethodAnnotationsItem]: ... - def get_parameter_annotations_items(self) -> List[ParameterAnnotationsItem]: ... + """Accessor for all annotation data attached to a single class definition. + + Obtain one via :meth:`~dexrs._internal.file.DexFile.get_class_annotation_accessor`. + """ + + def get_class_annotation_set(self) -> AnnotationSetItem: + """Return the list of annotation offsets for the class itself.""" + ... + + def get_field_annotations_items(self) -> List[FieldAnnotationsItem]: + """Return annotation metadata for each annotated field in the class.""" + ... + + def get_method_annotations_items(self) -> List[MethodAnnotationsItem]: + """Return annotation metadata for each annotated method in the class.""" + ... + + def get_parameter_annotations_items(self) -> List[ParameterAnnotationsItem]: + """Return annotation metadata for each annotated method's parameter list.""" + ... 
diff --git a/python/dexrs/_internal/builder.pyi b/python/dexrs/_internal/builder.pyi new file mode 100755 index 0000000..df15e43 --- /dev/null +++ b/python/dexrs/_internal/builder.pyi @@ -0,0 +1,122 @@ +from typing import Optional + +class ProtoKey: + """Method prototype: return type + parameter types.""" + + def __new__(cls, return_type: str, params: list[str] = ...) -> ProtoKey: ... + @staticmethod + def from_descriptor(desc: str) -> Optional[ProtoKey]: + """Parse ``"([Ljava/lang/String;)V"`` into a :class:`ProtoKey`.""" + ... + + @property + def return_type(self) -> str: ... + @property + def params(self) -> list[str]: ... + def shorty(self) -> str: ... + +class CodeDef: + """Assembled code item returned by :meth:`CodeBuilder.build`.""" + + @property + def registers(self) -> int: ... + @property + def ins(self) -> int: ... + @property + def outs(self) -> int: ... + @property + def insns_count(self) -> int: ... + +class CodeBuilder: + """Assembles Dalvik bytecode from disassembly text lines. + + The builder is consumed by :meth:`build`; any further calls raise + :exc:`ValueError`. + """ + + def __new__(cls, registers: int, ins: int, outs: int) -> CodeBuilder: ... + + def emit(self, line: str) -> None: + """Parse and emit one disassembly line (e.g. ``"return-void"``).""" + ... + + def label(self, name: str) -> None: + """Place a named label at the current instruction position.""" + ... + + def build(self) -> CodeDef: + """Resolve branches and return a :class:`CodeDef`. + + The builder is consumed after this call. + """ + ... + +class IrFieldDef: + """A field declaration inside a class IR.""" + + def __new__( + cls, name: str, field_type: str, access_flags: int = 0 + ) -> IrFieldDef: ... + + @property + def name(self) -> str: ... + @property + def field_type(self) -> str: ... + @property + def access_flags(self) -> int: ... + @access_flags.setter + def access_flags(self, v: int) -> None: ... 
+ +class IrMethodDef: + """A method declaration (optionally with a body).""" + + def __new__( + cls, name: str, descriptor: str, access_flags: int = 0 + ) -> IrMethodDef: ... + + @property + def name(self) -> str: ... + @property + def access_flags(self) -> int: ... + @access_flags.setter + def access_flags(self, v: int) -> None: ... + + def set_code(self, code: CodeDef) -> None: + """Attach a :class:`CodeDef` as this method's body.""" + ... + +class IrClassDef: + """A complete class definition for the DEX IR.""" + + def __new__(cls, descriptor: str) -> IrClassDef: ... + + @property + def descriptor(self) -> str: ... + + def set_access(self, flags: int) -> None: ... + def set_superclass(self, desc: str) -> None: ... + def add_interface(self, desc: str) -> None: ... + def set_source_file(self, name: str) -> None: ... + def add_static_field( + self, name: str, field_type: str, access_flags: int = 0 + ) -> None: ... + def add_instance_field( + self, name: str, field_type: str, access_flags: int = 0 + ) -> None: ... + def add_direct_method(self, method: IrMethodDef) -> None: ... + def add_virtual_method(self, method: IrMethodDef) -> None: ... + +class DexIrBuilder: + """Builds a complete DEX file from class definitions.""" + + def __new__(cls, version: int = 35) -> DexIrBuilder: ... + + def add_class(self, cls: IrClassDef) -> None: + """Add a class to the IR (clones the class).""" + ... + + def write(self) -> bytes: + """Serialize all classes to a valid DEX byte string.""" + ... + + def class_count(self) -> int: ... diff --git a/python/dexrs/_internal/class_accessor.pyi b/python/dexrs/_internal/class_accessor.pyi old mode 100644 new mode 100755 index e1caaa4..9f464c6 --- a/python/dexrs/_internal/class_accessor.pyi +++ b/python/dexrs/_internal/class_accessor.pyi @@ -1,29 +1,85 @@ +"""Type stubs for the ``dexrs._internal.class_accessor`` native extension module. 
+ +Provides iterator-style access to the fields and methods declared inside a +``class_data_item``, which is the LEB128-encoded section of a DEX class +definition that lists members. +""" + from typing import List class Method: + """A method entry decoded from ``class_data_item``. + + Fields mirror the DEX ``encoded_method`` structure (method index delta + resolved to an absolute index, access flags, and code offset). + """ + index: int + """Absolute index into the ``method_ids`` list.""" access_flags: int + """Access flags bitmask (``ACC_PUBLIC``, ``ACC_STATIC``, etc.).""" code_offset: int + """Byte offset in the DEX file of the ``code_item``, or 0 for abstract / native methods.""" - def is_static_or_direct(self) -> bool: ... + def is_static_or_direct(self) -> bool: + """Return ``True`` if this is a static or direct (non-virtual) method.""" + ... class Field: + """A field entry decoded from ``class_data_item``. + + Fields mirror the DEX ``encoded_field`` structure. + """ + index: int + """Absolute index into the ``field_ids`` list.""" access_flags: int + """Access flags bitmask (``ACC_PUBLIC``, ``ACC_STATIC``, etc.).""" - def is_static(self) -> bool: ... + def is_static(self) -> bool: + """Return ``True`` if this field has the ``ACC_STATIC`` flag set.""" + ... class ClassAccessor: + """Accessor for all fields and methods defined in a class. + + Obtain one via :meth:`~dexrs._internal.file.DexFile.get_class_accessor`. + Iterating ``get_fields()`` / ``get_methods()`` is the primary use-case. 
+ """ + num_fields: int + """Total number of fields (static + instance).""" num_methods: int + """Total number of methods (direct + virtual).""" num_static_fields: int + """Number of static fields.""" num_instance_fields: int + """Number of instance fields.""" num_direct_methods: int + """Number of direct (non-virtual) methods.""" num_virtual_methods: int + """Number of virtual methods.""" + + def get_fields(self) -> List[Field]: + """Return all fields (static first, then instance).""" + ... + + def get_methods(self) -> List[Method]: + """Return all methods (direct first, then virtual).""" + ... + + def get_static_fields(self) -> List[Field]: + """Return only the static fields of this class.""" + ... + + def get_instance_fields(self) -> List[Field]: + """Return only the instance fields of this class.""" + ... + + def get_direct_methods(self) -> List[Method]: + """Return only the direct (non-virtual) methods of this class.""" + ... - def get_fields(self) -> List[Field]: ... - def get_methods(self) -> List[Method]: ... - def get_static_fields(self) -> List[Field]: ... - def get_instance_fields(self) -> List[Field]: ... - def get_direct_methods(self) -> List[Method]: ... - def get_virtual_methods(self) -> List[Method]: ... + def get_virtual_methods(self) -> List[Method]: + """Return only the virtual methods of this class.""" + ... diff --git a/python/dexrs/_internal/code/__init__.pyi b/python/dexrs/_internal/code/__init__.pyi old mode 100644 new mode 100755 index e930be3..ba17069 --- a/python/dexrs/_internal/code/__init__.pyi +++ b/python/dexrs/_internal/code/__init__.pyi @@ -1,373 +1,552 @@ +"""Type stubs for the ``dexrs._internal.code`` native extension module. + +Provides types for iterating and inspecting Dalvik bytecode instructions. + +- :class:`CodeItemAccessor` — iterate over instructions in a method body. +- :class:`Instruction` — a single decoded Dalvik instruction. +- :class:`Code` — opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). 
+- :class:`Format` — instruction format enum (``k10x``, ``k35c``, …). +- :class:`IndexType` — type of the index operand in an instruction. +- :class:`FillArrayDataPayload` — payload for ``fill-array-data``. +- :class:`SparseSwitchPayload` — payload for ``sparse-switch``. +- :class:`PackedSwitchPayload` — payload for ``packed-switch``. +""" + from typing import List, Optional from ..structs import CodeItem from ..file import DexFile class CodeItemAccessor: + """Accessor for the instructions and metadata of a single ``code_item``. + + Obtain one via :meth:`~dexrs._internal.file.DexFile.get_code_item_accessor`. + """ + code_off: int + """Byte offset of the ``code_item`` within the DEX file.""" code_item: CodeItem + """The parsed ``code_item`` header.""" @property - def registers_size(self) -> int: ... + def registers_size(self) -> int: + """Total number of virtual registers used by the method.""" + ... + @property - def ins_size(self) -> int: ... + def ins_size(self) -> int: + """Number of words of incoming arguments.""" + ... + @property - def outs_size(self) -> int: ... + def outs_size(self) -> int: + """Number of words of outgoing argument space required.""" + ... + @property - def tries_size(self) -> int: ... + def tries_size(self) -> int: + """Number of ``try_item`` entries in the method.""" + ... + @property - def debug_info_off(self) -> int: ... + def debug_info_off(self) -> int: + """Offset to the ``debug_info_item``, or 0 if absent.""" + ... + @property - def code_off(self) -> int: ... + def code_off(self) -> int: + """Byte offset of the start of the instruction array.""" + ... + @property - def insns_size_in_code_units(self) -> int: ... + def insns_size_in_code_units(self) -> int: + """Length of the instruction array in 16-bit code units.""" + ... + @property - def insns_size_in_bytes(self) -> int: ... - def insns_raw(self) -> List[int]: ... - def insns(self) -> List[Instruction]: ... - def inst_at(self, pc: int) -> Instruction: ... 
+ def insns_size_in_bytes(self) -> int: + """Length of the instruction array in bytes.""" + ... + + def insns_raw(self) -> List[int]: + """Return the raw instruction array as a list of 16-bit code units.""" + ... + + def insns(self) -> List["Instruction"]: + """Decode and return all instructions in the method body.""" + ... + + def inst_at(self, pc: int) -> "Instruction": + """Return the decoded instruction at program-counter offset *pc* (in code units). + + :raises PyDexError: If *pc* is out of range. + """ + ... class Instruction: + """A single decoded Dalvik instruction. + + Provides access to opcode metadata and operand extraction via the + :mod:`~dexrs._internal.code.vreg` helper functions. + """ + @property - def opcode(self) -> Code: ... + def opcode(self) -> "Code": + """The opcode enum value of this instruction.""" + ... + @property - def format(self) -> Format: ... + def format(self) -> "Format": + """The instruction format (determines operand layout).""" + ... + @property - def name(self) -> str: ... + def name(self) -> str: + """The mnemonic string for this opcode (e.g. ``"invoke-virtual"``).""" + ... + @property - def verify_flags(self) -> int: ... - def size_in_code_units(self) -> int: ... - def next(self) -> Instruction | None: ... + def verify_flags(self) -> int: + """Verification flags bitmask for this instruction (see :mod:`~dexrs._internal.code.verify_flags`).""" + ... + + def size_in_code_units(self) -> int: + """Return the size of this instruction in 16-bit code units.""" + ... + + def next(self) -> Optional["Instruction"]: + """Return the next instruction in the stream, or ``None`` at end-of-method.""" + ... + @staticmethod - def get_opcode_of(inst_data: int) -> Code: ... + def get_opcode_of(inst_data: int) -> "Code": + """Decode the :class:`Code` opcode from the raw first code unit *inst_data*.""" + ... + @staticmethod - def get_name_of(opcode: Code) -> str: ... 
+ def get_name_of(opcode: "Code") -> str: + """Return the mnemonic string for *opcode*.""" + ... + @staticmethod - def get_format_of(opcode: Code) -> Format: ... + def get_format_of(opcode: "Code") -> "Format": + """Return the :class:`Format` for *opcode*.""" + ... + @staticmethod - def get_verify_flags_of(opcode: Code) -> int: ... + def get_verify_flags_of(opcode: "Code") -> int: + """Return the verify-flags bitmask for *opcode*.""" + ... + @staticmethod - def get_flags_of(inst_data: int) -> int: ... + def get_flags_of(inst_data: int) -> int: + """Return the control-flow flags bitmask for the raw first code unit *inst_data*.""" + ... + @staticmethod - def get_index_type_of(inst_data: int) -> IndexType: ... - def to_string(self, dex_file: Optional[DexFile] = ...) -> str: ... + def get_index_type_of(inst_data: int) -> "IndexType": + """Return the :class:`IndexType` for the raw first code unit *inst_data*.""" + ... + + def to_string(self, dex_file: Optional[DexFile] = ...) -> str: + """Return a human-readable disassembly string for this instruction. + + When *dex_file* is provided, index operands are resolved to names. + """ + ... 
class FillArrayDataPayload: + """Payload for a ``fill-array-data`` instruction.""" + data: bytes + """The raw element bytes of the array initialiser.""" element_count: int + """Number of elements in the array.""" element_size: int + """Size of each element in bytes (1, 2, 4, or 8).""" class SparseSwitchPayload: + """Payload for a ``sparse-switch`` instruction.""" + keys: List[int] + """Sorted list of case keys.""" targets: List[int] + """Branch targets (relative offsets) corresponding to each key.""" case_count: int + """Number of cases in the switch.""" class PackedSwitchPayload: + """Payload for a ``packed-switch`` instruction.""" + first_key: List[int] + """The first (lowest) key value of the packed range.""" targets: List[int] + """Branch targets (relative offsets) for each key starting at *first_key*.""" case_count: int + """Number of cases in the switch.""" class Format: - k10x: Format - k12x: Format - k11n: Format - k11x: Format - k10t: Format - k20t: Format - k22x: Format - k21t: Format - k21s: Format - k21h: Format - k21c: Format - k23x: Format - k22b: Format - k22t: Format - k22s: Format - k22c: Format - k32x: Format - k30t: Format - k31t: Format - k31i: Format - k31c: Format - k35c: Format - k3rc: Format - k45cc: Format - k4rcc: Format - k51l: Format - kInvalidFormat: Format - - def __int__(self) -> int: ... + """Dalvik instruction format enum. + + The format determines the number and layout of operands in an instruction. + Use :attr:`Instruction.format` to obtain the format of a decoded instruction. + """ + + k10x: "Format" + """No operands (e.g. 
``nop``, ``return-void``).""" + k12x: "Format" + """Two 4-bit registers: ``vA``, ``vB``.""" + k11n: "Format" + """One 4-bit register and a 4-bit literal: ``vA``, ``#+B``.""" + k11x: "Format" + """One 8-bit register: ``vAA``.""" + k10t: "Format" + """8-bit branch offset: ``+AA``.""" + k20t: "Format" + """16-bit branch offset: ``+AAAA``.""" + k22x: "Format" + """One 8-bit and one 16-bit register: ``vAA``, ``vBBBB``.""" + k21t: "Format" + """One 8-bit register + 16-bit branch offset: ``vAA``, ``+BBBB``.""" + k21s: "Format" + """One 8-bit register + 16-bit signed literal: ``vAA``, ``#+BBBB``.""" + k21h: "Format" + """One 8-bit register + 16-bit high-order literal: ``vAA``, ``#+BBBB0000``.""" + k21c: "Format" + """One 8-bit register + 16-bit index: ``vAA``, ``kind@BBBB``.""" + k23x: "Format" + """Three 8-bit registers: ``vAA``, ``vBB``, ``vCC``.""" + k22b: "Format" + """Two 8-bit registers + 8-bit signed literal: ``vAA``, ``vBB``, ``#+CC``.""" + k22t: "Format" + """Two 4-bit registers + 16-bit branch offset: ``vA``, ``vB``, ``+CCCC``.""" + k22s: "Format" + """Two 4-bit registers + 16-bit signed literal: ``vA``, ``vB``, ``#+CCCC``.""" + k22c: "Format" + """Two 4-bit registers + 16-bit index: ``vA``, ``vB``, ``kind@CCCC``.""" + k32x: "Format" + """Two 16-bit registers: ``vAAAA``, ``vBBBB``.""" + k30t: "Format" + """32-bit branch offset: ``+AAAAAAAA``.""" + k31t: "Format" + """One 8-bit register + 32-bit branch offset: ``vAA``, ``+BBBBBBBB``.""" + k31i: "Format" + """One 8-bit register + 32-bit signed literal: ``vAA``, ``#+BBBBBBBB``.""" + k31c: "Format" + """One 8-bit register + 32-bit index: ``vAA``, ``string@BBBBBBBB``.""" + k35c: "Format" + """Up to 5 registers + 16-bit index (used for invoke): ``{vC,vD,vE,vF,vG}``, ``kind@BBBB``.""" + k3rc: "Format" + """Register range + 16-bit index: ``{vCCCC .. 
vNNNN}``, ``kind@BBBB``.""" + k45cc: "Format" + """5 registers + two 16-bit indices (invoke-polymorphic): ``{vC,..}``, ``meth@BBBB``, ``proto@HHHH``.""" + k4rcc: "Format" + """Register range + two 16-bit indices (invoke-polymorphic/range).""" + k51l: "Format" + """One 8-bit register + 64-bit literal: ``vAA``, ``#+BBBBBBBBBBBBBBBB``.""" + kInvalidFormat: "Format" + """Sentinel value for an unrecognised or invalid instruction format.""" + + def __int__(self) -> int: + """Return the integer discriminant of this format variant.""" + ... class IndexType: - Unknown: IndexType - NoIndex: IndexType - TypeRef: IndexType - StringRef: IndexType - MethodRef: IndexType - FieldRef: IndexType - MethodAndProtoRef: IndexType - CallSiteRef: IndexType - MethodHandleRef: IndexType - ProtoRef: IndexType - - def __int__(self) -> int: ... + """Describes the kind of pool index carried by an instruction operand.""" + + Unknown: "IndexType" + """Index type is not known.""" + NoIndex: "IndexType" + """This instruction carries no index operand.""" + TypeRef: "IndexType" + """Index into the type identifiers list.""" + StringRef: "IndexType" + """Index into the string identifiers list.""" + MethodRef: "IndexType" + """Index into the method identifiers list.""" + FieldRef: "IndexType" + """Index into the field identifiers list.""" + MethodAndProtoRef: "IndexType" + """Dual index: method and prototype (used by ``invoke-polymorphic``).""" + CallSiteRef: "IndexType" + """Index into the call-site items.""" + MethodHandleRef: "IndexType" + """Index into the method handles list.""" + ProtoRef: "IndexType" + """Index into the prototype identifiers list.""" + + def __int__(self) -> int: + """Return the integer discriminant of this index-type variant.""" + ... 
class Code: - NOP: Code - MOVE: Code - MOVE_FROM16: Code - MOVE_16: Code - MOVE_WIDE: Code - MOVE_WIDE_FROM16: Code - MOVE_WIDE_16: Code - MOVE_OBJECT: Code - MOVE_OBJECT_FROM16: Code - MOVE_OBJECT_16: Code - MOVE_RESULT: Code - MOVE_RESULT_WIDE: Code - MOVE_RESULT_OBJECT: Code - MOVE_EXCEPTION: Code - RETURN_VOID: Code - RETURN: Code - RETURN_WIDE: Code - RETURN_OBJECT: Code - CONST_4: Code - CONST_16: Code - CONST: Code - CONST_HIGH16: Code - CONST_WIDE_16: Code - CONST_WIDE_32: Code - CONST_WIDE: Code - CONST_WIDE_HIGH16: Code - CONST_STRING: Code - CONST_STRING_JUMBO: Code - CONST_CLASS: Code - MONITOR_ENTER: Code - MONITOR_EXIT: Code - CHECK_CAST: Code - INSTANCE_OF: Code - ARRAY_LENGTH: Code - NEW_INSTANCE: Code - NEW_ARRAY: Code - FILLED_NEW_ARRAY: Code - FILLED_NEW_ARRAY_RANGE: Code - FILL_ARRAY_DATA: Code - THROW: Code - GOTO: Code - GOTO_16: Code - GOTO_32: Code - PACKED_SWITCH: Code - SPARSE_SWITCH: Code - CMPL_FLOAT: Code - CMPG_FLOAT: Code - CMPL_DOUBLE: Code - CMPG_DOUBLE: Code - CMP_LONG: Code - IF_EQ: Code - IF_NE: Code - IF_LT: Code - IF_GE: Code - IF_GT: Code - IF_LE: Code - IF_EQZ: Code - IF_NEZ: Code - IF_LTZ: Code - IF_GEZ: Code - IF_GTZ: Code - IF_LEZ: Code - UNUSED_3E: Code - UNUSED_3F: Code - UNUSED_40: Code - UNUSED_41: Code - UNUSED_42: Code - UNUSED_43: Code - AGET: Code - AGET_WIDE: Code - AGET_OBJECT: Code - AGET_BOOLEAN: Code - AGET_BYTE: Code - AGET_CHAR: Code - AGET_SHORT: Code - APUT: Code - APUT_WIDE: Code - APUT_OBJECT: Code - APUT_BOOLEAN: Code - APUT_BYTE: Code - APUT_CHAR: Code - APUT_SHORT: Code - IGET: Code - IGET_WIDE: Code - IGET_OBJECT: Code - IGET_BOOLEAN: Code - IGET_BYTE: Code - IGET_CHAR: Code - IGET_SHORT: Code - IPUT: Code - IPUT_WIDE: Code - IPUT_OBJECT: Code - IPUT_BOOLEAN: Code - IPUT_BYTE: Code - IPUT_CHAR: Code - IPUT_SHORT: Code - SGET: Code - SGET_WIDE: Code - SGET_OBJECT: Code - SGET_BOOLEAN: Code - SGET_BYTE: Code - SGET_CHAR: Code - SGET_SHORT: Code - SPUT: Code - SPUT_WIDE: Code - SPUT_OBJECT: Code - 
SPUT_BOOLEAN: Code - SPUT_BYTE: Code - SPUT_CHAR: Code - SPUT_SHORT: Code - INVOKE_VIRTUAL: Code - INVOKE_SUPER: Code - INVOKE_DIRECT: Code - INVOKE_STATIC: Code - INVOKE_INTERFACE: Code - UNUSED_73: Code - INVOKE_VIRTUAL_RANGE: Code - INVOKE_SUPER_RANGE: Code - INVOKE_DIRECT_RANGE: Code - INVOKE_STATIC_RANGE: Code - INVOKE_INTERFACE_RANGE: Code - UNUSED_79: Code - UNUSED_7A: Code - NEG_INT: Code - NOT_INT: Code - NEG_LONG: Code - NOT_LONG: Code - NEG_FLOAT: Code - NEG_DOUBLE: Code - INT_TO_LONG: Code - INT_TO_FLOAT: Code - INT_TO_DOUBLE: Code - LONG_TO_INT: Code - LONG_TO_FLOAT: Code - LONG_TO_DOUBLE: Code - FLOAT_TO_INT: Code - FLOAT_TO_LONG: Code - FLOAT_TO_DOUBLE: Code - DOUBLE_TO_INT: Code - DOUBLE_TO_LONG: Code - DOUBLE_TO_FLOAT: Code - INT_TO_BYTE: Code - INT_TO_CHAR: Code - INT_TO_SHORT: Code - ADD_INT: Code - SUB_INT: Code - MUL_INT: Code - DIV_INT: Code - REM_INT: Code - AND_INT: Code - OR_INT: Code - XOR_INT: Code - SHL_INT: Code - SHR_INT: Code - USHR_INT: Code - ADD_LONG: Code - SUB_LONG: Code - MUL_LONG: Code - DIV_LONG: Code - REM_LONG: Code - AND_LONG: Code - OR_LONG: Code - XOR_LONG: Code - SHL_LONG: Code - SHR_LONG: Code - USHR_LONG: Code - ADD_FLOAT: Code - SUB_FLOAT: Code - MUL_FLOAT: Code - DIV_FLOAT: Code - REM_FLOAT: Code - ADD_DOUBLE: Code - SUB_DOUBLE: Code - MUL_DOUBLE: Code - DIV_DOUBLE: Code - REM_DOUBLE: Code - ADD_INT_2ADDR: Code - SUB_INT_2ADDR: Code - MUL_INT_2ADDR: Code - DIV_INT_2ADDR: Code - REM_INT_2ADDR: Code - AND_INT_2ADDR: Code - OR_INT_2ADDR: Code - XOR_INT_2ADDR: Code - SHL_INT_2ADDR: Code - SHR_INT_2ADDR: Code - USHR_INT_2ADDR: Code - ADD_LONG_2ADDR: Code - SUB_LONG_2ADDR: Code - MUL_LONG_2ADDR: Code - DIV_LONG_2ADDR: Code - REM_LONG_2ADDR: Code - AND_LONG_2ADDR: Code - OR_LONG_2ADDR: Code - XOR_LONG_2ADDR: Code - SHL_LONG_2ADDR: Code - SHR_LONG_2ADDR: Code - USHR_LONG_2ADDR: Code - ADD_FLOAT_2ADDR: Code - SUB_FLOAT_2ADDR: Code - MUL_FLOAT_2ADDR: Code - DIV_FLOAT_2ADDR: Code - REM_FLOAT_2ADDR: Code - ADD_DOUBLE_2ADDR: Code 
- SUB_DOUBLE_2ADDR: Code - MUL_DOUBLE_2ADDR: Code - DIV_DOUBLE_2ADDR: Code - REM_DOUBLE_2ADDR: Code - ADD_INT_LIT16: Code - RSUB_INT: Code - MUL_INT_LIT16: Code - DIV_INT_LIT16: Code - REM_INT_LIT16: Code - AND_INT_LIT16: Code - OR_INT_LIT16: Code - XOR_INT_LIT16: Code - ADD_INT_LIT8: Code - RSUB_INT_LIT8: Code - MUL_INT_LIT8: Code - DIV_INT_LIT8: Code - REM_INT_LIT8: Code - AND_INT_LIT8: Code - OR_INT_LIT8: Code - XOR_INT_LIT8: Code - SHL_INT_LIT8: Code - SHR_INT_LIT8: Code - USHR_INT_LIT8: Code - UNUSED_E3: Code - UNUSED_E4: Code - UNUSED_E5: Code - UNUSED_E6: Code - UNUSED_E7: Code - UNUSED_E8: Code - UNUSED_E9: Code - UNUSED_EA: Code - UNUSED_EB: Code - UNUSED_EC: Code - UNUSED_ED: Code - UNUSED_EE: Code - UNUSED_EF: Code - UNUSED_F0: Code - UNUSED_F1: Code - UNUSED_F2: Code - UNUSED_F3: Code - UNUSED_F4: Code - UNUSED_F5: Code - UNUSED_F6: Code - UNUSED_F7: Code - UNUSED_F8: Code - UNUSED_F9: Code - INVOKE_POLYMORPHIC: Code - INVOKE_POLYMORPHIC_RANGE: Code - INVOKE_CUSTOM: Code - INVOKE_CUSTOM_RANGE: Code - CONST_METHOD_HANDLE: Code - CONST_METHOD_TYPE: Code - - def __int__(self) -> int: ... + """Dalvik opcode enum. + + Each class attribute is an instance of :class:`Code` representing one + Dalvik opcode. Use :attr:`Instruction.opcode` to obtain the opcode of a + decoded instruction, or compare directly:: + + if inst.opcode == Code.RETURN_VOID: + ... 
+ """ + + NOP: "Code" + MOVE: "Code" + MOVE_FROM16: "Code" + MOVE_16: "Code" + MOVE_WIDE: "Code" + MOVE_WIDE_FROM16: "Code" + MOVE_WIDE_16: "Code" + MOVE_OBJECT: "Code" + MOVE_OBJECT_FROM16: "Code" + MOVE_OBJECT_16: "Code" + MOVE_RESULT: "Code" + MOVE_RESULT_WIDE: "Code" + MOVE_RESULT_OBJECT: "Code" + MOVE_EXCEPTION: "Code" + RETURN_VOID: "Code" + RETURN: "Code" + RETURN_WIDE: "Code" + RETURN_OBJECT: "Code" + CONST_4: "Code" + CONST_16: "Code" + CONST: "Code" + CONST_HIGH16: "Code" + CONST_WIDE_16: "Code" + CONST_WIDE_32: "Code" + CONST_WIDE: "Code" + CONST_WIDE_HIGH16: "Code" + CONST_STRING: "Code" + CONST_STRING_JUMBO: "Code" + CONST_CLASS: "Code" + MONITOR_ENTER: "Code" + MONITOR_EXIT: "Code" + CHECK_CAST: "Code" + INSTANCE_OF: "Code" + ARRAY_LENGTH: "Code" + NEW_INSTANCE: "Code" + NEW_ARRAY: "Code" + FILLED_NEW_ARRAY: "Code" + FILLED_NEW_ARRAY_RANGE: "Code" + FILL_ARRAY_DATA: "Code" + THROW: "Code" + GOTO: "Code" + GOTO_16: "Code" + GOTO_32: "Code" + PACKED_SWITCH: "Code" + SPARSE_SWITCH: "Code" + CMPL_FLOAT: "Code" + CMPG_FLOAT: "Code" + CMPL_DOUBLE: "Code" + CMPG_DOUBLE: "Code" + CMP_LONG: "Code" + IF_EQ: "Code" + IF_NE: "Code" + IF_LT: "Code" + IF_GE: "Code" + IF_GT: "Code" + IF_LE: "Code" + IF_EQZ: "Code" + IF_NEZ: "Code" + IF_LTZ: "Code" + IF_GEZ: "Code" + IF_GTZ: "Code" + IF_LEZ: "Code" + UNUSED_3E: "Code" + UNUSED_3F: "Code" + UNUSED_40: "Code" + UNUSED_41: "Code" + UNUSED_42: "Code" + UNUSED_43: "Code" + AGET: "Code" + AGET_WIDE: "Code" + AGET_OBJECT: "Code" + AGET_BOOLEAN: "Code" + AGET_BYTE: "Code" + AGET_CHAR: "Code" + AGET_SHORT: "Code" + APUT: "Code" + APUT_WIDE: "Code" + APUT_OBJECT: "Code" + APUT_BOOLEAN: "Code" + APUT_BYTE: "Code" + APUT_CHAR: "Code" + APUT_SHORT: "Code" + IGET: "Code" + IGET_WIDE: "Code" + IGET_OBJECT: "Code" + IGET_BOOLEAN: "Code" + IGET_BYTE: "Code" + IGET_CHAR: "Code" + IGET_SHORT: "Code" + IPUT: "Code" + IPUT_WIDE: "Code" + IPUT_OBJECT: "Code" + IPUT_BOOLEAN: "Code" + IPUT_BYTE: "Code" + IPUT_CHAR: "Code" + IPUT_SHORT: 
"Code" + SGET: "Code" + SGET_WIDE: "Code" + SGET_OBJECT: "Code" + SGET_BOOLEAN: "Code" + SGET_BYTE: "Code" + SGET_CHAR: "Code" + SGET_SHORT: "Code" + SPUT: "Code" + SPUT_WIDE: "Code" + SPUT_OBJECT: "Code" + SPUT_BOOLEAN: "Code" + SPUT_BYTE: "Code" + SPUT_CHAR: "Code" + SPUT_SHORT: "Code" + INVOKE_VIRTUAL: "Code" + INVOKE_SUPER: "Code" + INVOKE_DIRECT: "Code" + INVOKE_STATIC: "Code" + INVOKE_INTERFACE: "Code" + UNUSED_73: "Code" + INVOKE_VIRTUAL_RANGE: "Code" + INVOKE_SUPER_RANGE: "Code" + INVOKE_DIRECT_RANGE: "Code" + INVOKE_STATIC_RANGE: "Code" + INVOKE_INTERFACE_RANGE: "Code" + UNUSED_79: "Code" + UNUSED_7A: "Code" + NEG_INT: "Code" + NOT_INT: "Code" + NEG_LONG: "Code" + NOT_LONG: "Code" + NEG_FLOAT: "Code" + NEG_DOUBLE: "Code" + INT_TO_LONG: "Code" + INT_TO_FLOAT: "Code" + INT_TO_DOUBLE: "Code" + LONG_TO_INT: "Code" + LONG_TO_FLOAT: "Code" + LONG_TO_DOUBLE: "Code" + FLOAT_TO_INT: "Code" + FLOAT_TO_LONG: "Code" + FLOAT_TO_DOUBLE: "Code" + DOUBLE_TO_INT: "Code" + DOUBLE_TO_LONG: "Code" + DOUBLE_TO_FLOAT: "Code" + INT_TO_BYTE: "Code" + INT_TO_CHAR: "Code" + INT_TO_SHORT: "Code" + ADD_INT: "Code" + SUB_INT: "Code" + MUL_INT: "Code" + DIV_INT: "Code" + REM_INT: "Code" + AND_INT: "Code" + OR_INT: "Code" + XOR_INT: "Code" + SHL_INT: "Code" + SHR_INT: "Code" + USHR_INT: "Code" + ADD_LONG: "Code" + SUB_LONG: "Code" + MUL_LONG: "Code" + DIV_LONG: "Code" + REM_LONG: "Code" + AND_LONG: "Code" + OR_LONG: "Code" + XOR_LONG: "Code" + SHL_LONG: "Code" + SHR_LONG: "Code" + USHR_LONG: "Code" + ADD_FLOAT: "Code" + SUB_FLOAT: "Code" + MUL_FLOAT: "Code" + DIV_FLOAT: "Code" + REM_FLOAT: "Code" + ADD_DOUBLE: "Code" + SUB_DOUBLE: "Code" + MUL_DOUBLE: "Code" + DIV_DOUBLE: "Code" + REM_DOUBLE: "Code" + ADD_INT_2ADDR: "Code" + SUB_INT_2ADDR: "Code" + MUL_INT_2ADDR: "Code" + DIV_INT_2ADDR: "Code" + REM_INT_2ADDR: "Code" + AND_INT_2ADDR: "Code" + OR_INT_2ADDR: "Code" + XOR_INT_2ADDR: "Code" + SHL_INT_2ADDR: "Code" + SHR_INT_2ADDR: "Code" + USHR_INT_2ADDR: "Code" + ADD_LONG_2ADDR: "Code" + 
SUB_LONG_2ADDR: "Code" + MUL_LONG_2ADDR: "Code" + DIV_LONG_2ADDR: "Code" + REM_LONG_2ADDR: "Code" + AND_LONG_2ADDR: "Code" + OR_LONG_2ADDR: "Code" + XOR_LONG_2ADDR: "Code" + SHL_LONG_2ADDR: "Code" + SHR_LONG_2ADDR: "Code" + USHR_LONG_2ADDR: "Code" + ADD_FLOAT_2ADDR: "Code" + SUB_FLOAT_2ADDR: "Code" + MUL_FLOAT_2ADDR: "Code" + DIV_FLOAT_2ADDR: "Code" + REM_FLOAT_2ADDR: "Code" + ADD_DOUBLE_2ADDR: "Code" + SUB_DOUBLE_2ADDR: "Code" + MUL_DOUBLE_2ADDR: "Code" + DIV_DOUBLE_2ADDR: "Code" + REM_DOUBLE_2ADDR: "Code" + ADD_INT_LIT16: "Code" + RSUB_INT: "Code" + MUL_INT_LIT16: "Code" + DIV_INT_LIT16: "Code" + REM_INT_LIT16: "Code" + AND_INT_LIT16: "Code" + OR_INT_LIT16: "Code" + XOR_INT_LIT16: "Code" + ADD_INT_LIT8: "Code" + RSUB_INT_LIT8: "Code" + MUL_INT_LIT8: "Code" + DIV_INT_LIT8: "Code" + REM_INT_LIT8: "Code" + AND_INT_LIT8: "Code" + OR_INT_LIT8: "Code" + XOR_INT_LIT8: "Code" + SHL_INT_LIT8: "Code" + SHR_INT_LIT8: "Code" + USHR_INT_LIT8: "Code" + UNUSED_E3: "Code" + UNUSED_E4: "Code" + UNUSED_E5: "Code" + UNUSED_E6: "Code" + UNUSED_E7: "Code" + UNUSED_E8: "Code" + UNUSED_E9: "Code" + UNUSED_EA: "Code" + UNUSED_EB: "Code" + UNUSED_EC: "Code" + UNUSED_ED: "Code" + UNUSED_EE: "Code" + UNUSED_EF: "Code" + UNUSED_F0: "Code" + UNUSED_F1: "Code" + UNUSED_F2: "Code" + UNUSED_F3: "Code" + UNUSED_F4: "Code" + UNUSED_F5: "Code" + UNUSED_F6: "Code" + UNUSED_F7: "Code" + UNUSED_F8: "Code" + UNUSED_F9: "Code" + INVOKE_POLYMORPHIC: "Code" + INVOKE_POLYMORPHIC_RANGE: "Code" + INVOKE_CUSTOM: "Code" + INVOKE_CUSTOM_RANGE: "Code" + CONST_METHOD_HANDLE: "Code" + CONST_METHOD_TYPE: "Code" + + def __int__(self) -> int: + """Return the integer opcode value.""" + ... diff --git a/python/dexrs/_internal/code/code_flags.pyi b/python/dexrs/_internal/code/code_flags.pyi old mode 100644 new mode 100755 index 3d293a3..b3843bc --- a/python/dexrs/_internal/code/code_flags.pyi +++ b/python/dexrs/_internal/code/code_flags.pyi @@ -1,2 +1,11 @@ -Complex: int = ... -Custom: int = ... 
+"""Type stubs for the ``dexrs._internal.code.code_flags`` sub-module. + +These flags describe additional per-instruction properties used by the +ART verifier and optimiser that are not captured by the basic control-flow +flags in :mod:`~dexrs._internal.code.flags`. +""" + +Complex: int +"""Instruction has complex behaviour that requires special verifier treatment.""" +Custom: int +"""Instruction is custom (``invoke-custom`` / ``invoke-custom/range``).""" diff --git a/python/dexrs/_internal/code/flags.pyi b/python/dexrs/_internal/code/flags.pyi old mode 100644 new mode 100755 index c86b85a..be9498e --- a/python/dexrs/_internal/code/flags.pyi +++ b/python/dexrs/_internal/code/flags.pyi @@ -1,8 +1,23 @@ -Branch: int = ... -Continue: int = ... -Switch: int = ... -Throw: int = ... -Return: int = ... -Invoke: int = ... -Unconditional: int = ... -Experimental: int = ... +"""Type stubs for the ``dexrs._internal.code.flags`` sub-module. + +Control-flow flags for Dalvik instructions. These bitmask constants are +returned by :meth:`~dexrs._internal.code.Instruction.get_flags_of` and can be +combined with bitwise OR. 
+""" + +Branch: int +"""Instruction is a conditional or unconditional branch.""" +Continue: int +"""Execution may fall through to the next instruction.""" +Switch: int +"""Instruction is a switch (``packed-switch`` or ``sparse-switch``).""" +Throw: int +"""Instruction may throw an exception.""" +Return: int +"""Instruction is a return (``return``, ``return-void``, etc.).""" +Invoke: int +"""Instruction is a method invocation.""" +Unconditional: int +"""Branch is unconditional (no fall-through path).""" +Experimental: int +"""Instruction is experimental / not part of the stable ART bytecode set.""" diff --git a/python/dexrs/_internal/code/signatures.pyi b/python/dexrs/_internal/code/signatures.pyi old mode 100644 new mode 100755 index 3b2a31b..60a99e8 --- a/python/dexrs/_internal/code/signatures.pyi +++ b/python/dexrs/_internal/code/signatures.pyi @@ -1,3 +1,13 @@ -ArrayDataSignature: int = ... -SparseSwitchSignature: int = ... -PackedSwitchSignature: int = ... +"""Type stubs for the ``dexrs._internal.code.signatures`` sub-module. + +Magic 16-bit values that identify pseudo-instruction payloads embedded in a +Dalvik method body. These appear as the first code unit of a payload block +and are used to distinguish payload types during decoding. +""" + +ArrayDataSignature: int +"""First code unit of a ``fill-array-data`` payload block (``0x0300``).""" +SparseSwitchSignature: int +"""First code unit of a ``sparse-switch`` payload block (``0x0200``).""" +PackedSwitchSignature: int +"""First code unit of a ``packed-switch`` payload block (``0x0100``).""" diff --git a/python/dexrs/_internal/code/verify_flags.pyi b/python/dexrs/_internal/code/verify_flags.pyi old mode 100644 new mode 100755 index c8b4c8d..00c091f --- a/python/dexrs/_internal/code/verify_flags.pyi +++ b/python/dexrs/_internal/code/verify_flags.pyi @@ -1,27 +1,61 @@ -VerifyNothing: int = ... -VerifyRegA: int = ... -VerifyRegAWide: int = ... -VerifyRegB: int = ... -VerifyRegBField: int = ... 
-VerifyRegBMethod: int = ... -VerifyRegBNewInstance: int = ... -VerifyRegBString: int = ... -VerifyRegBType: int = ... -VerifyRegBWide: int = ... -VerifyRegC: int = ... -VerifyRegCField: int = ... -VerifyRegCNewArray: int = ... -VerifyRegCType: int = ... -VerifyRegCWide: int = ... -VerifyArrayData: int = ... -VerifyBranchTarget: int = ... -VerifySwitchTargets: int = ... -VerifyVarArg: int = ... -VerifyVarArgNonZero: int = ... -VerifyVarArgRange: int = ... -VerifyVarArgRangeNonZero: int = ... -VerifyError: int = ... -VerifyRegHPrototype: int = ... -VerifyRegBCallSite: int = ... -VerifyRegBMethodHandle: int = ... -VerifyRegBPrototype: int = ... +"""Type stubs for the ``dexrs._internal.code.verify_flags`` sub-module. + +Per-instruction verification flags used by the ART bytecode verifier. +Each constant is a bit in the bitmask returned by +:meth:`~dexrs._internal.code.Instruction.get_verify_flags_of`. +""" + +VerifyNothing: int +"""No verification required for any operand.""" +VerifyRegA: int +"""Verify that register ``vA`` is valid.""" +VerifyRegAWide: int +"""Verify that register ``vA`` is a valid wide (64-bit) register pair.""" +VerifyRegB: int +"""Verify that register ``vB`` is valid.""" +VerifyRegBField: int +"""Verify that ``vB`` is a valid field reference.""" +VerifyRegBMethod: int +"""Verify that ``vB`` is a valid method reference.""" +VerifyRegBNewInstance: int +"""Verify that ``vB`` is a valid new-instance type reference.""" +VerifyRegBString: int +"""Verify that ``vB`` is a valid string reference.""" +VerifyRegBType: int +"""Verify that ``vB`` is a valid type reference.""" +VerifyRegBWide: int +"""Verify that ``vB`` is a valid wide register pair.""" +VerifyRegC: int +"""Verify that register ``vC`` is valid.""" +VerifyRegCField: int +"""Verify that ``vC`` is a valid field reference.""" +VerifyRegCNewArray: int +"""Verify that ``vC`` is a valid new-array type reference.""" +VerifyRegCType: int +"""Verify that ``vC`` is a valid type reference.""" 
+VerifyRegCWide: int +"""Verify that ``vC`` is a valid wide register pair.""" +VerifyArrayData: int +"""Verify the ``fill-array-data`` payload referenced by this instruction.""" +VerifyBranchTarget: int +"""Verify that the branch target is within the method body.""" +VerifySwitchTargets: int +"""Verify all targets of a ``packed-switch`` or ``sparse-switch`` instruction.""" +VerifyVarArg: int +"""Verify the variable-length argument list (up to 5 registers).""" +VerifyVarArgNonZero: int +"""Verify the variable-length argument list is non-empty.""" +VerifyVarArgRange: int +"""Verify the register-range argument list.""" +VerifyVarArgRangeNonZero: int +"""Verify the register-range argument list is non-empty.""" +VerifyError: int +"""Instruction always produces a verification error.""" +VerifyRegHPrototype: int +"""Verify that ``vH`` is a valid prototype reference.""" +VerifyRegBCallSite: int +"""Verify that ``vB`` is a valid call-site reference.""" +VerifyRegBMethodHandle: int +"""Verify that ``vB`` is a valid method-handle reference.""" +VerifyRegBPrototype: int +"""Verify that ``vB`` is a valid prototype (proto-ID) reference.""" diff --git a/python/dexrs/_internal/code/vreg.pyi b/python/dexrs/_internal/code/vreg.pyi old mode 100644 new mode 100755 index 95daf9d..0171236 --- a/python/dexrs/_internal/code/vreg.pyi +++ b/python/dexrs/_internal/code/vreg.pyi @@ -1,3 +1,22 @@ +"""Type stubs for the ``dexrs._internal.code.vreg`` sub-module. + +Virtual-register operand accessor functions for decoded Dalvik instructions. + +The Dalvik instruction formats use lettered operand slots (``A``, ``B``, ``C``, +``H``). These helpers extract the value of each slot from an +:class:`~dexrs._internal.code.Instruction` according to its format. 
+ +Example:: + + from dexrs._internal.code import vreg + + # For an "iget v1, v2, field@0003" instruction (format k22c): + if vreg.has_a(inst): + dst = vreg.A(inst) # destination register index + if vreg.has_b(inst): + obj = vreg.B(inst) # object register index +""" + from . import ( Instruction, PackedSwitchPayload, @@ -5,16 +24,78 @@ from . import ( SparseSwitchPayload, ) -def has_a(inst: Instruction) -> bool: ... -def has_b(inst: Instruction) -> bool: ... -def has_c(inst: Instruction) -> bool: ... -def has_h(inst: Instruction) -> bool: ... -def A(inst: Instruction) -> int: ... -def B(inst: Instruction) -> int: ... -def C(inst: Instruction) -> int: ... -def H(inst: Instruction) -> int: ... -def has_wide_b(inst: Instruction) -> bool: ... -def wide_b(inst: Instruction) -> int: ... -def array_data(inst: Instruction) -> FillArrayDataPayload: ... -def packed_switch(inst: Instruction) -> PackedSwitchPayload: ... -def sparse_switch(inst: Instruction) -> SparseSwitchPayload: ... +def has_a(inst: Instruction) -> bool: + """Return ``True`` if *inst* has an ``A`` operand slot.""" + ... + +def has_b(inst: Instruction) -> bool: + """Return ``True`` if *inst* has a ``B`` operand slot.""" + ... + +def has_c(inst: Instruction) -> bool: + """Return ``True`` if *inst* has a ``C`` operand slot.""" + ... + +def has_h(inst: Instruction) -> bool: + """Return ``True`` if *inst* has an ``H`` (prototype index) operand slot.""" + ... + +def A(inst: Instruction) -> int: + """Return the value of the ``A`` operand (typically the destination register). + + :raises ValueError: If *inst* has no ``A`` operand. + """ + ... + +def B(inst: Instruction) -> int: + """Return the value of the ``B`` operand (typically the first source register or index). + + :raises ValueError: If *inst* has no ``B`` operand. + """ + ... + +def C(inst: Instruction) -> int: + """Return the value of the ``C`` operand (typically the second source register). + + :raises ValueError: If *inst* has no ``C`` operand. + """ + ...
+ +def H(inst: Instruction) -> int: + """Return the value of the ``H`` (prototype index, ``proto@HHHH``) operand. + + :raises ValueError: If *inst* has no ``H`` operand. + """ + ... + +def has_wide_b(inst: Instruction) -> bool: + """Return ``True`` if *inst* has a wide (64-bit) ``B`` operand (e.g. format ``k51l``).""" + ... + +def wide_b(inst: Instruction) -> int: + """Return the wide (64-bit) ``B`` operand value. + + :raises ValueError: If *inst* has no wide ``B`` operand. + """ + ... + +def array_data(inst: Instruction) -> FillArrayDataPayload: + """Decode and return the ``fill-array-data`` payload referenced by *inst*. + + :raises PyDexError: If *inst* is not a ``fill-array-data`` instruction. + """ + ... + +def packed_switch(inst: Instruction) -> PackedSwitchPayload: + """Decode and return the ``packed-switch`` payload referenced by *inst*. + + :raises PyDexError: If *inst* is not a ``packed-switch`` instruction. + """ + ... + +def sparse_switch(inst: Instruction) -> SparseSwitchPayload: + """Decode and return the ``sparse-switch`` payload referenced by *inst*. + + :raises PyDexError: If *inst* is not a ``sparse-switch`` instruction. + """ + ... diff --git a/python/dexrs/_internal/container.pyi b/python/dexrs/_internal/container.pyi old mode 100644 new mode 100755 index 0370c92..af0d4d6 --- a/python/dexrs/_internal/container.pyi +++ b/python/dexrs/_internal/container.pyi @@ -1,24 +1,96 @@ +"""Type stubs for the ``dexrs._internal.container`` native extension module. + +A *container* is the backing store that supplies raw DEX bytes to +:class:`~dexrs._internal.file.DexFile`. Choose the variant that best fits +where your DEX data lives. +""" + import abc -# deprecated class DexContainer(abc.ABC): - def data(self) -> bytes: ... + """Abstract base class for DEX backing stores. + + .. deprecated:: + Prefer :class:`InMemoryDexContainer` or :class:`FileDexContainer` + directly. This ABC is retained for compatibility only.
+ """ + + def data(self) -> bytes: + """Return the raw DEX bytes held by this container.""" + ... + @property @abc.abstractmethod - def file_size(self) -> int: ... + def file_size(self) -> int: + """Total size of the DEX image in bytes.""" + ... class InMemoryDexContainer: - def __init__(self, data: bytes) -> None: ... - def data(self) -> bytes: ... + """A container that wraps an in-memory ``bytes`` buffer. + + The bytes are copied on construction and owned by the container for the + lifetime of any :class:`~dexrs._internal.file.DexFile` that references it. + + Example:: + + with open("classes.dex", "rb") as f: + container = InMemoryDexContainer(f.read()) + """ + + def __init__(self, data: bytes) -> None: + """Construct from a raw DEX byte string. + + :param data: Raw bytes of a valid ``.dex`` file. + """ + ... + + def data(self) -> bytes: + """Return the raw DEX bytes held by this container.""" + ... + @property - def file_size(self) -> int: ... - def __len__(self) -> int: ... + def file_size(self) -> int: + """Total size of the DEX image in bytes.""" + ... + + def __len__(self) -> int: + """Return the number of bytes in the container (same as :attr:`file_size`).""" + ... class FileDexContainer: - def __init__(self, path: str) -> None: ... - def data(self) -> bytes: ... + """A container that memory-maps a DEX file on disk (zero-copy reads). + + The file is kept open and mapped for the lifetime of the container. + + Example:: + + container = FileDexContainer("classes.dex") + print(container.location) # "classes.dex" + print(container.file_size) # size in bytes + """ + + def __init__(self, path: str) -> None: + """Open and memory-map the DEX file at *path*. + + :param path: Filesystem path to a ``.dex`` file. + :raises IOError: If the file cannot be opened or mapped. + """ + ... + + def data(self) -> bytes: + """Return the memory-mapped DEX bytes as a ``bytes`` view.""" + ... + @property - def file_size(self) -> int: ... 
+ def file_size(self) -> int: + """Total size of the mapped file in bytes.""" + ... + @property - def location(self) -> str: ... - def __len__(self) -> int: ... + def location(self) -> str: + """The filesystem path that was passed to the constructor.""" + ... + + def __len__(self) -> int: + """Return the number of bytes in the container (same as :attr:`file_size`).""" + ... diff --git a/python/dexrs/_internal/editor.pyi b/python/dexrs/_internal/editor.pyi new file mode 100755 index 0000000..3ad61f3 --- /dev/null +++ b/python/dexrs/_internal/editor.pyi @@ -0,0 +1,158 @@ +"""Type stubs for the ``dexrs._internal.editor`` native extension module.""" + + +class DexEditor: + """Mutable DEX file editor backed by the Rust ``DexEditor`` implementation. + + Construct an editor from a file path or raw bytes, apply zero or more + mutations, then call :meth:`build` or :meth:`write_to` to finalise. + The editor is **consumed** by :meth:`build` or :meth:`write_to` — any + subsequent call will raise :exc:`IOError`. + + Example:: + + editor = DexEditor.from_file("classes.dex") + editor.rename_class("LMain;", "LEntry;") + editor.set_class_access_flags("LEntry;", 0x0001) + editor.write_to("classes_patched.dex") + """ + + @staticmethod + def from_file(path: str) -> "DexEditor": + """Open a DEX file from *path* and return a new editor. + + :param path: Filesystem path to the ``.dex`` file. + :type path: str + :returns: A new :class:`DexEditor` backed by the file on disk. + :rtype: DexEditor + :raises IOError: If the file cannot be opened or parsed. + + Example:: + + editor = DexEditor.from_file("classes.dex") + """ + ... + + @staticmethod + def from_bytes(data: bytes) -> "DexEditor": + """Construct a :class:`DexEditor` from raw DEX *data*. + + :param data: Raw bytes of a valid ``.dex`` file. + :type data: bytes + :returns: A new :class:`DexEditor` backed by an in-memory copy of *data*. + :rtype: DexEditor + :raises IOError: If *data* cannot be parsed as a DEX file. 
+ + Example:: + + with open("classes.dex", "rb") as f: + editor = DexEditor.from_bytes(f.read()) + """ + ... + + def set_class_access_flags(self, class_desc: str, flags: int) -> None: + """Replace the access flags of *class_desc* with *flags*. + + *class_desc* is accepted in dotted (``com.example.Foo``), slash + (``com/example/Foo``), or descriptor (``Lcom/example/Foo;``) form. + + :param class_desc: Class name in any supported notation. + :type class_desc: str + :param flags: New ``access_flags`` bitmask (e.g. ``0x0001`` for public). + :type flags: int + :raises IOError: If the class cannot be found or the editor is consumed. + + Example:: + + editor.set_class_access_flags("com.example.Foo", 0x0001) # public + """ + ... + + def set_method_access_flags( + self, class_desc: str, method_name: str, flags: int + ) -> None: + """Replace the access flags of *method_name* inside *class_desc*. + + LEB128 re-encoding is handled automatically when the encoded width of + *flags* differs from the original. + + :param class_desc: Owning class name in any supported notation. + :type class_desc: str + :param method_name: Simple method name (no signature). + :type method_name: str + :param flags: New ``access_flags`` bitmask. + :type flags: int + :raises IOError: If the class or method cannot be found, or the editor + is consumed. + + Example:: + + editor.set_method_access_flags("LMain;", "run", 0x0001) # public + """ + ... + + def clear_hiddenapi_flags(self) -> None: + """Zero out the ``HiddenapiClassData`` section and remove its map entry. + + Useful when the patched DEX is loaded by a runtime that rejects + hidden-API annotations. + + :raises IOError: If the editor is already consumed. + + Example:: + + editor.clear_hiddenapi_flags() + """ + ... + + def rename_class(self, old_name: str, new_name: str) -> None: + """Rename a class, updating the string pool, type references, and checksum. + + Both *old_name* and *new_name* accept dotted, slash, or descriptor form. 
+ + :param old_name: Current class name. + :type old_name: str + :param new_name: Desired class name. + :type new_name: str + :raises IOError: If the class cannot be found or the editor is consumed. + + Example:: + + editor.rename_class("LMain;", "LRenamedMain;") + """ + ... + + def build(self) -> bytes: + """Finalise all edits, recalculate the Adler32 checksum, and return the + modified DEX as :class:`bytes`. + + The editor is **consumed** by this call; further mutations will raise + :exc:`IOError`. + + :returns: Complete, checksum-correct DEX image. + :rtype: bytes + :raises IOError: If finalisation fails or the editor is already consumed. + + Example:: + + data = editor.build() + with open("out.dex", "wb") as f: + f.write(data) + """ + ... + + def write_to(self, path: str) -> None: + """Finalise all edits and write the modified DEX directly to *path*. + + The editor is **consumed** by this call; further mutations will raise + :exc:`IOError`. + + :param path: Destination file path. + :type path: str + :raises IOError: If writing fails or the editor is already consumed. + + Example:: + + editor.write_to("out.dex") + """ + ... diff --git a/python/dexrs/_internal/error.pyi b/python/dexrs/_internal/error.pyi old mode 100644 new mode 100755 index 757ab86..2bb551b --- a/python/dexrs/_internal/error.pyi +++ b/python/dexrs/_internal/error.pyi @@ -1,2 +1,20 @@ +"""Type stubs for the ``dexrs._internal.error`` native extension module.""" + + class PyDexError(Exception): + """Exception raised by ``dexrs`` when a Rust-level operation fails. + + Covers malformed DEX images, out-of-bounds indices, checksum mismatches, + I/O errors, and other parse failures that are not plain :exc:`IOError`. + + Example:: + + from dexrs.error import PyDexError + + try: + dex = DexFile.from_bytes(InMemoryDexContainer(b"bad")) + except PyDexError as exc: + print(f"DEX error: {exc}") + """ + def __init__(self, message: str) -> None: ... 
diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi old mode 100644 new mode 100755 index 86334b5..0ba1cb8 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -1,3 +1,15 @@ +"""Type stubs for the ``dexrs._internal.file`` native extension module. + +The central types for parsing and querying Android DEX files. + +- :class:`VerifyPreset` — controls which header checks to run on open. +- :class:`DexFile` — the parsed DEX image; exposes read-only accessors for + every section described by the `AOSP DEX format specification`_. + +.. _AOSP DEX format specification: + https://source.android.com/docs/core/runtime/dex-format +""" + from typing import Optional, Tuple, List from .container import InMemoryDexContainer, FileDexContainer @@ -12,85 +24,291 @@ from .structs import ( TypeItem, CatchHandlerData, TryItem, - AnnotationItem + AnnotationItem, ) from .class_accessor import ClassAccessor from .code import CodeItemAccessor from .annotation import AnnotationSetItem, ClassAnnotationAccessor +from .type_lookup_table import TypeLookupTable class VerifyPreset: - ALL: VerifyPreset - NONE: VerifyPreset - CHECKSUM_ONLY: VerifyPreset + """Selects which integrity checks to perform when opening a DEX file. + + Pass one of the class-level constants to + :meth:`DexFile.from_file` / :meth:`DexFile.from_bytes`. + """ + + ALL: "VerifyPreset" + """Run all available checks (magic, checksum, and structural validation).""" + NONE: "VerifyPreset" + """Skip all verification — fastest open, but unsafe on untrusted input.""" + CHECKSUM_ONLY: "VerifyPreset" + """Verify the Adler32 checksum only, skipping deeper structural checks.""" class DexFile: + """A parsed DEX image with read-only accessors for every DEX section. + + Construct via :meth:`from_file` or :meth:`from_bytes`. All index-based + getters follow the ``_at`` naming convention for direct integer indices; + overloads that accept a typed ID struct omit the suffix. 
+ + Example:: + + dex = DexFile.from_file(FileDexContainer("classes.dex"), VerifyPreset.ALL) + header = dex.get_header() + print(f"DEX version {header.version_int}, {dex.num_class_defs()} classes") + """ + @staticmethod - def from_file(data: FileDexContainer, preset: VerifyPreset = ...) -> DexFile: ... + def from_file(data: FileDexContainer, preset: VerifyPreset = ...) -> "DexFile": + """Open a DEX file from a :class:`~dexrs._internal.container.FileDexContainer`. + + :param data: Memory-mapped file container. + :param preset: Verification level; defaults to :attr:`VerifyPreset.ALL`. + :raises PyDexError: If the file is malformed or verification fails. + """ + ... + @staticmethod - def from_bytes( - data: InMemoryDexContainer, preset: VerifyPreset = ... - ) -> DexFile: ... - - # instance methods - def get_header(self) -> Header: ... - - # string ids - def get_string_id(self, index: int) -> StringId: ... - def get_string_id_opt(self, index: int) -> Optional[StringId]: ... - def num_string_ids(self) -> int: ... - - # string operations - def get_utf16_at(self, index: int) -> str: ... - def get_utf16(self, string_id: StringId) -> str: ... - def get_utf16_opt_at(self, string_id: StringId) -> Optional[str]: ... - def get_utf16_lossy(self, string_id: StringId) -> str: ... - def get_utf16_lossy_at(self, index: int) -> str: ... - def get_string_data(self, string_id: StringId) -> Tuple[int, bytes]: ... - def fast_get_utf8(self, string_id: StringId) -> str: ... - def fast_get_utf8_at(self, index: int) -> str: ... - - # type ids - def get_type_id(self, index: int) -> TypeId: ... - def get_type_id_opt(self, index: int) -> Optional[TypeId]: ... - def num_type_ids(self) -> int: ... - def get_type_desc(self, type_id: TypeId) -> str: ... - def get_type_desc_at(self, index: int) -> str: ... - def pretty_type_at(self, index: int) -> str: ... - def pretty_type(self, type_id: TypeId) -> str: ... - - # field ids - def get_field_id(self, index: int) -> FieldId: ... 
- def get_field_id_opt(self, index: int) -> Optional[FieldId]: ... - def num_field_ids(self) -> int: ... - def get_field_name(self, field_id: FieldId) -> str: ... - def get_field_name_at(self, index: int) -> str: ... - - # proto ids - def get_proto_id(self, index: int) -> ProtoId: ... - def get_proto_id_opt(self, index: int) -> Optional[ProtoId]: ... - def num_proto_ids(self) -> int: ... - def get_proto_shorty(self, proto_id: ProtoId) -> str: ... - def get_proto_shorty_at(self, index: int) -> str: ... - - # method ids - def get_method_id(self, index: int) -> MethodId: ... - def get_method_id_opt(self, index: int) -> Optional[MethodId]: ... - def num_method_ids(self) -> int: ... - - # class defs - def get_class_def(self, index: int) -> ClassDef: ... - def get_class_def_opt(self, index: int) -> Optional[ClassDef]: ... - def num_class_defs(self) -> int: ... - def get_class_desc(self, class_def: ClassDef) -> str: ... - def get_interfaces_list(self, class_def: ClassDef) -> Optional[List[TypeItem]]: ... - - # class data - def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: ... - def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: ... - def get_try_items(self, ca: CodeItemAccessor) -> List[TryItem]: ... - def get_catch_handlers(self, ca: CodeItemAccessor, try_item: TryItem) -> List[CatchHandlerData]: ... - - # annotations - def get_annotation_set(self, offset: int) -> AnnotationSetItem: ... - def get_annotation(self, offset: int) -> AnnotationItem: ... - def get_class_annotation_accessor(self, class_def: ClassDef) -> ClassAnnotationAccessor: ... \ No newline at end of file + def from_bytes(data: InMemoryDexContainer, preset: VerifyPreset = ...) -> "DexFile": + """Parse a DEX file from an in-memory container. + + :param data: In-memory bytes container. + :param preset: Verification level; defaults to :attr:`VerifyPreset.ALL`. + :raises PyDexError: If the bytes are not a valid DEX image. + """ + ... 
+ + def get_header(self) -> Header: + """Return the parsed DEX file header.""" + ... + + # ------------------------------------------------------------------ strings + + def get_string_id(self, index: int) -> StringId: + """Return the :class:`StringId` at *index*. + + :raises PyDexError: If *index* is out of range. + """ + ... + + def get_string_id_opt(self, index: int) -> Optional[StringId]: + """Return the :class:`StringId` at *index*, or ``None`` if *index* is the no-index sentinel.""" + ... + + def num_string_ids(self) -> int: + """Return the total number of string identifiers in the DEX file.""" + ... + + def get_utf16_at(self, index: int) -> str: + """Decode and return the string at the given string-ID *index*. + + :raises PyDexError: On MUTF-8 decode error or out-of-bounds *index*. + """ + ... + + def get_utf16(self, string_id: StringId) -> str: + """Decode and return the string referenced by *string_id*. + + :raises PyDexError: On MUTF-8 decode error. + """ + ... + + def get_utf16_opt_at(self, string_id: StringId) -> Optional[str]: + """Return the decoded string for *string_id*, or ``None`` for the no-index sentinel.""" + ... + + def get_utf16_lossy(self, string_id: StringId) -> str: + """Decode the string referenced by *string_id*, replacing invalid bytes with U+FFFD.""" + ... + + def get_utf16_lossy_at(self, index: int) -> str: + """Decode the string at *index*, replacing invalid bytes with U+FFFD.""" + ... + + def get_string_data(self, string_id: StringId) -> Tuple[int, bytes]: + """Return the raw ``(length_in_utf16_units, mutf8_bytes)`` for *string_id*.""" + ... + + def fast_get_utf8(self, string_id: StringId) -> str: + """Fast path: return the string referenced by *string_id* (assumes valid ASCII/UTF-8).""" + ... + + def fast_get_utf8_at(self, index: int) -> str: + """Fast path: return the string at *index* (assumes valid ASCII/UTF-8).""" + ... 
+ + # ----------------------------------------------------------------- type ids + + def get_type_id(self, index: int) -> TypeId: + """Return the :class:`TypeId` at *index*. + + :raises PyDexError: If *index* is out of range. + """ + ... + + def get_type_id_opt(self, index: int) -> Optional[TypeId]: + """Return the :class:`TypeId` at *index*, or ``None`` for the no-index sentinel.""" + ... + + def num_type_ids(self) -> int: + """Return the total number of type identifiers.""" + ... + + def get_type_desc(self, type_id: TypeId) -> str: + """Return the type descriptor string (e.g. ``"Ljava/lang/String;"``) for *type_id*.""" + ... + + def get_type_desc_at(self, index: int) -> str: + """Return the type descriptor string for the type-ID at *index*.""" + ... + + def pretty_type_at(self, index: int) -> str: + """Return a human-readable type name (e.g. ``"java.lang.String"``) for the type at *index*.""" + ... + + def pretty_type(self, type_id: TypeId) -> str: + """Return a human-readable type name for *type_id*.""" + ... + + # ---------------------------------------------------------------- field ids + + def get_field_id(self, index: int) -> FieldId: + """Return the :class:`FieldId` at *index*. + + :raises PyDexError: If *index* is out of range. + """ + ... + + def get_field_id_opt(self, index: int) -> Optional[FieldId]: + """Return the :class:`FieldId` at *index*, or ``None`` for the no-index sentinel.""" + ... + + def num_field_ids(self) -> int: + """Return the total number of field identifiers.""" + ... + + def get_field_name(self, field_id: FieldId) -> str: + """Return the simple name string for *field_id*.""" + ... + + def get_field_name_at(self, index: int) -> str: + """Return the simple name string for the field-ID at *index*.""" + ... + + # ---------------------------------------------------------------- proto ids + + def get_proto_id(self, index: int) -> ProtoId: + """Return the :class:`ProtoId` at *index*. + + :raises PyDexError: If *index* is out of range. 
+ """ + ... + + def get_proto_id_opt(self, index: int) -> Optional[ProtoId]: + """Return the :class:`ProtoId` at *index*, or ``None`` for the no-index sentinel.""" + ... + + def num_proto_ids(self) -> int: + """Return the total number of method prototype identifiers.""" + ... + + def get_proto_shorty(self, proto_id: ProtoId) -> str: + """Return the shorty descriptor string for *proto_id* (e.g. ``"VIL"``).""" + ... + + def get_proto_shorty_at(self, index: int) -> str: + """Return the shorty descriptor string for the proto-ID at *index*.""" + ... + + # --------------------------------------------------------------- method ids + + def get_method_id(self, index: int) -> MethodId: + """Return the :class:`MethodId` at *index*. + + :raises PyDexError: If *index* is out of range. + """ + ... + + def get_method_id_opt(self, index: int) -> Optional[MethodId]: + """Return the :class:`MethodId` at *index*, or ``None`` for the no-index sentinel.""" + ... + + def num_method_ids(self) -> int: + """Return the total number of method identifiers.""" + ... + + # --------------------------------------------------------------- class defs + + def get_class_def(self, index: int) -> ClassDef: + """Return the :class:`ClassDef` at *index*. + + :raises PyDexError: If *index* is out of range. + """ + ... + + def get_class_def_opt(self, index: int) -> Optional[ClassDef]: + """Return the :class:`ClassDef` at *index*, or ``None`` for the no-index sentinel.""" + ... + + def num_class_defs(self) -> int: + """Return the total number of class definitions.""" + ... + + def get_class_desc(self, class_def: ClassDef) -> str: + """Return the type descriptor for the class defined by *class_def*.""" + ... + + def get_interfaces_list(self, class_def: ClassDef) -> Optional[List[TypeItem]]: + """Return the list of interfaces implemented by *class_def*, or ``None`` if none.""" + ... 
+ + # ------------------------------------------------------------- class data + + def get_class_accessor(self, class_def: ClassDef) -> Optional[ClassAccessor]: + """Return a :class:`~dexrs._internal.class_accessor.ClassAccessor` for *class_def*. + + Returns ``None`` when the class has no ``class_data_item`` (i.e. it is + a pure interface or has no members). + """ + ... + + def get_code_item_accessor(self, code_off: int) -> CodeItemAccessor: + """Return a :class:`~dexrs._internal.code.CodeItemAccessor` for the ``code_item`` at *code_off*. + + :param code_off: Byte offset from the start of the DEX file. + :raises PyDexError: If *code_off* is invalid or out of range. + """ + ... + + def get_try_items(self, ca: CodeItemAccessor) -> List[TryItem]: + """Return the list of ``try_item`` structures from *ca*'s code item.""" + ... + + def get_catch_handlers( + self, ca: CodeItemAccessor, try_item: TryItem + ) -> List[CatchHandlerData]: + """Return the catch-handler entries for *try_item* within *ca*.""" + ... + + # ------------------------------------------------------------ annotations + + def get_annotation_set(self, offset: int) -> AnnotationSetItem: + """Return the list of annotation offsets at *offset* in the DEX file.""" + ... + + def get_annotation(self, offset: int) -> AnnotationItem: + """Return the parsed :class:`~dexrs._internal.structs.AnnotationItem` at *offset*.""" + ... + + def get_class_annotation_accessor( + self, class_def: ClassDef + ) -> ClassAnnotationAccessor: + """Return an accessor for all annotations attached to *class_def*.""" + ... + + # ------------------------------------------------------- type lookup table + + def build_type_lookup_table(self) -> TypeLookupTable: + """Build and return an O(1) :class:`~dexrs._internal.type_lookup_table.TypeLookupTable` for this DEX file.""" + ... 
diff --git a/python/dexrs/_internal/leb128.pyi b/python/dexrs/_internal/leb128.pyi old mode 100644 new mode 100755 index 20a8432..b644186 --- a/python/dexrs/_internal/leb128.pyi +++ b/python/dexrs/_internal/leb128.pyi @@ -1,3 +1,51 @@ -def decode_uleb128(data: bytes) -> int: ... -def decode_sleb128(data: bytes) -> int: ... -def decode_leb128p1(data: bytes) -> int: ... +"""Type stubs for the ``dexrs._internal.leb128`` native extension module. + +LEB128 (Little Endian Base 128) is the variable-length integer encoding used +throughout the DEX format for sizes, offsets, and access flags. + +See also: https://source.android.com/docs/core/runtime/dex-format#leb128 +""" + + +def decode_uleb128(data: bytes) -> int: + """Decode an unsigned LEB128 integer from the start of *data*. + + :param data: Bytes starting with a valid ULEB128-encoded integer. + :returns: The decoded non-negative integer value. + :raises PyDexError: If *data* is empty or the encoding is malformed. + + Example:: + + decode_uleb128(bytes([0x8E, 0x02])) # 270 + """ + ... + +def decode_sleb128(data: bytes) -> int: + """Decode a signed LEB128 integer from the start of *data*. + + :param data: Bytes starting with a valid SLEB128-encoded integer. + :returns: The decoded signed integer value. + :raises PyDexError: If *data* is empty or the encoding is malformed. + + Example:: + + decode_sleb128(bytes([0x9B, 0x7F])) # -101 + """ + ... + +def decode_leb128p1(data: bytes) -> int: + """Decode a ``ULEB128p1``-encoded integer from the start of *data*. + + The value is stored as ``n + 1``, so the encoded value ``0`` represents + ``-1`` (the "no-index" sentinel used in DEX). + + :param data: Bytes starting with a valid ULEB128p1-encoded integer. + :returns: The decoded value (may be ``-1`` for the no-index sentinel). + :raises PyDexError: If *data* is empty or the encoding is malformed. + + Example:: + + decode_leb128p1(bytes([0x00])) # -1 (no-index sentinel) + decode_leb128p1(bytes([0x01])) # 0 + """ + ... 
diff --git a/python/dexrs/_internal/mutf8.pyi b/python/dexrs/_internal/mutf8.pyi old mode 100644 new mode 100755 index 34d3122..c437438 --- a/python/dexrs/_internal/mutf8.pyi +++ b/python/dexrs/_internal/mutf8.pyi @@ -1,4 +1,55 @@ -def mutf8_to_str(utf8_data_in: bytes) -> str: ... -def str_to_mutf8(str_data_in: str) -> bytes: ... -def mutf8_to_str_lossy(utf8_data_in: bytes) -> str: ... -def str_to_mutf8_lossy(str_data_in: str) -> bytes: ... +"""Type stubs for the ``dexrs._internal.mutf8`` native extension module. + +DEX files use Modified UTF-8 (MUTF-8), which differs from standard UTF-8 in +two ways: + +1. The null character U+0000 is encoded as the two-byte overlong sequence + ``0xC0 0x80`` rather than a single ``0x00`` byte. +2. Supplementary code points (U+10000–U+10FFFF) are encoded as a pair of + UTF-16 surrogates in CESU-8 style rather than a single 4-byte sequence. + +See: https://source.android.com/docs/core/runtime/dex-format#mutf-8 +""" + + +def mutf8_to_str(utf8_data_in: bytes) -> str: + """Decode strict MUTF-8 bytes to a Python :class:`str`. + + :param utf8_data_in: Raw MUTF-8 encoded bytes. + :returns: Decoded Python string. + :raises PyDexError: If the byte sequence is not valid MUTF-8. + + Example:: + + mutf8_to_str(bytes([0xC0, 0x80])) # "\\x00" + """ + ... + +def str_to_mutf8(str_data_in: str) -> bytes: + """Encode a Python :class:`str` to strict MUTF-8 bytes. + + :param str_data_in: Python string to encode. + :returns: MUTF-8 encoded bytes. + :raises PyDexError: If the string contains characters that cannot be encoded. + + Example:: + + str_to_mutf8("Hello") # b"Hello" + """ + ... + +def mutf8_to_str_lossy(utf8_data_in: bytes) -> str: + """Decode MUTF-8 bytes to a Python :class:`str`, replacing invalid sequences with U+FFFD. + + :param utf8_data_in: Raw bytes, possibly containing invalid MUTF-8 sequences. + :returns: Decoded Python string with replacement characters for bad bytes. + """ + ... 
+ +def str_to_mutf8_lossy(str_data_in: str) -> bytes: + """Encode a Python :class:`str` to MUTF-8 bytes, skipping unencodable code points. + + :param str_data_in: Python string to encode. + :returns: MUTF-8 encoded bytes with unencodable characters silently dropped. + """ + ... diff --git a/python/dexrs/_internal/primitive.pyi b/python/dexrs/_internal/primitive.pyi new file mode 100755 index 0000000..a754888 --- /dev/null +++ b/python/dexrs/_internal/primitive.pyi @@ -0,0 +1,149 @@ +"""Type stubs for the ``dexrs._internal.primitive`` native extension module.""" + +from typing import Optional + + +class PrimitiveType: + """Enumeration of Java primitive types as classified by the DEX format. + + Each variant corresponds to a JVM primitive (or ``void``/``Not`` for the + non-primitive sentinel). The enum supports integer comparison via + ``__int__``, and provides helpers for descriptor characters, boxing classes, + storage sizes, and type properties. + + Example:: + + pt = PrimitiveType.Int + print(pt.descriptor()) # "I" + print(pt.boxed_descriptor()) # "Ljava/lang/Integer;" + print(pt.component_size()) # 4 + print(pt.is_numeric()) # True + print(PrimitiveType.from_char("D")) # PrimitiveType.Double + """ + + Not: "PrimitiveType" + """Sentinel value — not a primitive type.""" + + Boolean: "PrimitiveType" + """Java ``boolean`` (descriptor ``Z``).""" + + Byte: "PrimitiveType" + """Java ``byte`` (descriptor ``B``).""" + + Char: "PrimitiveType" + """Java ``char`` (descriptor ``C``).""" + + Short: "PrimitiveType" + """Java ``short`` (descriptor ``S``).""" + + Int: "PrimitiveType" + """Java ``int`` (descriptor ``I``).""" + + Long: "PrimitiveType" + """Java ``long`` (descriptor ``J``).""" + + Float: "PrimitiveType" + """Java ``float`` (descriptor ``F``).""" + + Double: "PrimitiveType" + """Java ``double`` (descriptor ``D``).""" + + Void: "PrimitiveType" + """Java ``void`` (descriptor ``V``).""" + + def descriptor(self) -> Optional[str]: + """Return the single-character JVM 
type descriptor, or ``None`` for ``Not``. + + :returns: Descriptor character string (e.g. ``"I"``), or ``None``. + :rtype: str or None + + Example:: + + PrimitiveType.Long.descriptor() # "J" + """ + ... + + def boxed_descriptor(self) -> Optional[str]: + """Return the descriptor of the corresponding boxed class, or ``None`` for ``Not``. + + :returns: Descriptor string (e.g. ``"Ljava/lang/Integer;"``), or ``None``. + :rtype: str or None + + Example:: + + PrimitiveType.Int.boxed_descriptor() # "Ljava/lang/Integer;" + """ + ... + + def component_size(self) -> int: + """Return the storage size of this type in bytes. + + :returns: Storage size: 1 for byte/boolean, 2 for char/short, + 4 for int/float, 8 for long/double, 0 for ``Not``/``Void``. + :rtype: int + + Example:: + + PrimitiveType.Double.component_size() # 8 + """ + ... + + def is_numeric(self) -> bool: + """Return ``True`` if this is a numeric primitive (i.e. not boolean, void, or Not). + + :returns: ``True`` for byte, char, short, int, long, float, double. + :rtype: bool + """ + ... + + def is_64bit(self) -> bool: + """Return ``True`` for 64-bit primitives: ``long`` and ``double``. + + :returns: ``True`` for ``Long`` and ``Double``, ``False`` otherwise. + :rtype: bool + """ + ... + + def pretty_name(self) -> str: + """Return the human-readable Java type name (e.g. ``"int"``, ``"double"``). + + :returns: Java keyword name of the primitive type. + :rtype: str + + Example:: + + PrimitiveType.Char.pretty_name() # "char" + """ + ... + + def __str__(self) -> str: + """Return the human-readable Java type name (same as :meth:`pretty_name`). + + :rtype: str + """ + ... + + def __int__(self) -> int: + """Return the integer discriminant of this variant. + + :rtype: int + """ + ... + + @staticmethod + def from_char(c: str) -> "PrimitiveType": + """Create a :class:`PrimitiveType` from a JVM descriptor character. + + Returns ``PrimitiveType.Not`` for unrecognised characters. + + :param c: Single JVM descriptor character (e.g. 
``'I'``, ``'D'``). + :type c: str + :returns: Corresponding primitive type. + :rtype: PrimitiveType + + Example:: + + PrimitiveType.from_char("Z") # PrimitiveType.Boolean + PrimitiveType.from_char("?") # PrimitiveType.Not + """ + ... diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi old mode 100644 new mode 100755 index e680312..4dcd622 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -1,168 +1,328 @@ +"""Type stubs for the ``dexrs._internal.structs`` native extension module. + +Plain-data structs that mirror the on-disk layout of the DEX file format as +described by the `AOSP DEX format specification`_. Instances are created by +:class:`~dexrs._internal.file.DexFile` accessor methods; they are not meant to +be constructed directly. + +.. _AOSP DEX format specification: + https://source.android.com/docs/core/runtime/dex-format +""" + from typing import List class Header: + """The DEX file header (first 112 bytes of the file). + + Contains global metadata: magic number, checksum, SHA-1 signature, + file size, and offsets/sizes for every ID and data section. 
+ """ + checksum: int + """Adler32 checksum of the file contents (excluding magic and this field).""" file_size: int + """Total size of the DEX file in bytes.""" header_size: int + """Size of this header in bytes (112 for standard DEX, 120 for DEX 041+).""" endian_tag: int + """Endianness tag — always ``0x12345678`` for standard DEX.""" link_size: int + """Size of the link section (0 for statically linked files).""" link_off: int + """Offset to the link section (0 if unused).""" string_ids_size: int + """Number of elements in the string identifiers list.""" string_ids_off: int + """Offset to the string identifiers list.""" type_ids_size: int + """Number of elements in the type identifiers list.""" type_ids_off: int + """Offset to the type identifiers list.""" proto_ids_size: int + """Number of elements in the prototype identifiers list.""" proto_ids_off: int + """Offset to the prototype identifiers list.""" field_ids_size: int + """Number of elements in the field identifiers list.""" field_ids_off: int + """Offset to the field identifiers list.""" method_ids_size: int + """Number of elements in the method identifiers list.""" method_ids_off: int + """Offset to the method identifiers list.""" class_defs_size: int + """Number of elements in the class definitions list.""" class_defs_off: int + """Offset to the class definitions list.""" data_size: int + """Size of the data section in bytes.""" data_off: int + """Offset to the data section.""" @property - def version_int(self) -> int: ... + def version_int(self) -> int: + """DEX format version as an integer (e.g. 35 for ``dex\n035\0``).""" + ... + @property - def signature(self) -> bytes: ... + def signature(self) -> bytes: + """SHA-1 hash of the file contents (20 bytes), excluding magic, checksum, and this field.""" + ... + @property - def magic(self) -> bytes: ... + def magic(self) -> bytes: + """The 8-byte magic number (e.g. ``b"dex\\n035\\0"``).""" + ... 
class StringId: + """Points to the raw string data for one entry in the string table.""" + string_data_off: int + """Byte offset to the ``string_data_item`` for this string.""" class TypeId: + """Associates a type with its descriptor string.""" + descriptor_idx: int + """Index into the string identifiers list for this type's descriptor.""" class FieldId: + """Identifies a field by its class, type, and name.""" + class_idx: int + """Index into the type identifiers list for the defining class.""" type_idx: int + """Index into the type identifiers list for this field's type.""" name_idx: int + """Index into the string identifiers list for this field's name.""" class ProtoId: + """Describes a method prototype (return type + parameter types).""" + shorty_idx: int + """Index into the string identifiers list for the shorty descriptor.""" return_type_idx: int + """Index into the type identifiers list for the return type.""" parameters_off: int + """Offset to the ``type_list`` of parameter types (0 if no parameters).""" class MethodId: + """Identifies a method by its class, prototype, and name.""" + class_idx: int + """Index into the type identifiers list for the defining class.""" proto_idx: int + """Index into the prototype identifiers list for this method's prototype.""" name_idx: int + """Index into the string identifiers list for this method's name.""" class ClassDef: + """Top-level class definition entry in the DEX class list.""" + class_idx: int + """Index into the type identifiers list for this class.""" access_flags: int + """Access and property flags bitmask (``ACC_PUBLIC``, etc.).""" superclass_idx: int + """Type-ID index of the superclass, or ``0xFFFFFFFF`` (no-index) for ``Object``.""" interfaces_off: int + """Offset to the ``type_list`` of implemented interfaces (0 if none).""" source_file_idx: int + """String-ID index of the source file name, or ``0xFFFFFFFF`` if absent.""" annotations_off: int + """Offset to the ``annotations_directory_item`` (0 if no 
annotations).""" class_data_off: int + """Offset to the ``class_data_item`` (0 if no fields or methods).""" static_values_off: int + """Offset to the ``encoded_array_item`` of static field initial values (0 if none).""" class TypeItem: + """A single type reference inside a ``type_list``.""" + type_idx: int + """Index into the type identifiers list.""" class CodeItem: + """Header of a ``code_item``, describing register and instruction counts.""" + registers_size: int + """Total number of virtual registers used by the method.""" ins_size: int + """Number of words of incoming arguments.""" outs_size: int + """Number of words of outgoing argument space required.""" tries_size: int + """Number of ``try_item`` entries.""" debug_info_off: int + """Offset to the ``debug_info_item`` (0 if none).""" insns_size: int + """Size of the instruction list in 16-bit code units.""" class TryItem: + """A single exception-handling range within a method body.""" + start_addr: int + """Start address of the covered block in 16-bit code units.""" insn_count: int + """Number of 16-bit code units covered by this try block.""" handler_off: int + """Offset to the catch-handler list (relative to the handlers section start).""" class CatchHandlerData: + """A single catch clause within a ``catch_handler_item``.""" + type_idx: int + """Type-ID index of the caught exception type (``0xFFFFFFFF`` for catch-all).""" address: int + """Handler start address in 16-bit code units.""" - def is_catch_all(self) -> bool: ... + def is_catch_all(self) -> bool: + """Return ``True`` if this is a catch-all handler (``type_idx == 0xFFFFFFFF``).""" + ... 
class AnnotationsDirectoryItem: + """Points to all annotations for a class definition.""" + class_annotations_off: int + """Offset to the class-level ``annotation_set_item`` (0 if none).""" fields_size: int + """Number of ``field_annotations_item`` entries.""" methods_size: int + """Number of ``method_annotations_item`` entries.""" parameters_size: int + """Number of ``parameter_annotations_item`` entries.""" class FieldAnnotationsItem: + """Associates a field with its annotation set.""" + field_idx: int + """Index into the field identifiers list.""" annotations_off: int + """Offset to the ``annotation_set_item`` for this field.""" class MethodAnnotationsItem: + """Associates a method with its annotation set.""" + method_idx: int + """Index into the method identifiers list.""" annotations_off: int + """Offset to the ``annotation_set_item`` for this method.""" class ParameterAnnotationsItem: + """Associates a method's parameters with their annotation sets.""" + method_idx: int + """Index into the method identifiers list.""" annotations_off: int + """Offset to the ``annotation_set_ref_list`` for this method's parameters.""" class EncodedValue: + """A tagged union representing any value that can appear in a DEX annotation or static initialiser. + + Each inner class corresponds to one ``VALUE_TYPE`` tag in the DEX spec. 
+ """ + class Null: + """A ``null`` reference value.""" pass class Boolean: + """A boolean constant.""" value: bool class Byte: + """A signed 8-bit integer constant.""" value: int class Char: + """An unsigned 16-bit character constant.""" value: int class Short: + """A signed 16-bit integer constant.""" value: int class Integer: + """A signed 32-bit integer constant.""" value: int class Float: + """A 32-bit floating-point constant.""" value: float class Long: + """A signed 64-bit integer constant.""" value: int class Double: + """A 64-bit floating-point constant.""" value: float class String: + """A string constant, referenced by index.""" index: int + """Index into the string identifiers list.""" class Type: + """A type constant, referenced by index.""" index: int + """Index into the type identifiers list.""" class Field: + """A field constant, referenced by index.""" index: int + """Index into the field identifiers list.""" class Method: + """A method constant, referenced by index.""" index: int + """Index into the method identifiers list.""" class MethodType: + """A method-type constant (proto), referenced by index.""" index: int + """Index into the prototype identifiers list.""" class MethodHandle: + """A method-handle constant, referenced by index.""" index: int + """Index into the method handles list.""" class Enum: + """An enum constant, referenced by field index.""" index: int + """Index into the field identifiers list for the enum constant.""" class Array: - elements: List[EncodedValue] + """An array of encoded values.""" + elements: "List[EncodedValue]" + """The elements of the array.""" class Annotation: - annotation: EncodedAnnotation + """A nested annotation value.""" + annotation: "EncodedAnnotation" + """The nested encoded annotation.""" class AnnotationElement: + """A single name–value pair within an encoded annotation.""" + name_idx: int + """Index into the string identifiers list for the element name.""" value: EncodedValue + """The element's 
value.""" class EncodedAnnotation: + """An annotation instance with its type and elements.""" + type_idx: int + """Index into the type identifiers list for the annotation type.""" elements: List[AnnotationElement] + """The name–value pairs of the annotation.""" class AnnotationItem: + """A visibility-tagged annotation as stored in the DEX file.""" + visibility: int + """Visibility byte: ``0`` = BUILD, ``1`` = RUNTIME, ``2`` = SYSTEM.""" annotation: EncodedAnnotation + """The annotation data.""" diff --git a/python/dexrs/_internal/type_lookup_table.pyi b/python/dexrs/_internal/type_lookup_table.pyi new file mode 100755 index 0000000..2d31d04 --- /dev/null +++ b/python/dexrs/_internal/type_lookup_table.pyi @@ -0,0 +1,69 @@ +"""Type stubs for the ``dexrs._internal.type_lookup_table`` native extension module.""" + +from typing import Optional + + +class TypeLookupTable: + """Fast O(1) class-descriptor -> class-def-index lookup table. + + Build from a :class:`~dexrs.DexFile` via + :meth:`~dexrs.DexFile.build_type_lookup_table`. Lookups use a hash table + over the descriptor strings, giving constant-time performance regardless of + how many classes are defined. + + Example:: + + tlt = dex.build_type_lookup_table() + idx = tlt.lookup("Ljava/lang/String;") + if idx is not None: + class_def = dex.get_class_def(idx) + """ + + def lookup(self, descriptor: str) -> Optional[int]: + """Return the ``class_def_idx`` for *descriptor*, or ``None`` if absent. + + *descriptor* must be in DEX format (e.g. ``"Ljava/lang/String;"``). + + :param descriptor: Type descriptor to look up. + :type descriptor: str + :returns: The zero-based class-definition index, or ``None``. + :rtype: int or None + + Example:: + + idx = tlt.lookup("Lcom/example/Foo;") + if idx is not None: + print(f"Found at class_def index {idx}") + """ + ... + + def __len__(self) -> int: + """Return the number of class descriptors in the table. + + :returns: Total number of classes indexed. + :rtype: int + """ + ... 
+ + def __contains__(self, descriptor: str) -> bool: + """Return ``True`` if *descriptor* is present in the table. + + :param descriptor: Type descriptor to test. + :type descriptor: str + :returns: ``True`` if found, ``False`` otherwise. + :rtype: bool + + Example:: + + if "Ljava/lang/Object;" in tlt: + print("Object class is defined") + """ + ... + + def __repr__(self) -> str: + """Return a developer-friendly string representation. + + :returns: String of the form ``TypeLookupTable(<N> classes)``. + :rtype: str + """ + ... diff --git a/python/dexrs/builder.py b/python/dexrs/builder.py new file mode 100755 index 0000000..ca0a2a4 --- /dev/null +++ b/python/dexrs/builder.py @@ -0,0 +1,57 @@ +"""DEX mutation system: build new DEX files from scratch. + +Classes +------- +- :class:`DexIrBuilder` — Assemble a full DEX file from class definitions. +- :class:`IrClassDef` — Define a class (fields, methods, superclass…). +- :class:`IrMethodDef` — Define a method with optional bytecode body. +- :class:`IrFieldDef` — Define a field (convenience; usually use the + ``add_*_field`` methods on :class:`IrClassDef`). +- :class:`CodeBuilder` — Assemble Dalvik bytecode from disassembly text lines. +- :class:`CodeDef` — An assembled code item (return value of + :meth:`CodeBuilder.build`). +- :class:`ProtoKey` — Method prototype (return type + parameter types). 
+ +Quick-start +----------- +:: + + from dexrs.builder import DexIrBuilder, IrClassDef, IrMethodDef, CodeBuilder + + cls = IrClassDef("Lhello/World;") + cls.set_access(0x0001) # ACC_PUBLIC + cls.set_superclass("Ljava/lang/Object;") + + code = CodeBuilder(registers=3, ins=1, outs=2) + code.emit('sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;') + code.emit('const-string v1, "Hello!"') + code.emit('invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V') + code.emit('return-void') + + method = IrMethodDef("main", "([Ljava/lang/String;)V", 0x0009) + method.set_code(code.build()) + cls.add_direct_method(method) + + builder = DexIrBuilder(version=35) + builder.add_class(cls) + dex_bytes = builder.write() # -> bytes +""" +from dexrs._internal import builder as _b + +DexIrBuilder = _b.DexIrBuilder +IrClassDef = _b.IrClassDef +IrMethodDef = _b.IrMethodDef +IrFieldDef = _b.IrFieldDef +CodeBuilder = _b.CodeBuilder +CodeDef = _b.CodeDef +ProtoKey = _b.ProtoKey + +__all__ = [ + "DexIrBuilder", + "IrClassDef", + "IrMethodDef", + "IrFieldDef", + "CodeBuilder", + "CodeDef", + "ProtoKey", +] diff --git a/python/dexrs/code.py b/python/dexrs/code.py old mode 100644 new mode 100755 index 0322bf2..ffb2a74 --- a/python/dexrs/code.py +++ b/python/dexrs/code.py @@ -1,3 +1,34 @@ +"""Thin Python wrapper re-exporting DEX instruction and code-item types. + +This module re-exports the full ``code`` sub-package from the native +extension, which provides: + +- :class:`CodeItemAccessor` — iterate over instructions in a method body. +- :class:`Instruction` — a single decoded Dalvik instruction. +- :class:`Code` — opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). +- :class:`Format` — instruction format enum (``k10x``, ``k35c``, …). +- :class:`IndexType` — index-operand type enum. +- :data:`code_flags`, :data:`verify_flags`, :data:`flags` — flag constant modules. +- :data:`signatures` — well-known pseudo-instruction signatures. 
+- :data:`vreg` — virtual-register operand accessor functions. + +Example:: + + from dexrs import DexFile, InMemoryDexContainer + from dexrs.code import Code + + with open("classes.dex", "rb") as f: + dex = DexFile.from_bytes(InMemoryDexContainer(f.read())) + + cls = dex.get_class_def(0) + accessor = dex.get_class_accessor(cls) + if accessor is not None: + for method in accessor.get_direct_methods(): + ca = dex.get_code_item_accessor(method.code_offset) + for inst in ca.insns(): + if inst.opcode == Code.RETURN_VOID: + print("Found RETURN_VOID at", inst) +""" from dexrs._internal import code as rust_code CodeItemAccessor = rust_code.CodeItemAccessor diff --git a/python/dexrs/container.py b/python/dexrs/container.py old mode 100644 new mode 100755 index 17f8348..e9744e4 --- a/python/dexrs/container.py +++ b/python/dexrs/container.py @@ -1,3 +1,24 @@ +"""Thin Python wrapper re-exporting DEX container types. + +A *container* is the backing store that holds the raw DEX bytes. Pass one to +:meth:`~dexrs.DexFile.from_bytes` or :meth:`~dexrs.DexFile.from_file` when +opening a DEX file. + +- :class:`InMemoryDexContainer` — wraps an in-memory ``bytes`` buffer. +- :class:`FileDexContainer` — memory-maps a file on disk (zero-copy reads). + +Example:: + + from dexrs.container import InMemoryDexContainer, FileDexContainer + + # from bytes already in memory + container = InMemoryDexContainer(raw_bytes) + + # directly from a file path (uses mmap) + container = FileDexContainer("classes.dex") + print(container.location) # "classes.dex" + print(container.file_size) # size in bytes +""" from dexrs._internal import container as rust_container diff --git a/python/dexrs/editor.py b/python/dexrs/editor.py new file mode 100755 index 0000000..d30b6d6 --- /dev/null +++ b/python/dexrs/editor.py @@ -0,0 +1,25 @@ +"""Thin Python wrapper re-exporting :class:`DexEditor`. 
+ +:class:`DexEditor` provides targeted in-place mutations of a DEX file — +renaming classes, changing access flags, clearing hidden-API annotations — +without requiring a full re-assembly of the DEX. + +The editor is **consumed** once :meth:`~DexEditor.build` or +:meth:`~DexEditor.write_to` is called; any further mutation raises +:exc:`IOError`. + +Example:: + + from dexrs.editor import DexEditor + + editor = DexEditor.from_file("classes.dex") + editor.rename_class("LMain;", "LEntry;") + editor.set_class_access_flags("LEntry;", 0x0001) # public + editor.clear_hiddenapi_flags() + editor.write_to("classes_patched.dex") +""" +from dexrs._internal import editor as _rust_editor + +DexEditor = _rust_editor.DexEditor + +__all__ = ["DexEditor"] diff --git a/python/dexrs/error.py b/python/dexrs/error.py old mode 100644 new mode 100755 index 228e266..373269d --- a/python/dexrs/error.py +++ b/python/dexrs/error.py @@ -1,3 +1,19 @@ +"""Thin Python wrapper re-exporting :exc:`PyDexError`. + +:exc:`PyDexError` is the exception type raised by all ``dexrs`` operations +that fail at the Rust level (e.g. malformed DEX, out-of-bounds index, I/O +errors that are not plain :exc:`IOError`). + +Example:: + + from dexrs import DexFile, InMemoryDexContainer + from dexrs.error import PyDexError + + try: + dex = DexFile.from_bytes(InMemoryDexContainer(b"not a dex")) + except PyDexError as exc: + print(f"Parse failed: {exc}") +""" from dexrs._internal import error as rust_error PyDexError = rust_error.PyDexError diff --git a/python/dexrs/file.py b/python/dexrs/file.py old mode 100644 new mode 100755 index 2fa9d68..1b2c1c5 --- a/python/dexrs/file.py +++ b/python/dexrs/file.py @@ -1,3 +1,24 @@ +"""Thin Python wrapper re-exporting :class:`DexFile` and :class:`VerifyPreset`. + +:class:`DexFile` is the central type of the ``dexrs`` library. It holds a +parsed DEX image and exposes read-only accessors for every section described +by the `AOSP DEX format specification`_. + +.. 
_AOSP DEX format specification: + https://source.android.com/docs/core/runtime/dex-format + +Example:: + + from dexrs import DexFile, FileDexContainer, VerifyPreset + + dex = DexFile.from_file(FileDexContainer("classes.dex"), VerifyPreset.ALL) + header = dex.get_header() + print(f"DEX version {header.version_int}, {dex.num_class_defs()} classes") + + for i in range(dex.num_class_defs()): + cls = dex.get_class_def(i) + print(dex.get_class_desc(cls)) +""" from dexrs._internal import file as rust_file diff --git a/python/dexrs/leb128.py b/python/dexrs/leb128.py old mode 100644 new mode 100755 index 1e603af..46290d4 --- a/python/dexrs/leb128.py +++ b/python/dexrs/leb128.py @@ -1,3 +1,27 @@ +"""Thin Python wrapper re-exporting LEB128 varint decoder functions. + +`Little Endian Base 128 (LEB128)`_ is a variable-length integer encoding used +extensively in the DEX format for sizes, offsets, and access flags. + +.. _Little Endian Base 128 (LEB128): + https://source.android.com/docs/core/runtime/dex-format#leb128 + +Functions +--------- +- :func:`decode_uleb128` — unsigned LEB128 -> non-negative :class:`int`. +- :func:`decode_sleb128` — signed LEB128 -> signed :class:`int`. +- :func:`decode_leb128p1` — ``ULEB128p1`` encoding (value stored as ``n+1``), + where ``-1`` encodes the special *no-index* sentinel. + +Example:: + + from dexrs.leb128 import decode_uleb128, decode_sleb128, decode_leb128p1 + + decode_uleb128(bytes([0x8E, 0x02])) # 270 + decode_sleb128(bytes([0x9B, 0x7F])) # -101 + decode_leb128p1(bytes([0x00])) # -1 (no-index sentinel) + decode_leb128p1(bytes([0x01])) # 0 +""" from dexrs._internal import leb128 as rust_leb128 diff --git a/python/dexrs/mutf8.py b/python/dexrs/mutf8.py old mode 100644 new mode 100755 index 290f244..c3cba01 --- a/python/dexrs/mutf8.py +++ b/python/dexrs/mutf8.py @@ -1,3 +1,36 @@ +"""Thin Python wrapper re-exporting MUTF-8 ↔ Python :class:`str` converters. 
+ +DEX files use a variant of UTF-8 called `Modified UTF-8 (MUTF-8)`_ which +differs from standard UTF-8 in two ways: + +1. The null character ``U+0000`` is encoded as the two-byte sequence + ``0xC0 0x80`` (overlong form) rather than a single ``0x00`` byte. +2. Supplementary characters (U+10000…U+10FFFF) are encoded as a surrogate + pair (two 3-byte sequences, CESU-8 style) rather than one 4-byte sequence. + +.. _Modified UTF-8 (MUTF-8): + https://source.android.com/docs/core/runtime/dex-format#mutf-8 + +Functions +--------- +- :func:`mutf8_to_str` — strict MUTF-8 bytes -> Python :class:`str`. +- :func:`mutf8_to_str_lossy` — lenient variant; replaces invalid sequences + with the Unicode replacement character ``U+FFFD``. +- :func:`str_to_mutf8` — Python :class:`str` -> MUTF-8 bytes (strict). +- :func:`str_to_mutf8_lossy` — lenient variant; skips unencodable code points. + +Example:: + + from dexrs.mutf8 import mutf8_to_str, str_to_mutf8 + + raw = bytes([0x48, 0x65, 0x6C, 0x6C, 0x6F]) # "Hello" in MUTF-8 + assert mutf8_to_str(raw) == "Hello" + assert str_to_mutf8("Hello") == raw + + # Null character encoded as overlong 0xC0 0x80 + null_encoded = bytes([0xC0, 0x80]) + assert mutf8_to_str(null_encoded) == "\\x00" +""" from dexrs._internal import mutf8 as rust_mutf8 diff --git a/python/dexrs/primitive.py b/python/dexrs/primitive.py new file mode 100755 index 0000000..8faf3ef --- /dev/null +++ b/python/dexrs/primitive.py @@ -0,0 +1,23 @@ +"""Thin Python wrapper re-exporting the Rust ``PrimitiveType`` enum. + +:class:`PrimitiveType` enumerates all Java primitive types (plus ``Void`` +and the non-primitive sentinel ``Not``). It provides helpers for JVM +descriptor characters, boxing classes, storage sizes, and type properties. 
+ +Example:: + + from dexrs.primitive import PrimitiveType + + pt = PrimitiveType.from_char("I") + print(pt.pretty_name()) # "int" + print(pt.descriptor()) # "I" + print(pt.boxed_descriptor()) # "Ljava/lang/Integer;" + print(pt.component_size()) # 4 + print(pt.is_numeric()) # True + print(pt.is_64bit()) # False +""" +from dexrs._internal import primitive as _rust_primitive + +PrimitiveType = _rust_primitive.PrimitiveType + +__all__ = ["PrimitiveType"] diff --git a/python/dexrs/py.typed b/python/dexrs/py.typed old mode 100644 new mode 100755 diff --git a/python/dexrs/type_lookup_table.py b/python/dexrs/type_lookup_table.py new file mode 100755 index 0000000..9982765 --- /dev/null +++ b/python/dexrs/type_lookup_table.py @@ -0,0 +1,23 @@ +"""Thin Python wrapper re-exporting the Rust ``TypeLookupTable`` class. + +:class:`TypeLookupTable` provides O(1) class-descriptor lookups over all +classes defined in a :class:`~dexrs.DexFile`. Build one via +:meth:`~dexrs.DexFile.build_type_lookup_table`. 
+ +Example:: + + from dexrs import DexFile, InMemoryDexContainer + + with open("classes.dex", "rb") as f: + dex = DexFile.from_bytes(InMemoryDexContainer(f.read())) + + tlt = dex.build_type_lookup_table() + idx = tlt.lookup("Ljava/lang/String;") + if idx is not None: + print(f"String class_def index: {idx}") +""" +from dexrs._internal import type_lookup_table as _rust_tlt + +TypeLookupTable = _rust_tlt.TypeLookupTable + +__all__ = ["TypeLookupTable"] diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 0000000..7bc420a --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,29 @@ +"""Shared pytest fixtures for the dexrs test suite.""" + +import pathlib +import pytest + +from dexrs import DexEditor + +_ASSETS = pathlib.Path(__file__).parent.parent.parent / "tests" + +PRIME_DEX_PATH = str(_ASSETS / "prime" / "prime.dex") +FIB_DEX_PATH = str(_ASSETS / "fibonacci" / "fib.dex") + + +def prime_dex_bytes() -> bytes: + return open(PRIME_DEX_PATH, "rb").read() + + +def fib_dex_bytes() -> bytes: + return open(FIB_DEX_PATH, "rb").read() + + +@pytest.fixture +def prime_editor() -> DexEditor: + return DexEditor.from_file(PRIME_DEX_PATH) + + +@pytest.fixture +def fib_editor() -> DexEditor: + return DexEditor.from_file(FIB_DEX_PATH) diff --git a/python/tests/test_builder.py b/python/tests/test_builder.py new file mode 100644 index 0000000..f606e2c --- /dev/null +++ b/python/tests/test_builder.py @@ -0,0 +1,212 @@ +"""Tests for the dexrs.builder Python bindings.""" + +from __future__ import annotations + +import struct + +import pytest + +import dexrs +from dexrs._internal import builder as b + +DEX_MAGIC = b"dex\n035\x00" + + +def make_class(descriptor: str, *, access: int = 0x0001, superclass: str = "Ljava/lang/Object;") -> b.IrClassDef: + cls = b.IrClassDef(descriptor) + cls.set_access(access) + cls.set_superclass(superclass) + return cls + + +# -- DexIrBuilder ------------------------------------------------------------- + + +def 
test_empty_dex_has_correct_magic(): + ir = b.DexIrBuilder(35) + data = ir.write() + assert data[:8] == DEX_MAGIC + + +def test_empty_dex_minimum_size(): + ir = b.DexIrBuilder(35) + data = ir.write() + assert len(data) >= 112 + + +def test_empty_dex_file_size_field(): + ir = b.DexIrBuilder(35) + data = ir.write() + file_size = struct.unpack_from(" 0 + field_ids_size = struct.unpack_from("= 1 + + +def test_class_with_instance_field(): + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/WithInstField;") + cls.add_instance_field("value", "I", 0x0002) # private + ir.add_class(cls) + data = ir.write() + field_ids_size = struct.unpack_from("= 1 + + +def test_class_with_interface(): + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/Impl;") + cls.add_interface("Lcom/example/IFoo;") + ir.add_class(cls) + data = ir.write() + assert len(data) > 0 + + +# -- IrMethodDef + CodeBuilder ------------------------------------------------- + + +def test_class_with_empty_init(): + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/Hello;") + + code = b.CodeBuilder(1, 0, 0) + code.emit("return-void") + m = b.IrMethodDef("", "()V", 0x10001) # public constructor + m.set_code(code.build()) + cls.add_direct_method(m) + + ir.add_class(cls) + data = ir.write() + assert data[:8] == DEX_MAGIC + method_ids_size = struct.unpack_from("= 1 + + +def test_method_with_string_ref(): + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/StringTest;") + + code = b.CodeBuilder(3, 0, 1) + code.emit("sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;") + code.emit('const-string v1, "Hello, World!"') + code.emit("invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V") + code.emit("return-void") + m = b.IrMethodDef("main", "([Ljava/lang/String;)V", 0x0009) + m.set_code(code.build()) + cls.add_direct_method(m) + + ir.add_class(cls) + data = ir.write() + # String pool must contain at least "Hello, World!" 
+ string_ids_size = struct.unpack_from(" 0 + + +def test_code_builder_label_and_branch(): + code = b.CodeBuilder(2, 0, 0) + code.emit("const/4 v0, #0") + code.label("loop_top") + code.emit("if-eqz v0, :loop_top") + code.emit("return-void") + code_def = code.build() + assert code_def is not None + + +def test_static_method(): + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/Static;") + + code = b.CodeBuilder(1, 0, 0) + code.emit("const/4 v0, #42") + code.emit("return v0") + m = b.IrMethodDef("getAnswer", "()I", 0x0009) # public static + m.set_code(code.build()) + cls.add_direct_method(m) + + ir.add_class(cls) + data = ir.write() + assert len(data) > 0 + + +# -- ProtoKey ------------------------------------------------------------------ + + +def test_proto_key_from_descriptor_returns_types(): + pk = b.ProtoKey.from_descriptor("(IIZ)Ljava/lang/String;") + assert pk.return_type == "Ljava/lang/String;" + assert pk.params == ["I", "I", "Z"] + + +def test_proto_key_void_return(): + pk = b.ProtoKey.from_descriptor("()V") + assert pk.return_type == "V" + assert pk.params == [] + + +def test_proto_key_shorty_primitives(): + pk = b.ProtoKey.from_descriptor("(IZ)I") + assert pk.shorty() == "IIZ" + + +# -- Round-trip ---------------------------------------------------------------- + + +def test_write_produces_parseable_dex(): + """Write DEX bytes then parse them back with dexrs DexFile.from_bytes.""" + ir = b.DexIrBuilder(35) + cls = make_class("Lcom/example/RoundTrip;") + + code = b.CodeBuilder(1, 0, 0) + code.emit("return-void") + m = b.IrMethodDef("", "()V", 0x10001) + m.set_code(code.build()) + cls.add_direct_method(m) + ir.add_class(cls) + + data = ir.write() + container = dexrs.InMemoryDexContainer(data) + dex_file = dexrs.DexFile.from_bytes(container) + assert dex_file.num_class_defs() == 1 + + +def test_write_bytes_type(): + ir = b.DexIrBuilder(35) + data = ir.write() + assert isinstance(data, bytes) diff --git a/python/tests/test_dex_editor.py 
b/python/tests/test_dex_editor.py new file mode 100644 index 0000000..fce714d --- /dev/null +++ b/python/tests/test_dex_editor.py @@ -0,0 +1,232 @@ +"""Tests for DexEditor (Python bindings).""" + +import pathlib + +import pytest + +from dexrs import DexEditor, DexFile +import dexrs.container as container + +from . import _util +from .conftest import PRIME_DEX_PATH, FIB_DEX_PATH, prime_dex_bytes, fib_dex_bytes + + +# -- helpers ------------------------------------------------------------------ + +def _reparse(data: bytes) -> DexFile: + """Parse raw bytes and return a DexFile for assertions.""" + c = container.InMemoryDexContainer(data) + return DexFile.from_bytes(c) + + +def _class_descriptor(dex: DexFile, idx: int = 0) -> str: + cd = dex.get_class_def(idx) + type_id = dex.get_type_id(cd.class_idx) + return dex.get_utf16_at(type_id.descriptor_idx) + + +# -- from_file / from_bytes ---------------------------------------------------- + +def test_from_file_valid(prime_editor: DexEditor) -> None: + assert prime_editor is not None + + +def test_from_bytes_valid() -> None: + data = prime_dex_bytes() + editor = DexEditor.from_bytes(data) + assert editor is not None + + +def test_from_bytes_invalid_magic_raises() -> None: + with pytest.raises(OSError): + DexEditor.from_bytes(b"not a dex file at all") + + +def test_from_bytes_too_short_raises() -> None: + with pytest.raises(OSError): + DexEditor.from_bytes(b"\x00" * 10) + + +def test_from_file_missing_path_raises() -> None: + with pytest.raises(OSError): + DexEditor.from_file("/no/such/file.dex") + + +# -- build --------------------------------------------------------------------- + +def test_build_returns_bytes(prime_editor: DexEditor) -> None: + data = prime_editor.build() + assert isinstance(data, bytes) + assert len(data) == len(prime_dex_bytes()) + + +def test_build_produces_parseable_dex(prime_editor: DexEditor) -> None: + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_header().version_int == 35 + 
+ +def test_write_to_creates_file(prime_editor: DexEditor, tmp_path: pathlib.Path) -> None: + out = str(tmp_path / "out.dex") + prime_editor.write_to(out) + assert pathlib.Path(out).stat().st_size == len(prime_dex_bytes()) + + +# -- set_class_access_flags ---------------------------------------------------- + +def test_set_class_flags_descriptor_form(prime_editor: DexEditor) -> None: + prime_editor.set_class_access_flags("Lprime/prime;", 0x0011) + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0011 + + +def test_set_class_flags_dotted_form(prime_editor: DexEditor) -> None: + prime_editor.set_class_access_flags("prime.prime", 0x0001) + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0001 + + +def test_set_class_flags_slash_form(prime_editor: DexEditor) -> None: + prime_editor.set_class_access_flags("prime/prime", 0x0001) + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0001 + + +def test_set_class_flags_unknown_class_raises(prime_editor: DexEditor) -> None: + with pytest.raises(OSError): + prime_editor.set_class_access_flags("Lno/such/Class;", 0x0001) + + +def test_set_class_flags_all_common_values(prime_editor: DexEditor) -> None: + for flags in [0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0400]: + editor = DexEditor.from_file(PRIME_DEX_PATH) + editor.set_class_access_flags("Lprime/prime;", flags) + data = editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == flags + + +# -- set_method_access_flags --------------------------------------------------- + +def test_set_method_flags_main(prime_editor: DexEditor) -> None: + prime_editor.set_method_access_flags("Lprime/prime;", "main", 0x0009) + data = prime_editor.build() + _reparse(data) # must be parseable + + +def test_set_method_flags_init(prime_editor: DexEditor) -> None: + prime_editor.set_method_access_flags("Lprime/prime;", "", 
0x10001) + data = prime_editor.build() + _reparse(data) + + +def test_set_method_flags_unknown_method_raises(prime_editor: DexEditor) -> None: + with pytest.raises(OSError): + prime_editor.set_method_access_flags("Lprime/prime;", "noSuchMethod", 0x0001) + + +def test_set_method_flags_unknown_class_raises(prime_editor: DexEditor) -> None: + with pytest.raises(OSError): + prime_editor.set_method_access_flags("Lno/such/Class;", "main", 0x0001) + + +# -- rename_class -------------------------------------------------------------- + +def test_rename_same_length(prime_editor: DexEditor) -> None: + # "Lprime/prime;" (13) -> "Lprime/other;" (13) + prime_editor.rename_class("Lprime/prime;", "Lprime/other;") + data = prime_editor.build() + dex = _reparse(data) + assert _class_descriptor(dex) == "Lprime/other;" + + +def test_rename_same_length_old_gone(prime_editor: DexEditor) -> None: + prime_editor.rename_class("Lprime/prime;", "Lprime/other;") + data = prime_editor.build() + assert b"Lprime/prime;" not in data + + +def test_rename_different_length_longer(prime_editor: DexEditor) -> None: + original_size = len(prime_dex_bytes()) + prime_editor.rename_class("Lprime/prime;", "Lprime/renamed;") + data = prime_editor.build() + dex = _reparse(data) + assert _class_descriptor(dex) == "Lprime/renamed;" + assert len(data) > original_size + + +def test_rename_different_length_shorter(prime_editor: DexEditor) -> None: + original_size = len(prime_dex_bytes()) + prime_editor.rename_class("Lprime/prime;", "La/b;") + data = prime_editor.build() + dex = _reparse(data) + assert _class_descriptor(dex) == "La/b;" + assert len(data) < original_size + + +def test_rename_unknown_class_raises(prime_editor: DexEditor) -> None: + with pytest.raises(OSError): + prime_editor.rename_class("Lno/such/Class;", "Lnew/name;") + + +def test_rename_dotted_form(prime_editor: DexEditor) -> None: + prime_editor.rename_class("prime.prime", "prime.other") + data = prime_editor.build() + dex = _reparse(data) + 
assert _class_descriptor(dex) == "Lprime/other;" + + +# -- clear_hiddenapi_flags ----------------------------------------------------- + +def test_clear_hiddenapi_noop_on_plain_dex(prime_editor: DexEditor) -> None: + original_size = len(prime_dex_bytes()) + prime_editor.clear_hiddenapi_flags() # no-op, must not raise + data = prime_editor.build() + assert len(data) == original_size + + +# -- chained mutations --------------------------------------------------------- + +def test_chain_flags_and_rename(prime_editor: DexEditor) -> None: + prime_editor.set_class_access_flags("Lprime/prime;", 0x0011) + prime_editor.rename_class("Lprime/prime;", "Lprime/renamed;") + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0011 + assert _class_descriptor(dex) == "Lprime/renamed;" + + +def test_chain_rename_then_method_flags() -> None: + editor = DexEditor.from_file(PRIME_DEX_PATH) + editor.rename_class("Lprime/prime;", "Lprime/renamed;") + editor.set_method_access_flags("Lprime/renamed;", "main", 0x0009) + data = editor.build() + _reparse(data) + + +def test_chain_method_and_class_flags(prime_editor: DexEditor) -> None: + prime_editor.set_class_access_flags("Lprime/prime;", 0x0011) + prime_editor.set_method_access_flags("Lprime/prime;", "main", 0x0009) + prime_editor.clear_hiddenapi_flags() + data = prime_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0011 + + +# -- fibonacci DEX ------------------------------------------------------------- + +def test_fib_set_flags(fib_editor: DexEditor) -> None: + fib_editor.set_class_access_flags("fibonacci.fib", 0x0011) + data = fib_editor.build() + dex = _reparse(data) + assert dex.get_class_def(0).access_flags == 0x0011 + + +def test_fib_rename(fib_editor: DexEditor) -> None: + fib_editor.rename_class("fibonacci.fib", "fib.renamed") + data = fib_editor.build() + dex = _reparse(data) + assert _class_descriptor(dex) == "Lfib/renamed;" diff --git 
a/src/bin/dexrs/commands/disasm.rs b/src/bin/dexrs/commands/disasm.rs index 47c296c..71def62 100644 --- a/src/bin/dexrs/commands/disasm.rs +++ b/src/bin/dexrs/commands/disasm.rs @@ -97,7 +97,7 @@ where p.section(&format!(".method {flags} {full_name}")); if code_off == 0 { - p.item("(abstract / native — no code)"); + p.item("(abstract / native - no code)"); return Ok(()); } diff --git a/src/bin/dexrs/commands/inspect.rs b/src/bin/dexrs/commands/inspect.rs index 8feb7a3..5b5a669 100644 --- a/src/bin/dexrs/commands/inspect.rs +++ b/src/bin/dexrs/commands/inspect.rs @@ -108,7 +108,7 @@ pub fn run(args: &InspectArgs) -> Result<()> { .map_err(|_| anyhow::anyhow!("build thread panicked"))? }); - // Drop the mmap handles — all data is now owned by classes/file_info. + // Drop the mmap handles - all data is now owned by classes/file_info. drop(dex); drop(container); drop(file); @@ -120,7 +120,7 @@ pub fn run(args: &InspectArgs) -> Result<()> { Err(e) } Ok(None) => { - // User cancelled loading — restore terminal and exit cleanly. + // User cancelled loading - restore terminal and exit cleanly. disable_raw_mode()?; execute!(terminal.backend_mut(), LeaveAlternateScreen)?; Ok(()) @@ -216,7 +216,7 @@ pub fn build_app_state<'a, C: DexContainer<'a>>( Some((raw, desc)) }) .unwrap_or_else(|| (name.clone(), "()V".to_string())); - // Only read the code_item header (4 × u16) — no instruction scanning. + // Only read the code_item header (4 × u16) - no instruction scanning. let code_info = build_code_info(dex, m.code_offset); methods.push(MemberEntry { name, @@ -274,7 +274,7 @@ pub fn build_app_state<'a, C: DexContainer<'a>>( Ok(Some((classes, file_info))) } -/// Fast code-item header read: registers, ins, outs, tries — zero instruction scanning. +/// Fast code-item header read: registers, ins, outs, tries - zero instruction scanning. 
fn build_code_info<'a, C: DexContainer<'a>>(dex: &DexFile<'a, C>, code_off: u32) -> Option { if code_off == 0 { return None; } let ca = dex.get_code_item_accessor(code_off).ok()?; @@ -378,7 +378,7 @@ pub fn run_vdex_inspect( } }; - // Extract the raw DEX bytes — must be copied because the mmap/container + // Extract the raw DEX bytes - must be copied because the mmap/container // lifetime ends when this function returns and App owns its bytes. let dex_bytes = vdex.get_dex_file_data(chosen)?.to_vec(); drop(vdex); diff --git a/src/bin/dexrs/commands/patch.rs b/src/bin/dexrs/commands/patch.rs index ac1f547..764d4fd 100644 --- a/src/bin/dexrs/commands/patch.rs +++ b/src/bin/dexrs/commands/patch.rs @@ -10,7 +10,7 @@ fn parse_int(s: &str) -> Result { } } -/// `patch flags` — in-place: set class access flags, then update checksum. +/// `patch flags` - in-place: set class access flags, then update checksum. /// /// Uses `DexEditor` for class lookup; writes the result back to the same file. pub fn run_flags(args: &PatchFlagsArgs) -> Result<()> { @@ -29,7 +29,7 @@ pub fn run_flags(args: &PatchFlagsArgs) -> Result<()> { Ok(()) } -/// `patch insn` — in-place: overwrite one instruction word, then update checksum. +/// `patch insn` - in-place: overwrite one instruction word, then update checksum. pub fn run_insn(args: &PatchInsnArgs) -> Result<()> { let code_off = parse_int(&args.code_offset).context("--code-offset")?; let word = parse_int(&args.word).context("--word")? as u16; diff --git a/src/bin/dexrs/commands/vdex.rs b/src/bin/dexrs/commands/vdex.rs index 7e27bd5..3e869c6 100644 --- a/src/bin/dexrs/commands/vdex.rs +++ b/src/bin/dexrs/commands/vdex.rs @@ -1,10 +1,10 @@ //! CLI handlers for VDEX file operations. //! //! Subcommands: -//! - `vdex info` — show header, sections and checksums -//! - `vdex list` — tabular list of all embedded DEX files -//! - `vdex extract` — write a single embedded DEX to disk -//! 
- `vdex inspect` — launch the TUI inspector on an embedded DEX +//! - `vdex info` - show header, sections and checksums +//! - `vdex list` - tabular list of all embedded DEX files +//! - `vdex extract` - write a single embedded DEX to disk +//! - `vdex inspect` - launch the TUI inspector on an embedded DEX use std::fs::File; diff --git a/src/bin/dexrs/highlight.rs b/src/bin/dexrs/highlight.rs index 9866833..121458d 100644 --- a/src/bin/dexrs/highlight.rs +++ b/src/bin/dexrs/highlight.rs @@ -3,7 +3,7 @@ //! This module is intentionally thin: all semantic knowledge about which parts //! of an instruction carry which meaning lives in the library's //! [`dexrs::file::dump`] module. Here we only translate [`Highlight`] tags to -//! the two presentation targets we support — ratatui (TUI) and crossterm (CLI). +//! the two presentation targets we support - ratatui (TUI) and crossterm (CLI). use crossterm::style::Stylize; #[cfg(feature = "tui")] @@ -15,7 +15,7 @@ use ratatui::{ text::{Line, Span as TuiSpan}, }; -// Muted, low-contrast palette — readable on both dark and light backgrounds. +// Muted, low-contrast palette - readable on both dark and light backgrounds. // Opcode dusty gold #C8A96A // Register soft sage #7BAF7B // Immediate muted mauve #A07BA0 diff --git a/src/bin/dexrs/tui/app.rs b/src/bin/dexrs/tui/app.rs index 1a9ab76..dd54ac5 100644 --- a/src/bin/dexrs/tui/app.rs +++ b/src/bin/dexrs/tui/app.rs @@ -36,7 +36,7 @@ pub struct MemberEntry { /// Disassembly is computed on-demand from this offset and cached in `App::disasm_cache`. pub code_offset: u32, /// Code metadata summary (registers, ins, outs, tries). - /// Fast to read — just 4 u16 fields from the code_item header. + /// Fast to read - just 4 u16 fields from the code_item header. pub code_info: Option, } @@ -56,7 +56,7 @@ impl MemberEntry { } } -/// Cached disassembly for one method — computed lazily on first view. +/// Cached disassembly for one method - computed lazily on first view. 
pub struct DisasmEntry { /// Styled spans for the code viewer (PC -> highlighted tokens). pub styled: Vec<(u32, StyledLine)>, @@ -194,7 +194,7 @@ pub struct CodeEditState { pub method_name: String, /// (registers, ins, outs) from original code_info. pub registers: (u16, u16, u16), - /// Dirty flag — true after any modification. + /// Dirty flag - true after any modification. pub dirty: bool, /// Buffer for the currently-edited line (LineEdit sub-mode). pub line_buf: String, @@ -313,7 +313,7 @@ pub struct ModalState { pub struct App { // -- Source data ---------------------------------------------------------- pub classes: Vec, - /// Raw DEX bytes — always populated; used for lazy disassembly and edit mode. + /// Raw DEX bytes - always populated; used for lazy disassembly and edit mode. pub raw_bytes: Vec, /// Path to write the modified DEX file. `Some` enables edit mode. pub output_path: Option, diff --git a/src/bin/dexrs/tui/events.rs b/src/bin/dexrs/tui/events.rs index 62c8e97..c9545c3 100644 --- a/src/bin/dexrs/tui/events.rs +++ b/src/bin/dexrs/tui/events.rs @@ -13,7 +13,7 @@ fn handle_browse(app: &mut App, code: KeyCode, mods: KeyModifiers) { if matches!(code, KeyCode::Char('q') | KeyCode::Char('Q')) || (code == KeyCode::Char('c') && mods.contains(KeyModifiers::CONTROL)) { - // Signal quit via a special code — handled by returning Quit in the outer loop. + // Signal quit via a special code - handled by returning Quit in the outer loop. // We re-use the Action enum by setting a flag. Easier: just set a quit flag. // Actually we can't return from here, so we'll use an app flag. app.show_help = false; // placeholder; handled below in outer match diff --git a/src/bin/dexrs/tui/ui.rs b/src/bin/dexrs/tui/ui.rs index bc9c167..f33ee3e 100644 --- a/src/bin/dexrs/tui/ui.rs +++ b/src/bin/dexrs/tui/ui.rs @@ -1,4 +1,4 @@ -//! TUI renderer — 2-pane layout: collapsible class tree + scrollable code/details pane. +//! 
TUI renderer - 2-pane layout: collapsible class tree + scrollable code/details pane. //! //! Layout: //! ``` @@ -25,7 +25,7 @@ use super::app::{App, AppMode, Focus, MemberKind, TreeItem}; use crate::highlight; // -- Palette ------------------------------------------------------------------- -// All muted RGB values — avoid terminal neons, keep contrast readable but calm. +// All muted RGB values - avoid terminal neons, keep contrast readable but calm. /// Active-border / accent (steel blue) const ACCENT: Color = Color::Rgb(95, 135, 175); @@ -339,7 +339,7 @@ fn build_code_viewer_content(app: &mut App) -> (String, Vec>) { Style::default().fg(DIM), )), ]; - (format!(" Package — {pkg_display} "), lines) + (format!(" Package - {pkg_display} "), lines) } TreeItem::Class { class_idx, .. } => { let cls = &app.classes[*class_idx]; @@ -378,7 +378,7 @@ fn build_code_viewer_content(app: &mut App) -> (String, Vec>) { } else { &cls.fields[mi - cls.methods.len()] }; - let title = format!(" {} — {} ", member.kind.label(), member.raw_name); + let title = format!(" {} - {} ", member.kind.label(), member.raw_name); let mut lines: Vec = Vec::new(); // Signature / type @@ -393,7 +393,7 @@ fn build_code_viewer_content(app: &mut App) -> (String, Vec>) { if styled_disasm.is_empty() { lines.push(Line::default()); lines.push(Line::from(Span::styled( - "(abstract / native — no code)", + "(abstract / native - no code)", Style::default().fg(DIM).add_modifier(Modifier::ITALIC), ))); if app.is_editable() { diff --git a/src/file/builder.rs b/src/file/builder.rs index 266e1e6..e16a9fa 100644 --- a/src/file/builder.rs +++ b/src/file/builder.rs @@ -136,7 +136,7 @@ fn next_token(s: &str) -> Result<(Token, &str)> { Ok((Token::Register(n), &s[end..])) } - // p-register: p0..p255 — kept as Token::PRegister; caller resolves to vN + // p-register: p0..p255 - kept as Token::PRegister; caller resolves to vN Some(b'p') => { let end = s[1..] 
.find(|c: char| !c.is_ascii_digit()) @@ -234,7 +234,7 @@ fn next_token(s: &str) -> Result<(Token, &str)> { // Type/Method/Field reference: L...; or [[... Some(b'L') | Some(b'[') => parse_reference(s), - // Primitive type descriptor (V, I, B, etc.) — treat as type ref + // Primitive type descriptor (V, I, B, etc.) - treat as type ref Some(b'V') | Some(b'B') | Some(b'C') @@ -250,7 +250,7 @@ fn next_token(s: &str) -> Result<(Token, &str)> { Ok((Token::TypeRef(s[..end].to_string()), &s[end..])) } - // Signed integer literal (no # prefix) — branch offsets (+5, -3) and bare numbers + // Signed integer literal (no # prefix) - branch offsets (+5, -3) and bare numbers Some(c) if (*c as char).is_ascii_digit() || *c == b'-' || *c == b'+' => { let end = s .find(|c: char| c == ',' || c == '}' || c.is_whitespace()) @@ -937,7 +937,7 @@ fn require_reg_list(tokens: &[Token], idx: usize) -> Result> { match tokens.get(idx) { Some(Token::RegList(r)) => Ok(r.clone()), Some(Token::RegRange(first, last)) => Ok((*first..=*last).collect()), - // Single register not in braces — still valid + // Single register not in braces - still valid Some(Token::Register(r)) | Some(Token::PRegister(r)) => Ok(vec![*r]), other => Err(DexError::DexFileError(format!( "expected register list at token {idx}, got {other:?}" @@ -997,7 +997,7 @@ fn make_ref( } } -// -- DexIrBuilder — high-level builder facade ---------------------------------- +// -- DexIrBuilder - high-level builder facade ---------------------------------- /// High-level builder that progressively constructs a [`DexIr`] and emits classes, /// methods, and fields through a fluent API. 
@@ -1228,14 +1228,14 @@ mod tests { #[test] fn parse_typed_literal_int() { - // #int +65536 — emitted by dump.rs imm_typed_u32 + // #int +65536 - emitted by dump.rs imm_typed_u32 let n = parse_line("const/high16 v0, #int +65536").unwrap(); assert_eq!(n.literal, 65536); } #[test] fn parse_typed_literal_long() { - // #long +1234567890 — emitted by dump.rs imm_typed_u64 + // #long +1234567890 - emitted by dump.rs imm_typed_u64 let n = parse_line("const-wide v0, #long +1234567890").unwrap(); assert_eq!(n.literal, 1234567890); } diff --git a/src/file/ir.rs b/src/file/ir.rs index 6f5e060..1c92491 100644 --- a/src/file/ir.rs +++ b/src/file/ir.rs @@ -3,7 +3,7 @@ //! The IR stores everything symbolically: class/type/field/method names are kept //! as plain `String`s, and integer pool indices are assigned only at write time by //! [`crate::file::writer::DexWriter`]. This makes the representation trivially -//! composable — add a class, add a method, splice in instructions — without having +//! composable - add a class, add a method, splice in instructions - without having //! to maintain cross-references by hand. //! //! # Quick start @@ -41,7 +41,7 @@ use crate::file::instruction::Code; /// Key identifying a method prototype. /// -/// Sorted by return type first, then parameter types lexicographically — the +/// Sorted by return type first, then parameter types lexicographically - the /// same ordering the DEX spec requires for `proto_ids`. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct ProtoKey { diff --git a/src/file/writer.rs b/src/file/writer.rs index e4f821f..5d08c4f 100644 --- a/src/file/writer.rs +++ b/src/file/writer.rs @@ -120,11 +120,11 @@ struct Pools { protos: Vec, proto_idx: HashMap, - /// Sorted field keys: (class_desc, name, field_type) — all MUTF-8. + /// Sorted field keys: (class_desc, name, field_type) - all MUTF-8. fields: Vec, field_idx: FieldHashMap, - /// Sorted method keys: (class_desc, name, proto) — class+name as MUTF-8. 
+ /// Sorted method keys: (class_desc, name, proto) - class+name as MUTF-8. methods: Vec<(Vec, Vec, ProtoKey)>, method_idx: HashMap<(Vec, Vec, ProtoKey), u32>, } @@ -429,7 +429,7 @@ impl DexWriter { // checksum placeholder (4 bytes) let checksum_pos = out.len(); out.write_u32(0); - // SHA-1 signature (20 bytes — we leave it as zeros; most tools don't verify) + // SHA-1 signature (20 bytes - we leave it as zeros; most tools don't verify) for _ in 0..20 { out.write_u8(0); } diff --git a/src/vdex/mod.rs b/src/vdex/mod.rs index 2f1c501..1d28409 100644 --- a/src/vdex/mod.rs +++ b/src/vdex/mod.rs @@ -109,9 +109,9 @@ impl VdexFileHeader { /// /// Layout (12 bytes): /// ```text -/// section_kind u32 LE — VdexSection discriminant -/// section_offset u32 LE — byte offset from start of file -/// section_size u32 LE — byte length of section (0 = absent) +/// section_kind u32 LE - VdexSection discriminant +/// section_offset u32 LE - byte offset from start of file +/// section_size u32 LE - byte length of section (0 = absent) /// ``` #[derive(Debug, Clone, Copy)] #[repr(C)] @@ -150,7 +150,7 @@ pub type MmapVdexFile<'a> = VdexFile<'a, memmap2::Mmap>; /// let vdex = VdexFile::from_raw_parts(&mmap, ())?; /// ``` pub struct VdexFile<'a, C: DexContainer<'a> = memmap2::Mmap> { - /// Reference to the backing container — same field name and semantics as + /// Reference to the backing container - same field name and semantics as /// `DexFile::mmap`. pub(crate) mmap: &'a C, /// Owned copy of the file-level header, read via `plain::copy_from_bytes` @@ -164,7 +164,7 @@ impl<'a, C: DexContainer<'a>> VdexFile<'a, C> { // -- Helper: raw slice from container -------------------------------------- /// Returns `&'a [u8]` for `container[start..end]`, propagating the - /// container's lifetime — the same pattern used by `DexFile::get_section`. + /// container's lifetime - the same pattern used by `DexFile::get_section`. 
#[inline] fn raw_slice(base: &'a C, start: usize, end: usize) -> &'a [u8] { &base[start..end] @@ -261,7 +261,7 @@ impl<'a, C: DexContainer<'a>> VdexFile<'a, C> { /// Returns a raw `&'a [u8]` for the given section (empty when absent). /// - /// The returned slice borrows directly from the container — no copying. + /// The returned slice borrows directly from the container - no copying. pub fn get_section_data(&self, kind: VdexSection) -> &'a [u8] { let Some(hdr) = self.get_section_header(kind) else { return &[]; @@ -313,7 +313,7 @@ impl<'a, C: DexContainer<'a>> VdexFile<'a, C> { } /// Returns the raw bytes of the DEX file at `index` as a `&'a [u8]` slice - /// that borrows directly from the container — no copying. + /// that borrows directly from the container - no copying. /// /// DEX files inside the section are stored back-to-back with 4-byte /// alignment (matching `OatWriter::SeekToDexFiles`). @@ -548,11 +548,11 @@ mod tests { checksum_section_offset as u32, checksum_section_size as u32, ); - // kDexFileSection — absent + // kDexFileSection - absent write_section(&mut out, 1, VdexSection::DexFile as u32, 0, 0); - // kVerifierDepsSection — absent + // kVerifierDepsSection - absent write_section(&mut out, 2, VdexSection::VerifierDeps as u32, 0, 0); - // kTypeLookupTableSection — absent + // kTypeLookupTableSection - absent write_section(&mut out, 3, VdexSection::TypeLookupTable as u32, 0, 0); // Write checksums. @@ -630,7 +630,7 @@ mod tests { let mut data = build_vdex(&[]); data[4] = b'0'; data[5] = b'0'; - data[6] = b'1'; // version "001\0" — not supported + data[6] = b'1'; // version "001\0" - not supported assert!(matches!( VdexFile::from_raw_parts(&data), Err(DexError::UnknownVdexVersion { .. 
}) From ac9059e83766e6968a8a007dc1ac82f372680d12 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:28:13 +0200 Subject: [PATCH 44/46] docs: update docstrings --- LICENSE | 0 python/dexrs/__init__.py | 20 ++++++++++---------- python/dexrs/_internal/__init__.pyi | 22 +++++++++++----------- python/dexrs/_internal/code/__init__.pyi | 16 ++++++++-------- python/dexrs/_internal/editor.pyi | 2 +- python/dexrs/_internal/file.pyi | 6 +++--- python/dexrs/_internal/primitive.pyi | 2 +- python/dexrs/_internal/structs.pyi | 2 +- python/dexrs/builder.py | 14 +++++++------- python/dexrs/code.py | 16 ++++++++-------- python/dexrs/container.py | 4 ++-- python/dexrs/editor.py | 4 ++-- python/dexrs/leb128.py | 6 +++--- python/dexrs/mutf8.py | 8 ++++---- 14 files changed, 61 insertions(+), 61 deletions(-) mode change 100755 => 100644 LICENSE diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 diff --git a/python/dexrs/__init__.py b/python/dexrs/__init__.py index d4c1043..2dbb4b3 100755 --- a/python/dexrs/__init__.py +++ b/python/dexrs/__init__.py @@ -1,4 +1,4 @@ -"""dexrs — Python bindings for the Rust DEX file parsing library. +"""dexrs - Python bindings for the Rust DEX file parsing library. This package exposes the full public API of the ``dexrs`` Rust crate via PyO3-generated native extensions, re-exported under clean Python names. 
@@ -18,15 +18,15 @@ Submodules ---------- -- :mod:`dexrs.file` — :class:`DexFile` and :class:`VerifyPreset` -- :mod:`dexrs.container` — :class:`InMemoryDexContainer`, :class:`FileDexContainer` -- :mod:`dexrs.editor` — :class:`DexEditor` for mutation -- :mod:`dexrs.code` — Instructions, opcodes, and operand helpers -- :mod:`dexrs.error` — :exc:`PyDexError` -- :mod:`dexrs.leb128` — LEB128 varint decoders -- :mod:`dexrs.mutf8` — MUTF-8 ↔ str conversion -- :mod:`dexrs.primitive` — :class:`PrimitiveType` enum -- :mod:`dexrs.type_lookup_table` — :class:`TypeLookupTable` +- :mod:`dexrs.file` - :class:`DexFile` and :class:`VerifyPreset` +- :mod:`dexrs.container` - :class:`InMemoryDexContainer`, :class:`FileDexContainer` +- :mod:`dexrs.editor` - :class:`DexEditor` for mutation +- :mod:`dexrs.code` - Instructions, opcodes, and operand helpers +- :mod:`dexrs.error` - :exc:`PyDexError` +- :mod:`dexrs.leb128` - LEB128 varint decoders +- :mod:`dexrs.mutf8` - MUTF-8 ↔ str conversion +- :mod:`dexrs.primitive` - :class:`PrimitiveType` enum +- :mod:`dexrs.type_lookup_table` - :class:`TypeLookupTable` """ # some shortcuts from .file import DexFile, VerifyPreset diff --git a/python/dexrs/_internal/__init__.pyi b/python/dexrs/_internal/__init__.pyi index 1dd3db2..972b456 100755 --- a/python/dexrs/_internal/__init__.pyi +++ b/python/dexrs/_internal/__init__.pyi @@ -2,17 +2,17 @@ Sub-modules exposed by the Rust extension: -- :mod:`dexrs._internal.annotation` — class annotation accessors -- :mod:`dexrs._internal.class_accessor` — class data iterators -- :mod:`dexrs._internal.code` — instructions, opcodes, and operand accessors -- :mod:`dexrs._internal.container` — DEX container types (memory / file) -- :mod:`dexrs._internal.editor` — mutable DEX editor -- :mod:`dexrs._internal.error` — :exc:`PyDexError` exception type -- :mod:`dexrs._internal.leb128` — LEB128 varint decoders -- :mod:`dexrs._internal.mutf8` — MUTF-8 / UTF-16 conversion utilities -- :mod:`dexrs._internal.primitive` — Java 
primitive-type enum -- :mod:`dexrs._internal.structs` — plain-data structs mirroring DEX on-disk layout -- :mod:`dexrs._internal.type_lookup_table` — O(1) type-descriptor lookup table +- :mod:`dexrs._internal.annotation` - class annotation accessors +- :mod:`dexrs._internal.class_accessor` - class data iterators +- :mod:`dexrs._internal.code` - instructions, opcodes, and operand accessors +- :mod:`dexrs._internal.container` - DEX container types (memory / file) +- :mod:`dexrs._internal.editor` - mutable DEX editor +- :mod:`dexrs._internal.error` - :exc:`PyDexError` exception type +- :mod:`dexrs._internal.leb128` - LEB128 varint decoders +- :mod:`dexrs._internal.mutf8` - MUTF-8 / UTF-16 conversion utilities +- :mod:`dexrs._internal.primitive` - Java primitive-type enum +- :mod:`dexrs._internal.structs` - plain-data structs mirroring DEX on-disk layout +- :mod:`dexrs._internal.type_lookup_table` - O(1) type-descriptor lookup table """ from . import annotation as annotation diff --git a/python/dexrs/_internal/code/__init__.pyi b/python/dexrs/_internal/code/__init__.pyi index ba17069..b99e599 100755 --- a/python/dexrs/_internal/code/__init__.pyi +++ b/python/dexrs/_internal/code/__init__.pyi @@ -2,14 +2,14 @@ Provides types for iterating and inspecting Dalvik bytecode instructions. -- :class:`CodeItemAccessor` — iterate over instructions in a method body. -- :class:`Instruction` — a single decoded Dalvik instruction. -- :class:`Code` — opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). -- :class:`Format` — instruction format enum (``k10x``, ``k35c``, …). -- :class:`IndexType` — type of the index operand in an instruction. -- :class:`FillArrayDataPayload` — payload for ``fill-array-data``. -- :class:`SparseSwitchPayload` — payload for ``sparse-switch``. -- :class:`PackedSwitchPayload` — payload for ``packed-switch``. +- :class:`CodeItemAccessor` - iterate over instructions in a method body. +- :class:`Instruction` - a single decoded Dalvik instruction. 
+- :class:`Code` - opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). +- :class:`Format` - instruction format enum (``k10x``, ``k35c``, …). +- :class:`IndexType` - type of the index operand in an instruction. +- :class:`FillArrayDataPayload` - payload for ``fill-array-data``. +- :class:`SparseSwitchPayload` - payload for ``sparse-switch``. +- :class:`PackedSwitchPayload` - payload for ``packed-switch``. """ from typing import List, Optional diff --git a/python/dexrs/_internal/editor.pyi b/python/dexrs/_internal/editor.pyi index 3ad61f3..dc01472 100755 --- a/python/dexrs/_internal/editor.pyi +++ b/python/dexrs/_internal/editor.pyi @@ -6,7 +6,7 @@ class DexEditor: Construct an editor from a file path or raw bytes, apply zero or more mutations, then call :meth:`build` or :meth:`write_to` to finalise. - The editor is **consumed** by :meth:`build` or :meth:`write_to` — any + The editor is **consumed** by :meth:`build` or :meth:`write_to` - any subsequent call will raise :exc:`IOError`. Example:: diff --git a/python/dexrs/_internal/file.pyi b/python/dexrs/_internal/file.pyi index 0ba1cb8..f307922 100755 --- a/python/dexrs/_internal/file.pyi +++ b/python/dexrs/_internal/file.pyi @@ -2,8 +2,8 @@ The central types for parsing and querying Android DEX files. -- :class:`VerifyPreset` — controls which header checks to run on open. -- :class:`DexFile` — the parsed DEX image; exposes read-only accessors for +- :class:`VerifyPreset` - controls which header checks to run on open. +- :class:`DexFile` - the parsed DEX image; exposes read-only accessors for every section described by the `AOSP DEX format specification`_. .. 
_AOSP DEX format specification: @@ -41,7 +41,7 @@ class VerifyPreset: ALL: "VerifyPreset" """Run all available checks (magic, checksum, and structural validation).""" NONE: "VerifyPreset" - """Skip all verification — fastest open, but unsafe on untrusted input.""" + """Skip all verification - fastest open, but unsafe on untrusted input.""" CHECKSUM_ONLY: "VerifyPreset" """Verify the Adler32 checksum only, skipping deeper structural checks.""" diff --git a/python/dexrs/_internal/primitive.pyi b/python/dexrs/_internal/primitive.pyi index a754888..8a3d8a9 100755 --- a/python/dexrs/_internal/primitive.pyi +++ b/python/dexrs/_internal/primitive.pyi @@ -22,7 +22,7 @@ class PrimitiveType: """ Not: "PrimitiveType" - """Sentinel value — not a primitive type.""" + """Sentinel value - not a primitive type.""" Boolean: "PrimitiveType" """Java ``boolean`` (descriptor ``Z``).""" diff --git a/python/dexrs/_internal/structs.pyi b/python/dexrs/_internal/structs.pyi index 4dcd622..482586b 100755 --- a/python/dexrs/_internal/structs.pyi +++ b/python/dexrs/_internal/structs.pyi @@ -25,7 +25,7 @@ class Header: header_size: int """Size of this header in bytes (112 for standard DEX, 120 for DEX 041+).""" endian_tag: int - """Endianness tag — always ``0x12345678`` for standard DEX.""" + """Endianness tag - always ``0x12345678`` for standard DEX.""" link_size: int """Size of the link section (0 for statically linked files).""" link_off: int diff --git a/python/dexrs/builder.py b/python/dexrs/builder.py index ca0a2a4..74c82a5 100755 --- a/python/dexrs/builder.py +++ b/python/dexrs/builder.py @@ -2,15 +2,15 @@ Classes ------- -- :class:`DexIrBuilder` — Assemble a full DEX file from class definitions. -- :class:`IrClassDef` — Define a class (fields, methods, superclass…). -- :class:`IrMethodDef` — Define a method with optional bytecode body. -- :class:`IrFieldDef` — Define a field (convenience; usually use the +- :class:`DexIrBuilder` - Assemble a full DEX file from class definitions. 
+- :class:`IrClassDef` - Define a class (fields, methods, superclass…). +- :class:`IrMethodDef` - Define a method with optional bytecode body. +- :class:`IrFieldDef` - Define a field (convenience; usually use the ``add_*_field`` methods on :class:`IrClassDef`). -- :class:`CodeBuilder` — Assemble Dalvik bytecode from disassembly text lines. -- :class:`CodeDef` — An assembled code item (return value of +- :class:`CodeBuilder` - Assemble Dalvik bytecode from disassembly text lines. +- :class:`CodeDef` - An assembled code item (return value of :meth:`CodeBuilder.build`). -- :class:`ProtoKey` — Method prototype (return type + parameter types). +- :class:`ProtoKey` - Method prototype (return type + parameter types). Quick-start ----------- diff --git a/python/dexrs/code.py b/python/dexrs/code.py index ffb2a74..d28cf54 100755 --- a/python/dexrs/code.py +++ b/python/dexrs/code.py @@ -3,14 +3,14 @@ This module re-exports the full ``code`` sub-package from the native extension, which provides: -- :class:`CodeItemAccessor` — iterate over instructions in a method body. -- :class:`Instruction` — a single decoded Dalvik instruction. -- :class:`Code` — opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). -- :class:`Format` — instruction format enum (``k10x``, ``k35c``, …). -- :class:`IndexType` — index-operand type enum. -- :data:`code_flags`, :data:`verify_flags`, :data:`flags` — flag constant modules. -- :data:`signatures` — well-known pseudo-instruction signatures. -- :data:`vreg` — virtual-register operand accessor functions. +- :class:`CodeItemAccessor` - iterate over instructions in a method body. +- :class:`Instruction` - a single decoded Dalvik instruction. +- :class:`Code` - opcode enum (``NOP``, ``MOVE``, ``INVOKE_VIRTUAL``, …). +- :class:`Format` - instruction format enum (``k10x``, ``k35c``, …). +- :class:`IndexType` - index-operand type enum. +- :data:`code_flags`, :data:`verify_flags`, :data:`flags` - flag constant modules. 
+- :data:`signatures` - well-known pseudo-instruction signatures. +- :data:`vreg` - virtual-register operand accessor functions. Example:: diff --git a/python/dexrs/container.py b/python/dexrs/container.py index e9744e4..1959f32 100755 --- a/python/dexrs/container.py +++ b/python/dexrs/container.py @@ -4,8 +4,8 @@ :meth:`~dexrs.DexFile.from_bytes` or :meth:`~dexrs.DexFile.from_file` when opening a DEX file. -- :class:`InMemoryDexContainer` — wraps an in-memory ``bytes`` buffer. -- :class:`FileDexContainer` — memory-maps a file on disk (zero-copy reads). +- :class:`InMemoryDexContainer` - wraps an in-memory ``bytes`` buffer. +- :class:`FileDexContainer` - memory-maps a file on disk (zero-copy reads). Example:: diff --git a/python/dexrs/editor.py b/python/dexrs/editor.py index d30b6d6..3d3b8b5 100755 --- a/python/dexrs/editor.py +++ b/python/dexrs/editor.py @@ -1,7 +1,7 @@ """Thin Python wrapper re-exporting :class:`DexEditor`. -:class:`DexEditor` provides targeted in-place mutations of a DEX file — -renaming classes, changing access flags, clearing hidden-API annotations — +:class:`DexEditor` provides targeted in-place mutations of a DEX file - +renaming classes, changing access flags, clearing hidden-API annotations - without requiring a full re-assembly of the DEX. The editor is **consumed** once :meth:`~DexEditor.build` or diff --git a/python/dexrs/leb128.py b/python/dexrs/leb128.py index 46290d4..50adee2 100755 --- a/python/dexrs/leb128.py +++ b/python/dexrs/leb128.py @@ -8,9 +8,9 @@ Functions --------- -- :func:`decode_uleb128` — unsigned LEB128 -> non-negative :class:`int`. -- :func:`decode_sleb128` — signed LEB128 -> signed :class:`int`. -- :func:`decode_leb128p1` — ``ULEB128p1`` encoding (value stored as ``n+1``), +- :func:`decode_uleb128` - unsigned LEB128 -> non-negative :class:`int`. +- :func:`decode_sleb128` - signed LEB128 -> signed :class:`int`. 
+- :func:`decode_leb128p1` - ``ULEB128p1`` encoding (value stored as ``n+1``), where ``-1`` encodes the special *no-index* sentinel. Example:: diff --git a/python/dexrs/mutf8.py b/python/dexrs/mutf8.py index c3cba01..941dc20 100755 --- a/python/dexrs/mutf8.py +++ b/python/dexrs/mutf8.py @@ -13,11 +13,11 @@ Functions --------- -- :func:`mutf8_to_str` — strict MUTF-8 bytes -> Python :class:`str`. -- :func:`mutf8_to_str_lossy` — lenient variant; replaces invalid sequences +- :func:`mutf8_to_str` - strict MUTF-8 bytes -> Python :class:`str`. +- :func:`mutf8_to_str_lossy` - lenient variant; replaces invalid sequences with the Unicode replacement character ``U+FFFD``. -- :func:`str_to_mutf8` — Python :class:`str` -> MUTF-8 bytes (strict). -- :func:`str_to_mutf8_lossy` — lenient variant; skips unencodable code points. +- :func:`str_to_mutf8` - Python :class:`str` -> MUTF-8 bytes (strict). +- :func:`str_to_mutf8_lossy` - lenient variant; skips unencodable code points. Example:: From 63ef6d3adf6f6de390330db669799173b6f472b0 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 10:29:16 +0200 Subject: [PATCH 45/46] docs: update readme --- LICENSE | 0 README.md | 213 ++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 182 insertions(+), 31 deletions(-) mode change 100644 => 100755 LICENSE diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md index a0da2bf..b985b17 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,211 @@ # DEXrs -**DEXrs** is an exploratory project in Rust aimed at developing a decompiler for Android executable files (DEX files). It currently covers a low level DEX file parser and disassembler with a Python API. +> [!IMPORTANT] +> Branch `dev/v2-rewrite` contains a complete rewrite of this library including a Python binding.
Installation is as follows: +> +> Crate: +> ```bash +> cargo add --git https://github.com/MatrixEditor/dexrs --branch dev/v2-rewrite +> cargo install --git https://github.com/MatrixEditor/dexrs --branch dev/v2-rewrite +> ``` +> +> Python: +> ```bash +> pip install -v git+https://github.com/MatrixEditor/dexrs@dev/v2-rewrite +> ``` + +**DEXrs** is a Rust library and CLI tool for parsing, inspecting, and modifying Android DEX files. It covers a zero-copy parser, a Dalvik disassembler, a full-featured terminal UI, a DEX modification API, and Python bindings via PyO3. + +#### What this project covers + +- [x] Zero-copy, lazy DEX file parser (fuzzing-hardened) +- [x] Dalvik bytecode disassembler +- [x] `dexrs` CLI — 12 subcommands for inspection and modification +- [x] Interactive TUI (`dexrs inspect`) via ratatui/crossterm +- [x] DEX modification API — in-place patching and structural editing +- [x] Python extension via PyO3 (parser + editor) +- [ ] Benchmarks (WIP) +- [ ] Smali disassembler / decompiler -#### What this project already covers: -- [x] A (*blazingly fast* 🔥) DEX file parser that utilizes - - [x] *zero-copy* wherever applicable - - [x] *lazy-parsing* all the time - - [x] respect fuzzing tests to make sure there's no panic -- [x] Python extension using pyo3 for Pythonists -- [x] A simple disassembler for Dalvik byte code -- [ ] Benchmarks are WIP, but present -- [ ] A simplistic Smali disassembler +## Installation -#### Roadmap +```bash +# CLI binary +cargo install --git https://github.com/MatrixEditor/dexrs dexrs +# with TUI +cargo install --git https://github.com/MatrixEditor/dexrs -F tui dexrs -- [ ] Basic Java decompiler -- [ ] Bytecode modification and DEX rebuild +# Python package +pip install -v git+https://github.com/MatrixEditor/dexrs@dev/v2-rewrite +``` -## Installation +## CLI — `dexrs` +``` +dexrs [--no-color] [--no-verify] [--json] [OPTIONS] +``` + +### Inspection + +| Command | Description | +|---|---| +| `info` | File header, integrity 
hashes, section counts | +| `map` | Map list — all section types and their offsets | +| `classes` | All class definitions with access flags | +| `class --class <CLASS>` | Single class (fields + methods) | +| `methods` | All methods across all classes | +| `fields` | All fields across all classes | +| `disasm --class <CLASS> --method <METHOD>` | Disassemble one method | +| `strings` | Full string pool | +| `types` | All type descriptors | +| `inspect` | Interactive TUI (see below) | -Install DEXrs using Cargo: ```bash -cargo install --git https://github.com/MatrixEditor/dexrs dexrs +dexrs info classes.dex +dexrs classes classes.dex --no-color +dexrs class classes.dex --class LMain; +dexrs disasm classes.dex --class LMain; --method main --json ``` -Or directly using pip: +### Modification + +#### `patch` — in-place (overwrites source file) + ```bash -pip install -ve dexrs@git+https://github.com/MatrixEditor/dexrs.git +# Set class access flags +dexrs patch flags <FILE> --class <CLASS> --flags <FLAGS> + +# Overwrite a single instruction word +dexrs patch insn <FILE> --offset <OFFSET> --pc <PC> --word <WORD> +``` + +```bash +dexrs patch flags classes.dex --class LMain; --flags 0x11 # public final +``` + +#### `edit` — structural (writes to `--output`) + +```bash +dexrs edit rename-class <FILE> <OLD> <NEW> --output <OUT> +dexrs edit set-flags <FILE> --class <CLASS> --flags <FLAGS> --output <OUT> +dexrs edit set-method-flags <FILE> --class <CLASS> --method <METHOD> --flags <FLAGS> --output <OUT> +dexrs edit clear-hiddenapi <FILE> --output <OUT> +``` + +```bash +dexrs edit rename-class classes.dex LMain; LRenamedMain; --output out.dex +dexrs edit set-flags classes.dex --class LMain; --flags 0x21 --output out.dex ``` -## Usage -### Disassembling DEX files +## Rust API -Here’s a quick example of how to parse a DEX file: +### Parsing ```rust -let mut f = File::open("classes.dex").expect("file not found"); -// parse DEX input and verify its contents -let container = DexFileContainer::new(&file) +use dexrs::file::{verifier::VerifyPreset, DexFile, DexFileContainer, DexLocation}; + +// From a file with verification +let file =
std::fs::File::open("classes.dex")?; +let dex = DexFileContainer::new(&file) .verify(true) - .verify_checksum(true); + .verify_checksum(true) + .open()?; -// please use the examples/ directory for more usage information -let dex = container.open()?; +// From memory +let dex = DexFile::open(data, DexLocation::InMemory, VerifyPreset::None)?; ``` -In-memory parsing is also allowed: +See `examples/parse_dex_file.rs` and `examples/dex_basic_ops.rs` for full usage. + +### DEX modification — `DexEditor` + +`DexEditor` owns the DEX bytes and exposes named mutations. Finalise with +`build()` -> `Vec<u8>` or `write_to(path)` — both recalculate the Adler32 checksum. + +```rust +use std::path::Path; +use dexrs::file::DexEditor; + +let mut editor = DexEditor::from_file(Path::new("classes.dex"))?; +// or: DexEditor::from_bytes(bytes)? + +// Accepts dotted ("com.example.Foo"), slash, or descriptor ("Lcom/example/Foo;") form +editor.set_class_access_flags("LMain;", 0x0011 /* public final */)?; +editor.rename_class("LMain;", "LRenamedMain;")?; +editor.set_method_access_flags("LMain;", "main", 0x0009 /* public static */)?; +editor.clear_hiddenapi_flags().ok(); // no-op if section absent + +// Finalise +let bytes: Vec<u8> = editor.build()?; +// or: +editor.write_to(Path::new("out.dex"))?; +``` + +### Low-level checksum + ```rust -let data: [u8] = ...; -let dex = DexFile::open(&data, DexLocation::InMemory, VerifyPreset::All)?; +use dexrs::file::patch::update_checksum; + +let mut raw = std::fs::read("classes.dex")?; +// ... raw byte mutations ... +update_checksum(&mut raw); // recalculate Adler32 in-place +``` + +See `examples/dex_edit.rs` for a complete runnable example.
+ + +## Python API + +### Parsing + +```python +from dexrs import DexFile, VerifyPreset, FileDexContainer + +container = FileDexContainer("classes.dex") +dex = DexFile.from_container(container, verify=VerifyPreset.All) + +for cls in dex.get_class_defs(): + print(cls) +``` + +### DEX modification — `DexEditor` + +```python +from dexrs import DexEditor + +editor = DexEditor.from_file("classes.dex") +# or: DexEditor.from_bytes(open("classes.dex","rb").read()) + +editor.set_class_access_flags("LMain;", 0x0001) # public +editor.rename_class("LMain;", "LRenamedMain;") # rebuild string pool +editor.set_method_access_flags("LMain;", "main", 0x0009) # public static +editor.clear_hiddenapi_flags() # strip hidden-API metadata + +# Get bytes +data = editor.build() +open("out.dex", "wb").write(data) + +# Or write directly (editor is consumed) +editor.write_to("out.dex") ``` +#### Common access flag values + +| Value | Meaning | +|---|---| +| `0x0001` | `public` | +| `0x0002` | `private` | +| `0x0004` | `protected` | +| `0x0008` | `static` | +| `0x0010` | `final` | +| `0x0100` | `native` | +| `0x0400` | `abstract` | +| `0x1000` | `synthetic` | + + ## License -This project is licensed under the [MIT license](LICENSE) \ No newline at end of file +This project is licensed under the [MIT license](LICENSE). From 01fd39c8682db42c0802ca1721abbef985a088cc Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Mon, 6 Apr 2026 10:31:01 +0200 Subject: [PATCH 46/46] docs: update license --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index a8ef4b4..a592d61 100755 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024, MatrixEditor +Copyright (c) 2024-2026, MatrixEditor Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal