diff --git a/java-assets/compiled-classes/Instructions.class b/java-assets/compiled-classes/Instructions.class
new file mode 100644
index 0000000..4d05d2d
Binary files /dev/null and b/java-assets/compiled-classes/Instructions.class differ
diff --git a/java-assets/src/uk/co/palmr/classfileparser/Instructions.java b/java-assets/src/uk/co/palmr/classfileparser/Instructions.java
new file mode 100644
index 0000000..cf99d42
--- /dev/null
+++ b/java-assets/src/uk/co/palmr/classfileparser/Instructions.java
@@ -0,0 +1,20 @@
+package uk.co.palmr.classfileparser;
+
+public class Instructions {
+    public static int[] test(int x) {
+        int a = 0;
+        switch (x) {
+            case 1: a = 10; break;
+            case 2: a = 20; break;
+            case 3: a = 30; break;
+        }
+        int b = 0;
+        switch (x) {
+            case 100: b = 2; break;
+            case 1000: b = 3; break;
+            case 10000: b = 4; break;
+        }
+        System.out.printf("%d + %d = %d\n", a, b, a + b);
+        return new int[]{ a, b, a + b };
+    }
+}
diff --git a/src/code_attribute/mod.rs b/src/code_attribute/mod.rs
new file mode 100644
index 0000000..bfbf1c3
--- /dev/null
+++ b/src/code_attribute/mod.rs
@@ -0,0 +1,7 @@
+mod parser;
+mod types;
+
+pub use self::types::*;
+
+pub use self::parser::code_parser;
+pub use self::parser::instruction_parser;
diff --git a/src/code_attribute/parser.rs b/src/code_attribute/parser.rs
new file mode 100644
index 0000000..1b9d4fc
--- /dev/null
+++ b/src/code_attribute/parser.rs
@@ -0,0 +1,297 @@
+use code_attribute::types::Instruction;
+use nom::{be_i16, be_i32, be_i8, be_u16, be_u32, be_u8, IResult, Offset};
+
+/// Returns how many bytes of `input` were consumed to reach `remaining`.
+///
+/// Consumes nothing itself; `code_parser` uses the value as the address of the
+/// instruction that starts at `remaining`.
+fn offset<'a>(remaining: &'a [u8], input: &[u8]) -> IResult<&'a [u8], usize> {
+    Ok((remaining, input.offset(remaining)))
+}
+
+/// Skips the zero to three bytes needed to advance `address` to the next
+/// multiple of four, as required before the operands of the switch instructions.
+fn align(input: &[u8], address: usize) -> IResult<&[u8], &[u8]> {
+    take!(input, (4 - address % 4) % 4)
+}
+
+/// Parses the operands of `lookupswitch` after the padding: the default offset,
+/// a pair count, then that many `(match, offset)` pairs, all big-endian.
+fn lookupswitch_parser(input: &[u8]) -> IResult<&[u8], Instruction> {
+    do_parse!(
+        input,
+        default: be_i32
+            >> npairs: be_u32
+            >> pairs:
+                count!(
+                    do_parse!(lookup: be_i32 >> offset: be_i32 >> (lookup, offset)),
+                    npairs as usize
+                )
+            >> (Instruction::Lookupswitch {
+                default: default,
+                pairs: pairs
+            })
+    )
+}
+
+/// Parses the operands of `tableswitch` after the padding: the default offset,
+/// `low`, `high`, then `high - low + 1` jump offsets, all big-endian.
+///
+/// The entry count is computed in `i64` so that a hostile `low`/`high` pair
+/// cannot overflow `i32` arithmetic (a panic in debug builds).
+fn tableswitch_parser(input: &[u8]) -> IResult<&[u8], Instruction> {
+    do_parse!(
+        input,
+        default: be_i32
+            >> low: be_i32
+            >> high: be_i32
+            >> offsets: count!(be_i32, (i64::from(high) - i64::from(low) + 1) as usize)
+            >> (Instruction::Tableswitch {
+                default: default,
+                low: low,
+                high: high,
+                offsets: offsets
+            })
+    )
+}
+
+/// Decodes `input` into `(address, instruction)` pairs, where `address` is the
+/// byte offset of the instruction from the start of `input`.
+///
+/// Decoding stops at the first instruction that cannot be fully parsed; the
+/// undecoded bytes are returned as the remaining input.
+pub fn code_parser(input: &[u8]) -> IResult<&[u8], Vec<(usize, Instruction)>> {
+    many0!(
+        input,
+        complete!(do_parse!(
+            address: apply!(offset, input)
+                >> instruction: apply!(instruction_parser, address)
+                >> (address, instruction)
+        ))
+    )
+}
+
+/// Decodes the single instruction at the start of `input`.
+///
+/// `address` is the offset of the opcode from the start of the code being
+/// decoded. It is only used by `tableswitch` and `lookupswitch`, whose operands
+/// are aligned starting from `address + 1`, the position just past the opcode.
+pub fn instruction_parser(input: &[u8], address: usize) -> IResult<&[u8], Instruction> {
+    switch!(input, be_u8,
+        0x32 => value!(Instruction::Aaload) |
+        0x53 => value!(Instruction::Aastore) |
+        0x01 => value!(Instruction::Aconstnull) |
+        0x19 => map!(be_u8, Instruction::Aload) |
+        0x2a => value!(Instruction::Aload0) |
+        0x2b => value!(Instruction::Aload1) |
+        0x2c => value!(Instruction::Aload2) |
+        0x2d => value!(Instruction::Aload3) |
+        0xbd => map!(be_u16, Instruction::Anewarray) |
+        0xb0 => value!(Instruction::Areturn) |
+        0xbe => value!(Instruction::Arraylength) |
+        0x3a => map!(be_u8, Instruction::Astore) |
+        0x4b => value!(Instruction::Astore0) |
+        0x4c => value!(Instruction::Astore1) |
+        0x4d => value!(Instruction::Astore2) |
+        0x4e => value!(Instruction::Astore3) |
+        0xbf => value!(Instruction::Athrow) |
+        0x33 => value!(Instruction::Baload) |
+        0x54 => value!(Instruction::Bastore) |
+        0x10 => map!(be_i8, Instruction::Bipush) |
+        0x34 => value!(Instruction::Caload) |
+        0x55 => value!(Instruction::Castore) |
+        0xc0 => map!(be_u16, Instruction::Checkcast) |
+        0x90 => value!(Instruction::D2f) |
+        0x8e => value!(Instruction::D2i) |
+        0x8f => value!(Instruction::D2l) |
+        0x63 => value!(Instruction::Dadd) |
+        0x31 => value!(Instruction::Daload) |
+        0x52 => value!(Instruction::Dastore) |
+        0x98 => value!(Instruction::Dcmpg) |
+        0x97 => value!(Instruction::Dcmpl) |
+        0x0e => value!(Instruction::Dconst0) |
+        0x0f => value!(Instruction::Dconst1) |
+        0x6f => value!(Instruction::Ddiv) |
+        0x18 => map!(be_u8, Instruction::Dload) |
+        0x26 => value!(Instruction::Dload0) |
+        0x27 => value!(Instruction::Dload1) |
+        0x28 => value!(Instruction::Dload2) |
+        0x29 => value!(Instruction::Dload3) |
+        0x6b => value!(Instruction::Dmul) |
+        0x77 => value!(Instruction::Dneg) |
+        0x73 => value!(Instruction::Drem) |
+        0xaf => value!(Instruction::Dreturn) |
+        0x39 => map!(be_u8, Instruction::Dstore) |
+        0x47 => value!(Instruction::Dstore0) |
+        0x48 => value!(Instruction::Dstore1) |
+        0x49 => value!(Instruction::Dstore2) |
+        0x4a => value!(Instruction::Dstore3) |
+        0x67 => value!(Instruction::Dsub) |
+        0x59 => value!(Instruction::Dup) |
+        0x5a => value!(Instruction::Dupx1) |
+        0x5b => value!(Instruction::Dupx2) |
+        0x5c => value!(Instruction::Dup2) |
+        0x5d => value!(Instruction::Dup2x1) |
+        0x5e => value!(Instruction::Dup2x2) |
+        0x8d => value!(Instruction::F2d) |
+        0x8b => value!(Instruction::F2i) |
+        0x8c => value!(Instruction::F2l) |
+        0x62 => value!(Instruction::Fadd) |
+        0x30 => value!(Instruction::Faload) |
+        0x51 => value!(Instruction::Fastore) |
+        0x96 => value!(Instruction::Fcmpg) |
+        0x95 => value!(Instruction::Fcmpl) |
+        0x0b => value!(Instruction::Fconst0) |
+        0x0c => value!(Instruction::Fconst1) |
+        0x0d => value!(Instruction::Fconst2) |
+        0x6e => value!(Instruction::Fdiv) |
+        0x17 => map!(be_u8, Instruction::Fload) |
+        0x22 => value!(Instruction::Fload0) |
+        0x23 => value!(Instruction::Fload1) |
+        0x24 => value!(Instruction::Fload2) |
+        0x25 => value!(Instruction::Fload3) |
+        0x6a => value!(Instruction::Fmul) |
+        0x76 => value!(Instruction::Fneg) |
+        0x72 => value!(Instruction::Frem) |
+        0xae => value!(Instruction::Freturn) |
+        0x38 => map!(be_u8, Instruction::Fstore) |
+        0x43 => value!(Instruction::Fstore0) |
+        0x44 => value!(Instruction::Fstore1) |
+        0x45 => value!(Instruction::Fstore2) |
+        0x46 => value!(Instruction::Fstore3) |
+        0x66 => value!(Instruction::Fsub) |
+        0xb4 => map!(be_u16, Instruction::Getfield) |
+        0xb2 => map!(be_u16, Instruction::Getstatic) |
+        0xa7 => map!(be_i16, Instruction::Goto) |
+        0xc8 => map!(be_i32, Instruction::GotoW) |
+        0x91 => value!(Instruction::I2b) |
+        0x92 => value!(Instruction::I2c) |
+        0x87 => value!(Instruction::I2d) |
+        0x86 => value!(Instruction::I2f) |
+        0x85 => value!(Instruction::I2l) |
+        0x93 => value!(Instruction::I2s) |
+        0x60 => value!(Instruction::Iadd) |
+        0x2e => value!(Instruction::Iaload) |
+        0x7e => value!(Instruction::Iand) |
+        0x4f => value!(Instruction::Iastore) |
+        0x02 => value!(Instruction::Iconstm1) |
+        0x03 => value!(Instruction::Iconst0) |
+        0x04 => value!(Instruction::Iconst1) |
+        0x05 => value!(Instruction::Iconst2) |
+        0x06 => value!(Instruction::Iconst3) |
+        0x07 => value!(Instruction::Iconst4) |
+        0x08 => value!(Instruction::Iconst5) |
+        0x6c => value!(Instruction::Idiv) |
+        0xa5 => map!(be_i16, Instruction::IfAcmpeq) |
+        0xa6 => map!(be_i16, Instruction::IfAcmpne) |
+        0x9f => map!(be_i16, Instruction::IfIcmpeq) |
+        0xa0 => map!(be_i16, Instruction::IfIcmpne) |
+        0xa1 => map!(be_i16, Instruction::IfIcmplt) |
+        0xa2 => map!(be_i16, Instruction::IfIcmpge) |
+        0xa3 => map!(be_i16, Instruction::IfIcmpgt) |
+        0xa4 => map!(be_i16, Instruction::IfIcmple) |
+        0x99 => map!(be_i16, Instruction::Ifeq) |
+        0x9a => map!(be_i16, Instruction::Ifne) |
+        0x9b => map!(be_i16, Instruction::Iflt) |
+        0x9c => map!(be_i16, Instruction::Ifge) |
+        0x9d => map!(be_i16, Instruction::Ifgt) |
+        0x9e => map!(be_i16, Instruction::Ifle) |
+        0xc7 => map!(be_i16, Instruction::Ifnonnull) |
+        0xc6 => map!(be_i16, Instruction::Ifnull) |
+        0x84 => do_parse!(index: be_u8 >> value: be_i8 >> (Instruction::Iinc{index: index, value: value})) |
+        0x15 => map!(be_u8, Instruction::Iload) |
+        0x1a => value!(Instruction::Iload0) |
+        0x1b => value!(Instruction::Iload1) |
+        0x1c => value!(Instruction::Iload2) |
+        0x1d => value!(Instruction::Iload3) |
+        0x68 => value!(Instruction::Imul) |
+        0x74 => value!(Instruction::Ineg) |
+        0xc1 => map!(be_u16, Instruction::Instanceof) |
+        0xba => do_parse!(index: be_u16 >> tag!(&[0, 0]) >> (Instruction::Invokedynamic(index))) |
+        0xb9 => do_parse!(index: be_u16 >> count: be_u8 >> tag!(&[0]) >> (Instruction::Invokeinterface{index: index, count: count})) |
+        0xb7 => map!(be_u16, Instruction::Invokespecial) |
+        0xb8 => map!(be_u16, Instruction::Invokestatic) |
+        0xb6 => map!(be_u16, Instruction::Invokevirtual) |
+        0x80 => value!(Instruction::Ior) |
+        0x70 => value!(Instruction::Irem) |
+        0xac => value!(Instruction::Ireturn) |
+        0x78 => value!(Instruction::Ishl) |
+        0x7a => value!(Instruction::Ishr) |
+        0x36 => map!(be_u8, Instruction::Istore) |
+        0x3b => value!(Instruction::Istore0) |
+        0x3c => value!(Instruction::Istore1) |
+        0x3d => value!(Instruction::Istore2) |
+        0x3e => value!(Instruction::Istore3) |
+        0x64 => value!(Instruction::Isub) |
+        0x7c => value!(Instruction::Iushr) |
+        0x82 => value!(Instruction::Ixor) |
+        0xa8 => map!(be_i16, Instruction::Jsr) |
+        0xc9 => map!(be_i32, Instruction::JsrW) |
+        0x8a => value!(Instruction::L2d) |
+        0x89 => value!(Instruction::L2f) |
+        0x88 => value!(Instruction::L2i) |
+        0x61 => value!(Instruction::Ladd) |
+        0x2f => value!(Instruction::Laload) |
+        0x7f => value!(Instruction::Land) |
+        0x50 => value!(Instruction::Lastore) |
+        0x94 => value!(Instruction::Lcmp) |
+        0x09 => value!(Instruction::Lconst0) |
+        0x0a => value!(Instruction::Lconst1) |
+        0x12 => map!(be_u8, Instruction::Ldc) |
+        0x13 => map!(be_u16, Instruction::LdcW) |
+        0x14 => map!(be_u16, Instruction::Ldc2W) |
+        0x6d => value!(Instruction::Ldiv) |
+        0x16 => map!(be_u8, Instruction::Lload) |
+        0x1e => value!(Instruction::Lload0) |
+        0x1f => value!(Instruction::Lload1) |
+        0x20 => value!(Instruction::Lload2) |
+        0x21 => value!(Instruction::Lload3) |
+        0x69 => value!(Instruction::Lmul) |
+        0x75 => value!(Instruction::Lneg) |
+        0xab => preceded!(apply!(align, address + 1), lookupswitch_parser) |
+        0x81 => value!(Instruction::Lor) |
+        0x71 => value!(Instruction::Lrem) |
+        0xad => value!(Instruction::Lreturn) |
+        0x79 => value!(Instruction::Lshl) |
+        0x7b => value!(Instruction::Lshr) |
+        0x37 => map!(be_u8, Instruction::Lstore) |
+        0x3f => value!(Instruction::Lstore0) |
+        0x40 => value!(Instruction::Lstore1) |
+        0x41 => value!(Instruction::Lstore2) |
+        0x42 => value!(Instruction::Lstore3) |
+        0x65 => value!(Instruction::Lsub) |
+        0x7d => value!(Instruction::Lushr) |
+        0x83 => value!(Instruction::Lxor) |
+        0xc2 => value!(Instruction::Monitorenter) |
+        0xc3 => value!(Instruction::Monitorexit) |
+        0xc5 => do_parse!(index: be_u16 >> dimensions: be_u8 >> (Instruction::Multianewarray{index: index, dimensions: dimensions})) |
+        0xbb => map!(be_u16, Instruction::New) |
+        0xbc => map!(be_u8, Instruction::Newarray) |
+        0x00 => value!(Instruction::Nop) |
+        0x57 => value!(Instruction::Pop) |
+        0x58 => value!(Instruction::Pop2) |
+        0xb5 => map!(be_u16, Instruction::Putfield) |
+        0xb3 => map!(be_u16, Instruction::Putstatic) |
+        0xa9 => map!(be_u8, Instruction::Ret) |
+        0xb1 => value!(Instruction::Return) |
+        0x35 => value!(Instruction::Saload) |
+        0x56 => value!(Instruction::Sastore) |
+        0x11 => map!(be_i16, Instruction::Sipush) |
+        0x5f => value!(Instruction::Swap) |
+        0xaa => preceded!(apply!(align, address + 1), tableswitch_parser) |
+        0xc4 => switch!(be_u8,
+            0x19 => map!(be_u16, Instruction::AloadWide) |
+            0x3a => map!(be_u16, Instruction::AstoreWide) |
+            0x18 => map!(be_u16, Instruction::DloadWide) |
+            0x39 => map!(be_u16, Instruction::DstoreWide) |
+            0x17 => map!(be_u16, Instruction::FloadWide) |
+            0x38 => map!(be_u16, Instruction::FstoreWide) |
+            0x15 => map!(be_u16, Instruction::IloadWide) |
+            0x36 => map!(be_u16, Instruction::IstoreWide) |
+            0x16 => map!(be_u16, Instruction::LloadWide) |
+            0x37 => map!(be_u16, Instruction::LstoreWide) |
+            0xa9 => map!(be_u16, Instruction::RetWide) |
+            0x84 => do_parse!(index: be_u16 >> value: be_i16 >> (Instruction::IincWide{index: index, value: value}))
+        )
+    )
+}
diff --git a/src/code_attribute/types.rs b/src/code_attribute/types.rs
new file mode 100644
index 0000000..7946538
--- /dev/null
+++ b/src/code_attribute/types.rs
@@ -0,0 +1,239 @@
+/// A single decoded JVM bytecode instruction together with its operands.
+///
+/// The `...Wide` variants are the forms prefixed by the `wide` opcode (`0xc4`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Instruction {
+    Aaload,
+    Aastore,
+    Aconstnull,
+    Aload(u8),
+    AloadWide(u16),
+    Aload0,
+    Aload1,
+    Aload2,
+    Aload3,
+    Anewarray(u16),
+    Areturn,
+    Arraylength,
+    Astore(u8),
+    AstoreWide(u16),
+    Astore0,
+    Astore1,
+    Astore2,
+    Astore3,
+    Athrow,
+    Baload,
+    Bastore,
+    Bipush(i8),
+    Caload,
+    Castore,
+    Checkcast(u16),
+    D2f,
+    D2i,
+    D2l,
+    Dadd,
+    Daload,
+    Dastore,
+    Dcmpg,
+    Dcmpl,
+    Dconst0,
+    Dconst1,
+    Ddiv,
+    Dload(u8),
+    DloadWide(u16),
+    Dload0,
+    Dload1,
+    Dload2,
+    Dload3,
+    Dmul,
+    Dneg,
+    Drem,
+    Dreturn,
+    Dstore(u8),
+    DstoreWide(u16),
+    Dstore0,
+    Dstore1,
+    Dstore2,
+    Dstore3,
+    Dsub,
+    Dup,
+    Dupx1,
+    Dupx2,
+    Dup2,
+    Dup2x1,
+    Dup2x2,
+    F2d,
+    F2i,
+    F2l,
+    Fadd,
+    Faload,
+    Fastore,
+    Fcmpg,
+    Fcmpl,
+    Fconst0,
+    Fconst1,
+    Fconst2,
+    Fdiv,
+    Fload(u8),
+    FloadWide(u16),
+    Fload0,
+    Fload1,
+    Fload2,
+    Fload3,
+    Fmul,
+    Fneg,
+    Frem,
+    Freturn,
+    Fstore(u8),
+    FstoreWide(u16),
+    Fstore0,
+    Fstore1,
+    Fstore2,
+    Fstore3,
+    Fsub,
+    Getfield(u16),
+    Getstatic(u16),
+    Goto(i16),
+    GotoW(i32),
+    I2b,
+    I2c,
+    I2d,
+    I2f,
+    I2l,
+    I2s,
+    Iadd,
+    Iaload,
+    Iand,
+    Iastore,
+    Iconstm1,
+    Iconst0,
+    Iconst1,
+    Iconst2,
+    Iconst3,
+    Iconst4,
+    Iconst5,
+    Idiv,
+    IfAcmpeq(i16),
+    IfAcmpne(i16),
+    IfIcmpeq(i16),
+    IfIcmpne(i16),
+    IfIcmplt(i16),
+    IfIcmpge(i16),
+    IfIcmpgt(i16),
+    IfIcmple(i16),
+    Ifeq(i16),
+    Ifne(i16),
+    Iflt(i16),
+    Ifge(i16),
+    Ifgt(i16),
+    Ifle(i16),
+    Ifnonnull(i16),
+    Ifnull(i16),
+    Iinc {
+        index: u8,
+        value: i8,
+    },
+    IincWide {
+        index: u16,
+        value: i16,
+    },
+    Iload(u8),
+    IloadWide(u16),
+    Iload0,
+    Iload1,
+    Iload2,
+    Iload3,
+    Imul,
+    Ineg,
+    Instanceof(u16),
+    Invokedynamic(u16),
+    Invokeinterface {
+        index: u16,
+        count: u8,
+    },
+    Invokespecial(u16),
+    Invokestatic(u16),
+    Invokevirtual(u16),
+    Ior,
+    Irem,
+    Ireturn,
+    Ishl,
+    Ishr,
+    Istore(u8),
+    IstoreWide(u16),
+    Istore0,
+    Istore1,
+    Istore2,
+    Istore3,
+    Isub,
+    Iushr,
+    Ixor,
+    Jsr(i16),
+    JsrW(i32),
+    L2d,
+    L2f,
+    L2i,
+    Ladd,
+    Laload,
+    Land,
+    Lastore,
+    Lcmp,
+    Lconst0,
+    Lconst1,
+    Ldc(u8),
+    LdcW(u16),
+    Ldc2W(u16),
+    Ldiv,
+    Lload(u8),
+    LloadWide(u16),
+    Lload0,
+    Lload1,
+    Lload2,
+    Lload3,
+    Lmul,
+    Lneg,
+    Lookupswitch {
+        default: i32,
+        pairs: Vec<(i32, i32)>,
+    },
+    Lor,
+    Lrem,
+    Lreturn,
+    Lshl,
+    Lshr,
+    Lstore(u8),
+    LstoreWide(u16),
+    Lstore0,
+    Lstore1,
+    Lstore2,
+    Lstore3,
+    Lsub,
+    Lushr,
+    Lxor,
+    Monitorenter,
+    Monitorexit,
+    Multianewarray {
+        index: u16,
+        dimensions: u8,
+    },
+    New(u16),
+    Newarray(u8),
+    Nop,
+    Pop,
+    Pop2,
+    Putfield(u16),
+    Putstatic(u16),
+    Ret(u8),
+    RetWide(u16),
+    Return,
+    Saload,
+    Sastore,
+    Sipush(i16),
+    Swap,
+    Tableswitch {
+        default: i32,
+        low: i32,
+        high: i32,
+        offsets: Vec<i32>,
+    },
+}
diff --git a/src/lib.rs b/src/lib.rs
index 2d1e4b6..97ee27f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,6 +16,8 @@ pub mod constant_info;
 pub mod field_info;
 pub mod method_info;
 
+pub mod code_attribute;
+
 pub mod parser;
 pub mod types;
 
diff --git a/tests/code_attribute.rs b/tests/code_attribute.rs
new file mode 100644
index 0000000..0730dff
--- /dev/null
+++ b/tests/code_attribute.rs
@@ -0,0 +1,81 @@
+extern crate classfile_parser;
+
+use classfile_parser::attribute_info::code_attribute_parser;
+use classfile_parser::class_parser;
+use classfile_parser::code_attribute::{code_parser, instruction_parser, Instruction};
+use classfile_parser::method_info::MethodAccessFlags;
+
+#[test]
+fn test_simple() {
+    let instruction = &[0x11, 0xff, 0xfe];
+    assert_eq!(
+        Ok((&[][..], Instruction::Sipush(-2i16))),
+        instruction_parser(instruction, 0)
+    );
+}
+
+#[test]
+fn test_wide() {
+    let instruction = &[0xc4, 0x15, 0xaa, 0xbb];
+    assert_eq!(
+        Ok((&[][..], Instruction::IloadWide(0xaabb))),
+        instruction_parser(instruction, 0)
+    );
+}
+
+#[test]
+fn test_alignment() {
+    let instructions = vec![
+        (
+            3,
+            vec![
+                0xaa, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0, 21, 0, 0, 0, 30, 0, 0, 0, 31,
+            ],
+        ),
+        (
+            0,
+            vec![
+                0xaa, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0, 21, 0, 0, 0, 30, 0, 0, 0, 31,
+            ],
+        ),
+    ];
+    let expected = Ok((
+        &[][..],
+        Instruction::Tableswitch {
+            default: 10,
+            low: 20,
+            high: 21,
+            offsets: vec![30, 31],
+        },
+    ));
+    for (address, instruction) in instructions {
+        assert_eq!(expected, instruction_parser(&instruction, address));
+    }
+}
+
+#[test]
+fn test_incomplete() {
+    let code = &[0x59, 0x59, 0xc4, 0x15]; // dup, dup, then a truncated `wide iload`
+    let expected = Ok((
+        &[0xc4, 0x15][..],
+        vec![(0, Instruction::Dup), (1, Instruction::Dup)],
+    ));
+    assert_eq!(expected, code_parser(code));
+}
+
+#[test]
+fn test_class() {
+    let class_bytes = include_bytes!("../java-assets/compiled-classes/Instructions.class");
+    let (_, class) = class_parser(class_bytes).unwrap();
+    let method_info = &class
+        .methods
+        .iter()
+        .find(|m| m.access_flags.contains(MethodAccessFlags::STATIC))
+        .unwrap();
+    let (_, code_attribute) = code_attribute_parser(&method_info.attributes[0].info).unwrap();
+
+    let parsed = code_parser(&code_attribute.code);
+
+    assert_eq!(true, parsed.is_ok());
+    assert_eq!(64, parsed.unwrap().1.len());
+}