Permalink
Browse files

Keeping track of things that were latin1 for roundtripping

  • Loading branch information...
1 parent 3cd4583 commit 4b7cd397369509ef82c0d475785d3ff83631b85e Chris Veenboer committed Oct 11, 2012
@@ -263,20 +263,20 @@ public Object decode() throws SerealException {
return out;
}
- private Map<String, Object> read_hash(byte tag) throws SerealException {
+ private Map<CharSequence, Object> read_hash(byte tag) throws SerealException {
long num_keys = 0;
if( tag == 0 ) {
num_keys = read_varint();
} else {
num_keys = tag & 15;
}
- Map<String, Object> hash = new LinkedHashMap<String, Object>( (int) num_keys );
+ Map<CharSequence, Object> hash = new LinkedHashMap<CharSequence, Object>( (int) num_keys );
log.fine( "Reading " + num_keys + " hash elements" );
for(int i = 0; i < num_keys; i++) {
- String key = (String) readSingleValue();
+ CharSequence key = (CharSequence) readSingleValue();
Object val = readSingleValue();
hash.put( key, val );
}
@@ -332,11 +332,11 @@ Object readSingleValue() throws SerealException {
log.fine( "Read small negative int:" + (tag - 32) );
out = tag - 32;
} else if( (tag & SRL_HDR_SHORT_BINARY_LOW) == SRL_HDR_SHORT_BINARY_LOW ) {
- String short_binary = read_short_binary( tag );
+ CharSequence short_binary = read_short_binary( tag );
log.fine( "Read short binary: " + short_binary + " length " + short_binary.length() );
out = short_binary;
} else if( (tag & SRL_HDR_HASHREF) == SRL_HDR_HASHREF ) {
- Map<String, Object> hash = read_hash( tag );
+ Map<CharSequence, Object> hash = read_hash( tag );
log.fine( "Read hash: " + hash );
out = hash;
} else if( (tag & SRL_HDR_ARRAYREF) == SRL_HDR_ARRAYREF ) {
@@ -419,9 +419,9 @@ Object readSingleValue() throws SerealException {
break;
case SRL_HDR_OBJECTV:
log.fine( "Reading an objectv" );
- String className = (String) get_tracked_item();
+ CharSequence className = (CharSequence) get_tracked_item();
log.fine( "Read an objectv of class: " + className);
- out = new PerlObject( className, readSingleValue() );
+ out = new PerlObject( ((Latin1String)className).getString(), readSingleValue() );
break;
case SRL_HDR_COPY:
log.fine( "Reading a copy" );
@@ -486,12 +486,12 @@ Object readSingleValue() throws SerealException {
* @param tag
* @return
*/
- String read_short_binary(byte tag) {
+ CharSequence read_short_binary(byte tag) { // reads a short binary whose byte count is packed into the tag
int length = tag & SRL_MASK_SHORT_BINARY_LEN;
log.fine( "Short binary, length: " + length );
byte[] buf = new byte[length];
data.get( buf );
- return Charset.forName( "ISO-8859-1" ).decode( ByteBuffer.wrap( buf ) ).toString();
+ return new Latin1String( Charset.forName( "ISO-8859-1" ).decode( ByteBuffer.wrap( buf ) ).toString() ); // decode as ISO-8859-1, then wrap so the value is marked as latin1
}
/**
@@ -538,7 +538,8 @@ long read_zigzag() {
Pattern read_regex() throws SerealException {
int flags = 0;
- String regex = (String) readSingleValue();
+ Object str = readSingleValue();
+ String regex = str instanceof Latin1String ? ((Latin1String)str).getString() : (String) str;
log.fine( "Read pattern: " + regex );
// now read modifiers
@@ -583,12 +584,12 @@ private Object read_object() throws SerealException {
// Maybe we should have some kind of read_string() method?
int position = data.position();
byte tag = data.get();
- String className;
+ Latin1String className;
if( (tag & SRL_HDR_SHORT_BINARY_LOW) == SRL_HDR_SHORT_BINARY_LOW ) {
int length = tag & SRL_MASK_SHORT_BINARY_LEN;
byte[] buf = new byte[length];
data.get( buf );
- className = new String( buf );
+ className = new Latin1String( new String( buf ) );
} else {
throw new SerealException( "Don't know how to read classname from tag" + tag );
}
@@ -606,23 +607,23 @@ private Object read_object() throws SerealException {
Map<String, Object> classData = (Map<String, Object>) structure;
try {
// either an existing java class
- Class<?> c = Class.forName( className );
+ Class<?> c = Class.forName( className.getString() );
return Utils.bless( c, classData );
} catch (ClassNotFoundException e) {
// or we make a new one
if( objectType == ObjectType.POJO ) {
- return Utils.bless( className, classData );
+ return Utils.bless( className.getString(), classData );
} else {
// or we make a Perl-style one
- return new PerlObject( className, classData );
+ return new PerlObject( className.getString(), classData );
}
}
} else if( structure.getClass().isArray() ) {
// nothing we can really do here except make Perl objects..
- return new PerlObject( className, structure );
+ return new PerlObject( className.getString(), structure );
} else if( structure instanceof PerlReference ) {
- return new PerlObject( className, structure);
+ return new PerlObject( className.getString(), structure);
}
// it's a regexp for example
@@ -4,7 +4,6 @@
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
-import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -235,46 +234,45 @@ void write_zigzag(long n) {
/**
- * Encode a short ascii string
+ * Write a short latin1 string: one tag byte (the length ORed with
+ * SRL_HDR_SHORT_BINARY) followed by the bytes themselves. If the bytes are
+ * already tracked at a different position, a COPY is emitted instead.
*
- * @param s
+ * @param latin1
- * String to encode as US-ASCII bytes
+ * the ISO-8859-1 encoded bytes to write
* @throws SerealException
- * if the string is not short enough
+ * if there are more than 31 bytes
*/
- void write_short_binary(String s) throws SerealException {
+ void write_short_binary(byte[] latin1) throws SerealException {
- log.fine( "Writing short binary: " + s );
+ // decoded for messages only: concatenating a byte[] prints its identity, not its content
+ String text = new String( latin1, Charset.forName( "ISO-8859-1" ) );
+ log.fine( "Writing short binary: " + text );
// maybe we can just COPY (but obviously not emit a copy tag for ourselves)
+ // NOTE(review): arrays compare by identity; confirm isTracked/getTrackedItem match byte[] by content
- if( isTracked( s ) && getTrackedItem( s ) != data.size() ) {
- write_copy( s );
+ if( isTracked( latin1 ) && getTrackedItem( latin1 ) != data.size() ) {
+ write_copy( latin1 );
return;
}
- int length = s.length();
+ int length = latin1.length;
if( length > 31 ) {
- throw new SerealException( "Cannot create short binary for " + s + ": too long" );
+ throw new SerealException( "Cannot create short binary for " + text + ": too long" );
}
- // 0 reserves space for the length byte
- byte[] out = Charset.forName( "ISO-8859-1" ).encode( 0 + s ).array();
// length of string
- out[0] = (byte) (length | SerealHeader.SRL_HDR_SHORT_BINARY);
+ data.add( new byte[]{ (byte) (length | SerealHeader.SRL_HDR_SHORT_BINARY) } );
+ size++;
// save it
- data.add( out );
- size += out.length;
+ data.add( latin1 );
+ size += length;
}
+ /**
+ * Emit a COPY tag followed by the varint that getTrackedItem returns for
+ * the given bytes. The copy itself is not tracked.
+ *
+ * @param latin1
+ * the ISO-8859-1 encoded bytes to look up via getTrackedItem
+ */
- protected void write_copy(String s) {
+ protected void write_copy(byte[] latin1) {
- log.fine( "Emitting a copy for: '" + s + "'" );
+ // decode for the message: concatenating a byte[] prints its identity, not its content
+ log.fine( "Emitting a copy for: '" + new String( latin1, Charset.forName( "ISO-8859-1" ) ) + "'" );
data.add( new byte[] { SerealHeader.SRL_HDR_COPY } );
size++;
- write_varint( getTrackedItem( s ) );
+ write_varint( getTrackedItem( latin1 ) );
// do not track since spec says no
}
@@ -297,7 +295,7 @@ void write_regex(Pattern p) throws SerealException {
flags += (p.flags() & Pattern.CASE_INSENSITIVE) != 0 ? "i" : "";
flags += (p.flags() & Pattern.COMMENTS) != 0 ? "x" : "";
- String pattern = p.pattern();
+ Latin1String pattern = new Latin1String( p.pattern() );
int length = pattern.length();
if( length < 32 ) {
@@ -306,7 +304,7 @@ void write_regex(Pattern p) throws SerealException {
size++;
// make array with bytes for (pattern + pattern length tag) + space for flags length tag + flags
- write_short_binary( pattern );
+ write_short_binary( pattern.getBytes() );
data.add( new byte[] { (byte) (flags.length() | SerealHeader.SRL_HDR_SHORT_BINARY) } );
size++;
data.add( flags.getBytes( Charset.forName( "US-ASCII" ) ) );
@@ -391,6 +389,8 @@ private void encode(Object obj) throws SerealException {
write_hash( (HashMap<String, Object>) obj ); // we only allow string keys afaict
} else if( type == String.class ) {
write_string_type( (String) obj );
+ } else if( type == Latin1String.class ) {
+ write_string_type( (Latin1String) obj );
} else if( type.isArray() ) {
write_array( obj );
} else if( type == Pattern.class ) {
@@ -458,7 +458,7 @@ private void write_object(PerlObject po) throws SerealException {
saved_classnames.put( po.getName(), size );
- write_string_type( po.getName() );
+ write_string_type( new Latin1String( po.getName() ) );
}
// write the data structure
@@ -645,19 +645,15 @@ private void write_array(Object obj) throws SerealException {
}
- private Charset charset_latin1 = Charset.forName( "ISO-8859-1" );
private Charset charset_utf8 = Charset.forName( "UTF-8" );
- private void write_string_type(String str) throws SerealException {
-
- CharsetEncoder cl = charset_latin1.newEncoder();
- boolean encodableAsLatin1 = cl.canEncode( str );
+ private void write_string_type(CharSequence str) throws SerealException {
- if( encodableAsLatin1 ) {
+ if( str instanceof Latin1String ) {
log.fine( "Encoding as latin1: " + str );
- byte[] latin1 = str.getBytes( charset_latin1 );
+ byte[] latin1 = ((Latin1String)str).getBytes();
if( str.length() < SerealHeader.SRL_MASK_SHORT_BINARY_LEN ) {
- write_short_binary( str );
+ write_short_binary( latin1 );
} else {
write_bytearray( latin1 );
}
@@ -667,7 +663,7 @@ private void write_string_type(String str) throws SerealException {
data.add( new byte[] { SerealHeader.SRL_HDR_STR_UTF8 } );
size++;
- byte[] utf8 = str.getBytes( charset_utf8 );
+ byte[] utf8 = ((String)str).getBytes( charset_utf8 );
write_varint( utf8.length );
data.add( utf8 );
@@ -0,0 +1,85 @@
+package com.booking.sereal;
+
+import java.nio.charset.Charset;
+
+/**
+ * Wraps a {@link String} to mark it as ISO-8859-1 (latin1) text, so that a
+ * value which was read as latin1 can be written back out as latin1.
+ *
+ * The constructor does not check that the wrapped text is actually
+ * representable in ISO-8859-1.
+ *
+ * Instances compare by the wrapped text, which makes them usable as map
+ * keys, and {@link #toString()} returns that text as the
+ * {@link CharSequence} contract requires.
+ */
+public class Latin1String implements CharSequence {
+
+    /** Looked up once and shared, instead of once per instance. */
+    private static final Charset CHARSET_LATIN1 = Charset.forName( "ISO-8859-1" );
+
+    /** The wrapped text. */
+    private final String s;
+
+    /**
+     * @param s text to wrap; stored as-is, without validation
+     */
+    public Latin1String(String s) {
+        this.s = s;
+    }
+
+    @Override
+    public char charAt(int index) {
+        return s.charAt( index );
+    }
+
+    @Override
+    public int length() {
+        return s.length();
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        return s.subSequence( start, end );
+    }
+
+    /**
+     * @return the wrapped text encoded as ISO-8859-1
+     */
+    public byte[] getBytes() {
+        return s.getBytes( CHARSET_LATIN1 );
+    }
+
+    /**
+     * @return the wrapped text
+     */
+    public String getString() {
+        return s;
+    }
+
+    /**
+     * @return the wrapped text, so logging and concatenation show the content
+     */
+    @Override
+    public String toString() {
+        return s;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if( this == other ) {
+            return true;
+        }
+        if( !(other instanceof Latin1String) ) {
+            return false;
+        }
+        String theirs = ((Latin1String) other).s;
+        return s == null ? theirs == null : s.equals( theirs );
+    }
+
+    @Override
+    public int hashCode() {
+        return s == null ? 0 : s.hashCode();
+    }
+
+}
@@ -35,7 +35,7 @@ public void header() {
public void short_binary() {
try {
- encoder.write_short_binary( "foo" );
+ encoder.write_short_binary( new Latin1String("foo").getBytes() );
} catch (SerealException e) {
fail( e.getMessage() );
}
@@ -56,7 +56,7 @@ public void allTypes() {
encoder.write_bytearray( new byte[] { 0x66, 0x6f, 0x6f } );
encoder.write_regex( Pattern.compile( "(?:foo)[0-9]{3}\\z", Pattern.CASE_INSENSITIVE ) );
- encoder.write_short_binary( "Hello, Sereal!" );
+ encoder.write_short_binary( new Latin1String("Hello, Sereal!").getBytes() );
encoder.write_varint( 2395846 );
encoder.write_zigzag( -345 );
@@ -73,9 +73,9 @@ public void copy() {
// write 3 copies of a string (that should be copied)
try {
- encoder.write_short_binary( "This is quite a long string" );
- encoder.write_short_binary( "This is quite a long string" );
- encoder.write_short_binary( "This is quite a long string" );
+ encoder.write_short_binary( new Latin1String("This is quite a long string").getBytes() );
+ encoder.write_short_binary( new Latin1String("This is quite a long string").getBytes() );
+ encoder.write_short_binary( new Latin1String("This is quite a long string").getBytes() );
} catch (SerealException e) {
fail( e.getMessage() );
}
@@ -127,9 +127,9 @@ public void copy() {
@Test
public void short_binary() {
- String str = "Hello, Sereal!";
+ Latin1String str = new Latin1String( "Hello, Sereal!" );
try {
- encoder.write_short_binary( str );
+ encoder.write_short_binary( str.getBytes() );
} catch (SerealException e) {
fail( e.getMessage() );
}
@@ -57,7 +57,7 @@
*/
public static void main(String[] args) throws IOException {
- String manual = "../test_dir/test_data_00114";
+ String manual = null;//"../test_dir/test_data_00029";
if( args.length == 0 && manual == null ) {
throw new UnsupportedOperationException( "Usage: Example [test_dir OR test_data_00XXXX]" );

0 comments on commit 4b7cd39

Please sign in to comment.