deflate.cs

// deflate.cs -- internal compression state & compress data using the deflation algorithm
// Copyright (C) 1995-2010 Jean-loup Gailly.
// Copyright (C) 2007-2011 by the Authors
// For conditions of distribution and use, see copyright notice in License.txt

#region ALGORITHM , ACKNOWLEDGEMENTS & REFERENCES
//  ALGORITHM
//
//      The "deflation" process depends on being able to identify portions
//      of the input text which are identical to earlier input (within a
//      sliding window trailing behind the input currently being processed).
//
//      The most straightforward technique turns out to be the fastest for
//      most input files: try all possible matches and select the longest.
//      The key feature of this algorithm is that insertions into the string
//      dictionary are very simple and thus fast, and deletions are avoided
//      completely. Insertions are performed at each input character, whereas
//      string matches are performed only when the previous match ends. So it
//      is preferable to spend more time in matches to allow very fast string
//      insertions and avoid deletions. The matching algorithm for small
//      strings is inspired from that of Rabin & Karp. A brute force approach
//      is used to find longer strings when a small match has been found.
//      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
//      (by Leonid Broukhis).
//         A previous version of this file used a more sophisticated algorithm
//      (by Fiala and Greene) which is guaranteed to run in linear amortized
//      time, but has a larger average cost, uses more memory and is patented.
//      However the F&G algorithm may be faster for some highly redundant
//      files if the parameter max_chain_length (described below) is too large.
//
//  ACKNOWLEDGEMENTS
//
//      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
//      I found it in 'freeze' written by Leonid Broukhis.
//      Thanks to many people for bug reports and testing.
//
//  REFERENCES
//
//      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
//      Available in http://www.ietf.org/rfc/rfc1951.txt
//
//      A description of the Rabin and Karp algorithm is given in the book
//         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
//
//      Fiala,E.R., and Greene,D.H.
//         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
#endregion

using System;

namespace Free.Ports.zLib
{
	public static partial class zlib
	{
		#region deflate.h
		// ===========================================================================
		// Internal compression state.

		// number of length codes, not counting the special END_BLOCK code
		private const int LENGTH_CODES=29;

		// number of literal bytes 0..255
		private const int LITERALS=256;

		// number of Literal or Length codes, including the END_BLOCK code
		private const int L_CODES=LITERALS+1+LENGTH_CODES;

		// number of distance codes
		private const int D_CODES=30;

		// number of codes used to transfer the bit lengths
		private const int BL_CODES=19;

		// maximum heap size
		private const int HEAP_SIZE=2*L_CODES+1;

		// All codes must not exceed MAX_BITS bits
		private const int MAX_BITS=15;

		// Stream status
		private const int INIT_STATE=42;
		private const int EXTRA_STATE=69;
		private const int NAME_STATE=73;
		private const int COMMENT_STATE=91;
		private const int HCRC_STATE=103;
		private const int BUSY_STATE=113;
		private const int FINISH_STATE=666;

		// Data structure describing a single value and its code string.
		struct ct_data
		{
			ushort freq;
			public ushort Freq { get { return freq; } set { freq=value; } } // frequency count
			public ushort Code { get { return freq; } set { freq=value; } }	// bit string

			ushort dad;
			public ushort Dad { get { return dad; } set { dad=value; } }		// father node in Huffman tree
			public ushort Len { get { return dad; } set { dad=value; } }		// length of bit string

			public ct_data(ushort freq, ushort dad)
			{
				this.freq=freq;
				this.dad=dad;
			}

			public ct_data(ct_data data)
			{
				freq=data.freq;
				dad=data.dad;
			}
		}

		struct tree_desc
		{
			public ct_data[] dyn_tree;			// the dynamic tree
			public int max_code;				// largest code with non zero frequency
			public static_tree_desc stat_desc;	// the corresponding static tree

			public tree_desc(tree_desc desc)
			{
				dyn_tree=desc.dyn_tree;
				max_code=desc.max_code;
				stat_desc=desc.stat_desc;
			}
		}

		class deflate_state //internal_state 
		{
			public z_stream strm;			// pointer back to this zlib stream
			public int status;				// as the name implies
			public byte[] pending_buf;		// output still pending
			public uint pending_buf_size;	// size of pending_buf

			public int pending_out;			// next pending byte to output to the stream

			public uint pending;			// nb of bytes in the pending buffer
			public int wrap;				// bit 0 true for zlib, bit 1 true for gzip
			public gz_header gzhead;		// gzip header information to write
			public uint gzindex;			// where in extra, name, or comment
			public byte method;				// STORED (for zip only) or DEFLATED
			public int last_flush;			// value of flush param for previous deflate call

			// used by deflate.c:
			public uint w_size;	// LZ77 window size (32K by default)
			public uint w_bits;	// log2(w_size)  (8..16)
			public uint w_mask;	// w_size - 1

			// Sliding window. Input bytes are read into the second half of the window,
			// and move to the first half later to keep a dictionary of at least wSize
			// bytes. With this organization, matches are limited to a distance of
			// wSize-MAX_MATCH bytes, but this ensures that IO is always
			// performed with a length multiple of the block size. Also, it limits
			// the window size to 64K, which is quite useful on MSDOS.
			// To do: use the user input buffer as sliding window.
			public byte[] window;

			// Actual size of window: 2*wSize, except when the user input buffer
			// is directly used as sliding window.
			public uint window_size;

			// Link to older string with same hash index. To limit the size of this
			// array to 64K, this link is maintained only for the last 32K strings.
			// An index in this array is thus a window index modulo 32K.
			public ushort[] prev;

			public ushort[] head;	// Heads of the hash chains or NIL.

			public uint ins_h;		// hash index of string to be inserted
			public uint hash_size;	// number of elements in hash table
			public uint hash_bits;	// log2(hash_size)
			public uint hash_mask;	// hash_size-1

			// Number of bits by which ins_h must be shifted at each input
			// step. It must be such that after MIN_MATCH steps, the oldest
			// byte no longer takes part in the hash key, that is:
			//   hash_shift * MIN_MATCH >= hash_bits
			public uint hash_shift;

			// Window position at the beginning of the current output block. Gets
			// negative when the window is moved backwards.
			public int block_start;

			public uint match_length;	// length of best match
			public uint prev_match;		// previous match
			public int match_available;	// set if previous match exists
			public uint strstart;		// start of string to insert
			public uint match_start;	// start of matching string
			public uint lookahead;		// number of valid bytes ahead in window

			// Length of the best match at previous step. Matches not greater than this
			// are discarded. This is used in the lazy match evaluation.
			public uint prev_length;

			// To speed up deflation, hash chains are never searched beyond this
			// length.  A higher limit improves compression ratio but degrades the speed.
			public uint max_chain_length;

			// Attempt to find a better match only when the current match is strictly
			// smaller than this value. This mechanism is used only for compression
			// levels >= 4.
			public uint max_lazy_match;

			// Insert new strings in the hash table only if the match length is not
			// greater than this length. This saves time but degrades compression.
			// max_insert_length is used only for compression levels <= 3.
			//#define max_insert_length max_lazy_match

			public int level;		// compression level (1..9)
			public int strategy;	// favor or force Huffman coding

			public uint good_match;	// Use a faster search when the previous match is longer than this

			public int nice_match;	// Stop searching when current match exceeds this

			// used by trees.c:
			public ct_data[] dyn_ltree=new ct_data[HEAP_SIZE];		// literal and length tree
			public ct_data[] dyn_dtree=new ct_data[2*D_CODES+1];	// distance tree
			public ct_data[] bl_tree=new ct_data[2*BL_CODES+1];		// Huffman tree for bit lengths

			public tree_desc l_desc=new tree_desc();	// desc. for literal tree
			public tree_desc d_desc=new tree_desc();	// desc. for distance tree
			public tree_desc bl_desc=new tree_desc();	// desc. for bit length tree

			// number of codes at each bit length for an optimal tree
			public ushort[] bl_count=new ushort[MAX_BITS+1];

			// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
			// The same heap array is used to build all trees.
			public int[] heap=new int[2*L_CODES+1];		// heap used to build the Huffman trees
			public int heap_len;	// number of elements in the heap
			public int heap_max;	// element of largest frequency

			// Depth of each subtree used as tie breaker for trees of equal frequency
			public byte[] depth=new byte[2*L_CODES+1];

			public byte[] l_buf;		// buffer for literals or lengths

			// Size of match buffer for literals/lengths.  There are 4 reasons for
			// limiting lit_bufsize to 64K:
			//   - frequencies can be kept in 16 bit counters
			//   - if compression is not successful for the first block, all input
			//     data is still in the window so we can still emit a stored block even
			//     when input comes from standard input.  (This can also be done for
			//     all blocks if lit_bufsize is not greater than 32K.)
			//   - if compression is not successful for a file smaller than 64K, we can
			//     even emit a stored file instead of a stored block (saving 5 bytes).
			//     This is applicable only for zip (not zlib).
			//   - creating new Huffman trees less frequently may not provide fast
			//     adaptation to changes in the input data statistics. (Take for
			//     example a binary file with poorly compressible code followed by
			//     a highly compressible string table.) Smaller buffer sizes give
			//     fast adaptation but have of course the overhead of transmitting
			//     trees more frequently.
			//   - I can't count above 4
			public uint lit_bufsize;

			public uint last_lit;      // running index in l_buf

			// Buffer for distances. To simplify the code, d_buf and l_buf have
			// the same number of elements. To use different lengths, an extra flag
			// array would be necessary.
			public ushort[] d_buf;

			public uint opt_len;		// bit length of current block with optimal trees
			public uint static_len;		// bit length of current block with static trees
			public uint matches;		// number of string matches in current block
			public int last_eob_len;	// bit length of EOB code for last block

			// Output buffer. bits are inserted starting at the bottom (least
			// significant bits).
			public ushort bi_buf;

			// Number of valid bits in bi_buf.  All bits above the last valid bit
			// are always zero.
			public int bi_valid;

			// High water mark offset in window for initialized bytes -- bytes above
			// this are set to zero in order to avoid memory check warnings when
			// longest match routines access bytes past the input.  This is then
			// updated to the new high water mark.
			public uint high_water;

			public deflate_state Clone()
			{
				deflate_state ret=(deflate_state)MemberwiseClone();

				ret.dyn_ltree=new ct_data[HEAP_SIZE];
				for(int i=0; i<HEAP_SIZE; i++) ret.dyn_ltree[i]=new ct_data(dyn_ltree[i]);
				
				ret.dyn_dtree=new ct_data[2*D_CODES+1];
				for(int i=0; i<(2*D_CODES+1); i++) ret.dyn_dtree[i]=new ct_data(dyn_dtree[i]);

				ret.bl_tree=new ct_data[2*BL_CODES+1];
				for(int i=0; i<(2*BL_CODES+1); i++) ret.bl_tree[i]=new ct_data(bl_tree[i]);

				ret.bl_count=new ushort[MAX_BITS+1]; bl_count.CopyTo(ret.bl_count, 0);
				ret.heap=new int[2*L_CODES+1]; heap.CopyTo(ret.heap, 0);
				ret.depth=new byte[2*L_CODES+1]; depth.CopyTo(ret.depth, 0);

				ret.l_desc=new tree_desc(l_desc);	// desc. for literal tree
				ret.d_desc=new tree_desc(d_desc);	// desc. for distance tree
				ret.bl_desc=new tree_desc(bl_desc);

				return ret;
			}
		}

		// Output a byte on the stream.
		// IN assertion: there is enough room in pending_buf.
		//#define put_byte(s, c) {s.pending_buf[s.pending++] = (c);}

		// Minimum amount of lookahead, except at the end of the input file.
		// See deflate.c for comments about the MIN_MATCH+1.
		private const int MIN_LOOKAHEAD=MAX_MATCH+MIN_MATCH+1;

		// In order to simplify the code, particularly on 16 bit machines, match
		// distances are limited to MAX_DIST instead of WSIZE.
		//#define MAX_DIST(s)  (s.w_size-MIN_LOOKAHEAD)

		// Number of bytes after end of data in window to initialize in order to avoid
		// memory checker errors from longest match routines
		private const int WIN_INIT=MAX_MATCH;

		// Mapping from a distance to a distance code. dist is the distance - 1 and
		// must not have side effects. _dist_code[256] and _dist_code[257] are never
		// used.
		//#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])

		#endregion

		// If you use the zlib library in a product, an acknowledgment is welcome
		// in the documentation of your product. If for some reason you cannot
		// include such an acknowledgment, I would appreciate that you keep this
		// copyright string in the executable of your product.
		private const string deflate_copyright=" deflate 1.2.5 Copyright 1995-2010 Jean-loup Gailly ";

		// ===========================================================================
		// Function prototypes.

		enum block_state
		{
			need_more,      // block not completed, need more input or more output
			block_done,     // block flush performed
			finish_started, // finish started, need only more output at next deflate
			finish_done     // finish done, accept no more input or output
		}

		// Compression function. Returns the block state after the call.
		delegate block_state compress_func(deflate_state s, int flush);

		// ===========================================================================
		// Local data

		// Tail of hash chains
		private const int NIL=0;

		// Matches of length 3 are discarded if their distance exceeds TOO_FAR
		private const int TOO_FAR=4096;

		// Values for max_lazy_match, good_match and max_chain_length, depending on
		// the desired pack level (0..9). The values given below have been tuned to
		// exclude worst case performance for pathological files. Better values may be
		// found for specific files.
		struct config
		{
			public ushort good_length;	// reduce lazy search above this match length
			public ushort max_lazy;		// do not perform lazy search above this match length
			public ushort nice_length;	// quit search above this match length
			public ushort max_chain;
			public compress_func func;

			public config(ushort good_length, ushort max_lazy, ushort nice_length, ushort max_chain, compress_func func)
			{
				this.good_length=good_length;
				this.max_lazy=max_lazy;
				this.nice_length=nice_length;
				this.max_chain=max_chain;
				this.func=func;
			}
		}

		static readonly config[] configuration_table=new config[] 
		{ // good lazy nice chain
			new config( 0,   0,   0,    0, deflate_stored),	// store only
			new config( 4,   4,   8,    4, deflate_fast),	// max speed, no lazy matches
			new config( 4,   5,  16,    8, deflate_fast),
			new config( 4,   6,  32,   32, deflate_fast),
			new config( 4,   4,  16,   16, deflate_slow),	// lazy matches
			new config( 8,  16,  32,   32, deflate_slow),
			new config( 8,  16, 128,  128, deflate_slow),
			new config( 8,  32, 128,  256, deflate_slow),
			new config(32, 128, 258, 1024, deflate_slow),
			new config(32, 258, 258, 4096, deflate_slow)	// max compression
		};

		// Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
		// For deflate_fast() (levels <= 3) good is ignored and lazy has a different
		// meaning.

		// ===========================================================================
		// Update a hash value with the given input byte
		// IN  assertion: all calls to to UPDATE_HASH are made with consecutive
		//    input characters, so that a running hash key can be computed from the
		//    previous key instead of complete recalculation each time.
		//#define UPDATE_HASH(s,h,c) h = ((h<<s.hash_shift) ^ c) & s.hash_mask

		// ===========================================================================
		// Insert string str in the dictionary and set match_head to the previous head
		// of the hash chain (the most recent string with same hash key). Return
		// the previous length of the hash chain.
		// If this file is compiled with -DFASTEST, the compression level is forced
		// to 1, and no hash chains are maintained.
		// IN  assertion: all calls to to INSERT_STRING are made with consecutive
		//    input characters and the first MIN_MATCH bytes of str are valid
		//    (except for the last MIN_MATCH-1 bytes of the input file).
		//#define INSERT_STRING(s, str, match_head) \
		//		s.ins_h = ((s.ins_h<<(int)s.hash_shift) ^ s.window[(str) + (MIN_MATCH-1)]) & s.hash_mask; \
		//		match_head = s.prev[(str) & s.w_mask] = s.head[s.ins_h]; \
		//		s.head[s.ins_h] = (unsigned short)str

		// ===========================================================================
		// Initialize the hash table (avoiding 64K overflow for 16 bit systems).
		// prev[] will be initialized on the fly.

		// =========================================================================
		//   Initializes the internal stream state for compression. The fields
		// zalloc, zfree and opaque must be initialized before by the caller.
		// If zalloc and zfree are set to Z_NULL, deflateInit updates them to
		// use default allocation functions.

		//   The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
		// 1 gives best speed, 9 gives best compression, 0 gives no compression at
		// all (the input data is simply copied a block at a time).
		// Z_DEFAULT_COMPRESSION requests a default compromise between speed and
		// compression (currently equivalent to level 6).

		//   deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
		// enough memory, Z_STREAM_ERROR if level is not a valid compression level,
		// Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
		// with the version assumed by the caller (ZLIB_VERSION).
		// msg is set to null if there is no error message.  deflateInit does not
		// perform any compression: this will be done by deflate().
		public static int deflateInit(z_stream strm, int level)
		{
			return deflateInit2(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
			// Todo: ignore strm.next_in if we use it as window
		}

		// =========================================================================
		//   This is another version of deflateInit with more compression options. The
		// fields next_in, zalloc, zfree and opaque must be initialized before by
		// the caller.

		//   The method parameter is the compression method. It must be Z_DEFLATED in
		// this version of the library.

		//   The windowBits parameter is the base two logarithm of the window size
		// (the size of the history buffer). It should be in the range 8..15 for this
		// version of the library. Larger values of this parameter result in better
		// compression at the expense of memory usage. The default value is 15 if
		// deflateInit is used instead.

		//   windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
		// determines the window size. deflate() will then generate raw deflate data
		// with no zlib header or trailer, and will not compute an adler32 check value.

		//   windowBits can also be greater than 15 for optional gzip encoding. Add
		// 16 to windowBits to write a simple gzip header and trailer around the
		// compressed data instead of a zlib wrapper. The gzip header will have no
		// file name, no extra data, no comment, no modification time (set to zero),
		// no header crc, and the operating system will be set to 255 (unknown).  If a
		// gzip stream is being written, strm.adler is a crc32 instead of an adler32.

		//   The memLevel parameter specifies how much memory should be allocated
		// for the internal compression state. memLevel=1 uses minimum memory but
		// is slow and reduces compression ratio; memLevel=9 uses maximum memory
		// for optimal speed. The default value is 8. See zconf.h for total memory
		// usage as a function of windowBits and memLevel.

		//   The strategy parameter is used to tune the compression algorithm. Use the
		// value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
		// filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
		// string match), or Z_RLE to limit match distances to one (run-length
		// encoding). Filtered data consists mostly of small values with a somewhat
		// random distribution. In this case, the compression algorithm is tuned to
		// compress them better. The effect of Z_FILTERED is to force more Huffman
		// coding and less string matching; it is somewhat intermediate between
		// Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as
		// Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy
		// parameter only affects the compression ratio but not the correctness of the
		// compressed output even if it is not set appropriately.  Z_FIXED prevents the
		// use of dynamic Huffman codes, allowing for a simpler decoder for special
		// applications.

		//    deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
		// memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
		// method). msg is set to null if there is no error message.  deflateInit2 does
		// not perform any compression: this will be done by deflate().

		public static int deflateInit2(z_stream strm, int level, int method, int windowBits, int memLevel, int strategy)
		{
			if(strm==null) return Z_STREAM_ERROR;
			strm.msg=null;

			if(level==Z_DEFAULT_COMPRESSION) level=6;

			int wrap=1;

			if(windowBits<0)
			{ // suppress zlib wrapper
				wrap=0;
				windowBits=-windowBits;
			}
			else if(windowBits>15)
			{
				wrap=2;       // write gzip wrapper instead
				windowBits-=16;
			}

			if(memLevel<1||memLevel>MAX_MEM_LEVEL||method!=Z_DEFLATED||windowBits<8||windowBits>15||level<0||level>9||
				strategy<0||strategy>Z_FIXED) return Z_STREAM_ERROR;

			if(windowBits==8) windowBits=9;  // until 256-byte window bug fixed

			deflate_state s;
			try
			{
				s=new deflate_state();
			}
			catch(Exception)
			{
				return Z_MEM_ERROR;
			}

			strm.state=s;
			s.strm=strm;

			s.wrap=wrap;
			s.w_bits=(uint)windowBits;
			s.w_size=1U<<(int)s.w_bits;
			s.w_mask=s.w_size-1;

			s.hash_bits=(uint)memLevel+7;
			s.hash_size=1U<<(int)s.hash_bits;
			s.hash_mask=s.hash_size-1;
			s.hash_shift=(s.hash_bits+MIN_MATCH-1)/MIN_MATCH;

			try
			{
				s.window=new byte[s.w_size*2];
				s.prev=new ushort[s.w_size];
				s.head=new ushort[s.hash_size];
				s.high_water=0; // nothing written to s->window yet

				s.lit_bufsize=1U<<(memLevel+6); // 16K elements by default

				s.pending_buf=new byte[s.lit_bufsize*4];
				s.pending_buf_size=s.lit_bufsize*4;

				s.d_buf=new ushort[s.lit_bufsize];
				s.l_buf=new byte[s.lit_bufsize];
			}
			catch(Exception)
			{
				s.status=FINISH_STATE;
				strm.msg=zError(Z_MEM_ERROR);
				deflateEnd(strm);
				return Z_MEM_ERROR;
			}

			s.level=level;
			s.strategy=strategy;
			s.method=(byte)method;

			return deflateReset(strm);
		}

		// =========================================================================
		//   Initializes the compression dictionary from the given byte sequence
		// without producing any compressed output. This function must be called
		// immediately after deflateInit, deflateInit2 or deflateReset, before any
		// call of deflate. The compressor and decompressor must use exactly the same
		// dictionary (see inflateSetDictionary).

		//   The dictionary should consist of strings (byte sequences) that are likely
		// to be encountered later in the data to be compressed, with the most commonly
		// used strings preferably put towards the end of the dictionary. Using a
		// dictionary is most useful when the data to be compressed is short and can be
		// predicted with good accuracy; the data can then be compressed better than
		// with the default empty dictionary.

		//   Depending on the size of the compression data structures selected by
		// deflateInit or deflateInit2, a part of the dictionary may in effect be
		// discarded, for example if the dictionary is larger than the window size in
		// deflate or deflate2. Thus the strings most likely to be useful should be
		// put at the end of the dictionary, not at the front. In addition, the
		// current implementation of deflate will use at most the window size minus
		// 262 bytes of the provided dictionary.

		//   Upon return of this function, strm.adler is set to the adler32 value
		// of the dictionary; the decompressor may later use this value to determine
		// which dictionary has been used by the compressor. (The adler32 value
		// applies to the whole dictionary even if only a subset of the dictionary is
		// actually used by the compressor.) If a raw deflate was requested, then the
		// adler32 value is not computed and strm.adler is not set.

		//   deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
		// parameter is invalid (such as NULL dictionary) or the stream state is
		// inconsistent (for example if deflate has already been called for this stream
		// or if the compression method is bsort). deflateSetDictionary does not
		// perform any compression: this will be done by deflate().

		public static int deflateSetDictionary(z_stream strm, byte[] dictionary, uint dictLength)
		{
			uint length=dictLength;
			uint n;
			uint hash_head=0;

			if(strm==null||strm.state==null||dictionary==null) return Z_STREAM_ERROR;

			deflate_state s=strm.state as deflate_state;
			if(s==null||s.wrap==2||(s.wrap==1&&s.status!=INIT_STATE))
				return Z_STREAM_ERROR;

			if(s.wrap!=0) strm.adler=adler32(strm.adler, dictionary, dictLength);

			if(length<MIN_MATCH) return Z_OK;

			int dictionary_ind=0;
			if(length>s.w_size)
			{
				length=s.w_size;
				dictionary_ind=(int)(dictLength-length); // use the tail of the dictionary
			}

			//was memcpy(s.window, dictionary+dictionary_ind, length);
			Array.Copy(dictionary, dictionary_ind, s.window, 0, length);

			s.strstart=length;
			s.block_start=(int)length;

			// Insert all strings in the hash table (except for the last two bytes).
			// s.lookahead stays null, so s.ins_h will be recomputed at the next
			// call of fill_window.
			s.ins_h=s.window[0];

			//was UPDATE_HASH(s, s.ins_h, s.window[1]);
			s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[1])&s.hash_mask;

			for(n=0; n<=length-MIN_MATCH; n++)
			{
				//was INSERT_STRING(s, n, hash_head);
				s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[n+(MIN_MATCH-1)])&s.hash_mask;
				hash_head=s.prev[n&s.w_mask]=s.head[s.ins_h];
				s.head[s.ins_h]=(ushort)n;
			}
			if(hash_head!=0) hash_head=0;  // to make compiler happy
			return Z_OK;
		}

		// =========================================================================
		//   This function is equivalent to deflateEnd followed by deflateInit,
		// but does not free and reallocate all the internal compression state.
		// The stream will keep the same compression level and any other attributes
		// that may have been set by deflateInit2.

		//    deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
		// stream state was inconsistent (such as zalloc or state being NULL).
		public static int deflateReset(z_stream strm)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;

			strm.total_in=strm.total_out=0;
			strm.msg=null;

			deflate_state s=(deflate_state)strm.state;
			s.pending=0;
			s.pending_out=0;

			if(s.wrap<0) s.wrap=-s.wrap; // was made negative by deflate(..., Z_FINISH);

			s.status=s.wrap!=0?INIT_STATE:BUSY_STATE;
			strm.adler=s.wrap==2?crc32(0, null, 0):adler32(0, null, 0);
			s.last_flush=Z_NO_FLUSH;

			_tr_init(s);
			lm_init(s);

			return Z_OK;
		}

		// =========================================================================
		//    deflateSetHeader() provides gzip header information for when a gzip
		// stream is requested by deflateInit2().  deflateSetHeader() may be called
		// after deflateInit2() or deflateReset() and before the first call of
		// deflate().  The text, time, os, extra field, name, and comment information
		// in the provided gz_header structure are written to the gzip header (xflag is
		// ignored -- the extra flags are set according to the compression level).  The
		// caller must assure that, if not Z_NULL, name and comment are terminated with
		// a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
		// available there.  If hcrc is true, a gzip header crc is included.  Note that
		// the current versions of the command-line version of gzip (up through version
		// 1.3.x) do not support header crc's, and will report that it is a "multi-part
		// gzip file" and give up.

		//    If deflateSetHeader is not used, the default gzip header has text false,
		// the time set to zero, and os set to 255, with no extra, name, or comment
		// fields.  The gzip header is returned to the default state by deflateReset().

		//    deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
		// stream state was inconsistent.

		public static int deflateSetHeader(z_stream strm, gz_header head)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;
			deflate_state s=(deflate_state)strm.state;
			if(s.wrap!=2) return Z_STREAM_ERROR;
			s.gzhead=head;
			return Z_OK;
		}

		// =========================================================================
		//    deflatePrime() inserts bits in the deflate output stream.  The intent
		// is that this function is used to start off the deflate output with the
		// bits leftover from a previous deflate stream when appending to it.  As such,
		// this function can only be used for raw deflate, and must be used before the
		// first deflate() call after a deflateInit2() or deflateReset().  bits must be
		// less than or equal to 16, and that many of the least significant bits of
		// value will be inserted in the output.

		//    deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
		// stream state was inconsistent.

		public static int deflatePrime(z_stream strm, int bits, int value)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;
			deflate_state s=(deflate_state)strm.state;
			s.bi_valid=bits;
			s.bi_buf=(ushort)(value&((1<<bits)-1));
			return Z_OK;
		}

		// =========================================================================
		//   Dynamically update the compression level and compression strategy.  The
		// interpretation of level and strategy is as in deflateInit2.  This can be
		// used to switch between compression and straight copy of the input data, or
		// to switch to a different kind of input data requiring a different
		// strategy. If the compression level is changed, the input available so far
		// is compressed with the old level (and may be flushed); the new level will
		// take effect only at the next call of deflate().

		//   Before the call of deflateParams, the stream state must be set as for
		// a call of deflate(), since the currently available input may have to
		// be compressed and flushed. In particular, strm.avail_out must be non-zero.

		//   deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
		// stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
		// if strm.avail_out was zero.

		public static int deflateParams(z_stream strm, int level, int strategy)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;
			deflate_state s=(deflate_state)strm.state;

			if(level==Z_DEFAULT_COMPRESSION) level=6;
			if(level<0||level>9||strategy<0||strategy>Z_FIXED) return Z_STREAM_ERROR;

			compress_func func=configuration_table[s.level].func;
			int err=Z_OK;

			if((strategy!=s.strategy||func!=configuration_table[level].func)&&strm.total_in!=0) // Flush the last buffer:
				err=deflate(strm, Z_BLOCK);

			if(s.level!=level)
			{
				s.level=level;
				s.max_lazy_match=configuration_table[level].max_lazy;
				s.good_match=configuration_table[level].good_length;
				s.nice_match=configuration_table[level].nice_length;
				s.max_chain_length=configuration_table[level].max_chain;
			}

			s.strategy=strategy;
			return err;
		}

		// =========================================================================
		//   Fine tune deflate's internal compression parameters.  This should only be
		// used by someone who understands the algorithm used by zlib's deflate for
		// searching for the best matching string, and even then only by the most
		// fanatic optimizer trying to squeeze out the last compressed bit for their
		// specific input data.  Read the deflate.cs source code for the meaning of the
		// max_lazy, good_length, nice_length, and max_chain parameters.

		//   deflateTune() can be called after deflateInit() or deflateInit2(), and
		// returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.

		public static int deflateTune(z_stream strm, uint good_length, uint max_lazy, int nice_length, uint max_chain)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;
			deflate_state s=(deflate_state)strm.state;
			s.good_match=good_length;
			s.max_lazy_match=max_lazy;
			s.nice_match=nice_length;
			s.max_chain_length=max_chain;
			return Z_OK;
		}

		// =========================================================================
		// For the default windowBits of 15 and memLevel of 8, this function returns
		// a close to exact, as well as small, upper bound on the compressed size.
		// They are coded as constants here for a reason--if the #define's are
		// changed, then this function needs to be changed as well.  The return
		// value for 15 and 8 only works for those exact settings.
		//
		// For any setting other than those defaults for windowBits and memLevel,
		// the value returned is a conservative worst case for the maximum expansion
		// resulting from using fixed blocks instead of stored blocks, which deflate
		// can emit on compressed data for some combinations of the parameters.
		//
		// This function could be more sophisticated to provide closer upper bounds for
		// every combination of windowBits and memLevel. But even the conservative
		// upper bound of about 14% expansion does not seem onerous for output buffer
		// allocation.

		//   deflateBound() returns an upper bound on the compressed size after
		// deflation of sourceLen bytes.  It must be called after deflateInit()
		// or deflateInit2().  This would be used to allocate an output buffer
		// for deflation in a single pass, and so would be called before deflate().

		public static uint deflateBound(z_stream strm, uint sourceLen)
		{
			// conservative upper bound for compressed data
			uint complen=sourceLen+((sourceLen+7)>>3)+((sourceLen+63)>>6)+5;

			// if can't get parameters, return conservative bound plus zlib wrapper
			if(strm==null||strm.state==null) return complen+6;

			// compute wrapper length
			deflate_state s=(deflate_state)strm.state;
			uint wraplen;
			byte[] str;
			switch(s.wrap)
			{
				case 0: // raw deflate
					wraplen=0;
					break;
				case 1: // zlib wrapper
					wraplen=(uint)(6+(s.strstart!=0?4:0));
					break;
				case 2: // gzip wrapper
					wraplen=18;
					if(s.gzhead!=null) // user-supplied gzip header
					{
						if(s.gzhead.extra!=null) wraplen+=2+s.gzhead.extra_len;
						str=s.gzhead.name;
						int str_ind=0;
						if(str!=null)
						{
							do
							{
								wraplen++;
							} while(str[str_ind++]!=0);
						}
						str=s.gzhead.comment;
						if(str!=null)
						{
							do
							{
								wraplen++;
							} while(str[str_ind++]!=0);
						}
						if(s.gzhead.hcrc!=0) wraplen+=2;
					}
					break;
				default: wraplen=6; break; // for compiler happiness
			}

			// if not default parameters, return conservative bound
			if(s.w_bits!=15||s.hash_bits!=8+7) return complen+wraplen;

			// default settings: return tight bound for that case
			return sourceLen+(sourceLen>>12)+(sourceLen>>14)+(sourceLen>>25)+13-6+wraplen;
		}

		// =========================================================================
		// Put a short in the pending buffer. The 16-bit value is put in MSB order.
		// IN assertion: the stream state is correct and there is enough room in
		// pending_buf.
		static void putShortMSB(deflate_state s, uint b)
		{
			//was put_byte(s, (byte)(b >> 8));
			s.pending_buf[s.pending++]=(byte)(b >> 8);
			//was put_byte(s, (byte)(b & 0xff));
			s.pending_buf[s.pending++]=(byte)(b & 0xff);
		}

		// =========================================================================
		// Flush as much pending output as possible. All deflate() output goes
		// through this function so some applications may wish to modify it
		// to avoid allocating a large strm.next_out buffer and copying into it.
		// (See also read_buf()).
		static void flush_pending(z_stream strm)
		{
			deflate_state s=(deflate_state)strm.state;
			uint len=s.pending;

			if(len>strm.avail_out) len=strm.avail_out;
			if(len==0) return;

			//was memcpy(strm.next_out, s.pending_out, len);
			Array.Copy(s.pending_buf, s.pending_out, strm.out_buf, strm.next_out, len);

			strm.next_out+=(int)len;
			s.pending_out+=(int)len;
			strm.total_out+=len;
			strm.avail_out-=len;
			s.pending-=len;
			if(s.pending==0) s.pending_out=0;
		}

		const int PRESET_DICT=0x20; // preset dictionary flag in zlib header

		#region deflate
		// =========================================================================
		//   deflate compresses as much data as possible, and stops when the input
		// buffer becomes empty or the output buffer becomes full. It may introduce some
		// output latency (reading input without producing any output) except when
		// forced to flush.

		//   The detailed semantics are as follows. deflate performs one or both of the
		// following actions:

		// - Compress more input starting at next_in and update next_in and avail_in
		//   accordingly. If not all input can be processed (because there is not
		//   enough room in the output buffer), next_in and avail_in are updated and
		//   processing will resume at this point for the next call of deflate().

		// - Provide more output starting at next_out and update next_out and avail_out
		//   accordingly. This action is forced if the parameter flush is non zero.
		//   Forcing flush frequently degrades the compression ratio, so this parameter
		//   should be set only when necessary (in interactive applications).
		//   Some output may be provided even if flush is not set.

		// Before the call of deflate(), the application should ensure that at least
		// one of the actions is possible, by providing more input and/or consuming
		// more output, and updating avail_in or avail_out accordingly; avail_out
		// should never be zero before the call. The application can consume the
		// compressed output when it wants, for example when the output buffer is full
		// (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
		// and with zero avail_out, it must be called again after making room in the
		// output buffer because there might be more output pending.

		//   Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
		// decide how much data to accumualte before producing output, in order to
		// maximize compression.

		//   If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
		// flushed to the output buffer and the output is aligned on a byte boundary, so
		// that the decompressor can get all input data available so far. (In particular
		// avail_in is zero after the call if enough output space has been provided
		// before the call.)  Flushing may degrade compression for some compression
		// algorithms and so it should be used only when necessary.

		//   If flush is set to Z_FULL_FLUSH, all output is flushed as with
		// Z_SYNC_FLUSH, and the compression state is reset so that decompression can
		// restart from this point if previous compressed data has been damaged or if
		// random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
		// compression.

		//   If deflate returns with avail_out == 0, this function must be called again
		// with the same value of the flush parameter and more output space (updated
		// avail_out), until the flush is complete (deflate returns with non-zero
		// avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
		// avail_out is greater than six to avoid repeated flush markers due to
		// avail_out == 0 on return.

		//   If the parameter flush is set to Z_FINISH, pending input is processed,
		// pending output is flushed and deflate returns with Z_STREAM_END if there
		// was enough output space; if deflate returns with Z_OK, this function must be
		// called again with Z_FINISH and more output space (updated avail_out) but no
		// more input data, until it returns with Z_STREAM_END or an error. After
		// deflate has returned Z_STREAM_END, the only possible operations on the
		// stream are deflateReset or deflateEnd.

		//   Z_FINISH can be used immediately after deflateInit if all the compression
		// is to be done in a single step. In this case, avail_out must be at least
		// the value returned by deflateBound (see below). If deflate does not return
		// Z_STREAM_END, then it must be called again as described above.

		//   deflate() sets strm.adler to the adler32 checksum of all input read
		// so far (that is, total_in bytes).

		//   deflate() returns Z_OK if some progress has been made (more input
		// processed or more output produced), Z_STREAM_END if all input has been
		// consumed and all output has been produced (only when flush is set to
		// Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
		// if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
		// (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
		// fatal, and deflate() can be called again with more input and more output
		// space to continue compressing.

		public static int deflate(z_stream strm, int flush)
		{
			int old_flush; // value of flush param for previous deflate call

			if(strm==null||strm.state==null||flush>Z_BLOCK||flush<0) return Z_STREAM_ERROR;
			deflate_state s=(deflate_state)strm.state;

			if(strm.out_buf==null||(strm.in_buf==null&&strm.avail_in!=0)||(s.status==FINISH_STATE&&flush!=Z_FINISH))
			{
				strm.msg=zError(Z_STREAM_ERROR);
				return Z_STREAM_ERROR;
			}

			if(strm.avail_out==0)
			{
				strm.msg=zError(Z_BUF_ERROR);
				return Z_BUF_ERROR;
			}

			s.strm=strm; // just in case
			old_flush=s.last_flush;
			s.last_flush=flush;

			// Write the header
			if(s.status==INIT_STATE)
			{
				if(s.wrap==2)
				{
					strm.adler=crc32(0, null, 0);
					s.pending_buf[s.pending++]=31;
					s.pending_buf[s.pending++]=139;
					s.pending_buf[s.pending++]=8;
					if(s.gzhead==null)
					{
						s.pending_buf[s.pending++]=0;

						s.pending_buf[s.pending++]=0;
						s.pending_buf[s.pending++]=0;
						s.pending_buf[s.pending++]=0;
						s.pending_buf[s.pending++]=0;

						s.pending_buf[s.pending++]=(byte)(s.level==9?2:(s.strategy>=Z_HUFFMAN_ONLY||s.level<2?4:0));
						s.pending_buf[s.pending++]=OS_CODE;
						s.status=BUSY_STATE;
					}
					else
					{
						s.pending_buf[s.pending++]=(byte)((s.gzhead.text!=0?1:0)+(s.gzhead.hcrc!=0?2:0)+(s.gzhead.extra==null?0:4)+
									(s.gzhead.name==null?0:8)+(s.gzhead.comment==null?0:16));
						s.pending_buf[s.pending++]=(byte)(s.gzhead.time&0xff);
						s.pending_buf[s.pending++]=(byte)((s.gzhead.time>>8)&0xff);
						s.pending_buf[s.pending++]=(byte)((s.gzhead.time>>16)&0xff);
						s.pending_buf[s.pending++]=(byte)((s.gzhead.time>>24)&0xff);
						s.pending_buf[s.pending++]=(byte)(s.level==9?2:(s.strategy>=Z_HUFFMAN_ONLY||s.level<2?4:0));
						s.pending_buf[s.pending++]=(byte)(s.gzhead.os&0xff);
						if(s.gzhead.extra!=null)
						{
							s.pending_buf[s.pending++]=(byte)(s.gzhead.extra_len&0xff);
							s.pending_buf[s.pending++]=(byte)((s.gzhead.extra_len>>8)&0xff);
						}
						if(s.gzhead.hcrc!=0) strm.adler=crc32(strm.adler, s.pending_buf, s.pending);
						s.gzindex=0;
						s.status=EXTRA_STATE;
					}
				}
				else
				{
					uint header=(Z_DEFLATED+((s.w_bits-8)<<4))<<8;
					uint level_flags;

					if(s.strategy>=Z_HUFFMAN_ONLY||s.level<2) level_flags=0;
					else if(s.level<6) level_flags=1;
					else if(s.level==6) level_flags=2;
					else level_flags=3;

					header|=(level_flags<<6);
					if(s.strstart!=0) header|=PRESET_DICT;
					header+=31-(header%31);

					s.status=BUSY_STATE;
					putShortMSB(s, header);

					// Save the adler32 of the preset dictionary:
					if(s.strstart!=0)
					{
						putShortMSB(s, (uint)(strm.adler>>16));
						putShortMSB(s, (uint)(strm.adler&0xffff));
					}
					strm.adler=adler32(0, null, 0);
				}
			}
			if(s.status==EXTRA_STATE)
			{
				if(s.gzhead.extra!=null)
				{
					uint beg=s.pending;  // start of bytes to update crc

					while(s.gzindex<(s.gzhead.extra_len&0xffff))
					{
						if(s.pending==s.pending_buf_size)
						{
							if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
							flush_pending(strm);
							beg=s.pending;
							if(s.pending==s.pending_buf_size) break;
						}
						s.pending_buf[s.pending++]=s.gzhead.extra[s.gzindex];
						s.gzindex++;
					}
					if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
					if(s.gzindex==s.gzhead.extra_len)
					{
						s.gzindex=0;
						s.status=NAME_STATE;
					}
				}
				else s.status=NAME_STATE;
			}
			if(s.status==NAME_STATE)
			{
				if(s.gzhead.name!=null)
				{
					uint beg=s.pending;  // start of bytes to update crc
					byte val;

					do
					{
						if(s.pending==s.pending_buf_size)
						{
							if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
							flush_pending(strm);
							beg=s.pending;
							if(s.pending==s.pending_buf_size)
							{
								val=1;
								break;
							}
						}
						val=s.gzhead.name[s.gzindex++];
						s.pending_buf[s.pending++]=val;
					} while(val!=0);
					if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
					if(val==0)
					{
						s.gzindex=0;
						s.status=COMMENT_STATE;
					}
				}
				else s.status=COMMENT_STATE;
			}
			if(s.status==COMMENT_STATE)
			{
				if(s.gzhead.comment!=null)
				{
					uint beg=s.pending;  // start of bytes to update crc
					byte val;

					do
					{
						if(s.pending==s.pending_buf_size)
						{
							if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
							flush_pending(strm);
							beg=s.pending;
							if(s.pending==s.pending_buf_size)
							{
								val=1;
								break;
							}
						}
						val=s.gzhead.comment[s.gzindex++];
						s.pending_buf[s.pending++]=val;
					} while(val!=0);
					if(s.gzhead.hcrc!=0&&s.pending>beg) strm.adler=crc32(strm.adler, s.pending_buf, beg, s.pending-beg);
					if(val==0) s.status=HCRC_STATE;
				}
				else s.status=HCRC_STATE;
			}
			if(s.status==HCRC_STATE)
			{
				if(s.gzhead.hcrc!=0)
				{
					if(s.pending+2>s.pending_buf_size) flush_pending(strm);
					if(s.pending+2<=s.pending_buf_size)
					{
						s.pending_buf[s.pending++]=(byte)(strm.adler&0xff);
						s.pending_buf[s.pending++]=(byte)((strm.adler>>8)&0xff);
						strm.adler=crc32(0, null, 0);
						s.status=BUSY_STATE;
					}
				}
				else s.status=BUSY_STATE;
			}

			// Flush as much pending output as possible
			if(s.pending!=0)
			{
				flush_pending(strm);
				if(strm.avail_out==0)
				{
					// Since avail_out is 0, deflate will be called again with
					// more output space, but possibly with both pending and
					// avail_in equal to zero. There won't be anything to do,
					// but this is not an error situation so make sure we
					// return OK instead of BUF_ERROR at next call of deflate:
					s.last_flush=-1;
					return Z_OK;
				}

				// Make sure there is something to do and avoid duplicate consecutive
				// flushes. For repeated and useless calls with Z_FINISH, we keep
				// returning Z_STREAM_END instead of Z_BUF_ERROR.
			}
			else if(strm.avail_in==0&&flush<=old_flush&&flush!=Z_FINISH)
			{
				strm.msg=zError(Z_BUF_ERROR);
				return Z_BUF_ERROR;
			}

			// User must not provide more input after the first FINISH:
			if(s.status==FINISH_STATE&&strm.avail_in!=0)
			{
				strm.msg=zError(Z_BUF_ERROR);
				return Z_BUF_ERROR;
			}

			// Start a new block or continue the current one.
			if(strm.avail_in!=0||s.lookahead!=0||(flush!=Z_NO_FLUSH&&s.status!=FINISH_STATE))
			{
				block_state bstate=s.strategy==Z_HUFFMAN_ONLY?deflate_huff(s, flush):(s.strategy==Z_RLE?deflate_rle(s, flush):configuration_table[s.level].func(s, flush));

				if(bstate==block_state.finish_started||bstate==block_state.finish_done) s.status=FINISH_STATE;
				if(bstate==block_state.need_more||bstate==block_state.finish_started)
				{
					if(strm.avail_out==0) s.last_flush=-1; // avoid BUF_ERROR next call, see above
					return Z_OK;
					// If flush != Z_NO_FLUSH && avail_out == 0, the next call
					// of deflate should use the same flush parameter to make sure
					// that the flush is complete. So we don't have to output an
					// empty block here, this will be done at next call. This also
					// ensures that for a very small output buffer, we emit at most
					// one empty block.
				}
				if(bstate==block_state.block_done)
				{
					if(flush==Z_PARTIAL_FLUSH) _tr_align(s);
					else if(flush!=Z_BLOCK)
					{ // FULL_FLUSH or SYNC_FLUSH
						_tr_stored_block(s, null, 0, 0);
						// For a full flush, this empty block will be recognized
						// as a special marker by inflate_sync().
						if(flush==Z_FULL_FLUSH)
						{
							s.head[s.hash_size-1]=NIL; // forget history

							//was memset((byte*)s.head, 0, (uint)(s.hash_size-1)*sizeof(*s.head));
							for(int i=0; i<s.hash_size-1; i++) s.head[i]=0;

							if(s.lookahead==0)
							{
								s.strstart=0;
								s.block_start=0;
							}
						}
					}
					flush_pending(strm);
					if(strm.avail_out==0)
					{
						s.last_flush=-1; // avoid BUF_ERROR at next call, see above
						return Z_OK;
					}
				}
			}
			//Assert(strm.avail_out>0, "bug2");

			if(flush!=Z_FINISH) return Z_OK;
			if(s.wrap<=0) return Z_STREAM_END;

			// Write the trailer
			if(s.wrap==2)
			{
				s.pending_buf[s.pending++]=(byte)(strm.adler&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.adler>>8)&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.adler>>16)&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.adler>>24)&0xff);
				s.pending_buf[s.pending++]=(byte)(strm.total_in&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.total_in>>8)&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.total_in>>16)&0xff);
				s.pending_buf[s.pending++]=(byte)((strm.total_in>>24)&0xff);
			}
			else
			{
				putShortMSB(s, (uint)(strm.adler>>16));
				putShortMSB(s, (uint)(strm.adler&0xffff));
			}

			flush_pending(strm);
			// If avail_out is zero, the application will call deflate again
			// to flush the rest.
			if(s.wrap>0) s.wrap=-s.wrap; // write the trailer only once!
			return s.pending!=0?Z_OK:Z_STREAM_END;
		}
		#endregion

		// =========================================================================
		//   All dynamically allocated data structures for this stream are freed.
		// This function discards any unprocessed input and does not flush any
		// pending output.

		//   deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
		// stream state was inconsistent, Z_DATA_ERROR if the stream was freed
		// prematurely (some input or output was discarded). In the error case,
		// msg may be set but then points to a static string (which must not be
		// deallocated).

		public static int deflateEnd(z_stream strm)
		{
			if(strm==null||strm.state==null) return Z_STREAM_ERROR;

			deflate_state s=(deflate_state)strm.state;
			int status=s.status;
			if(status!=INIT_STATE&& status!=EXTRA_STATE&& status!=NAME_STATE&& status!=COMMENT_STATE&&
				status!=HCRC_STATE&& status!=BUSY_STATE&&status!=FINISH_STATE) return Z_STREAM_ERROR;

			// Deallocate in reverse order of allocations:
			//if(s.pending_buf!=null) free(s.pending_buf);
			//if(s.l_buf!=null) free(s.l_buf);
			//if(s.d_buf!=null) free(s.d_buf);
			//if(s.head!=null) free(s.head);
			//if(s.prev!=null) free(s.prev);
			//if(s.window!=null) free(s.window);
			s.pending_buf=s.l_buf=s.window=null;
			s.d_buf=s.head=s.prev=null;

			//free(strm.state);
			strm.state=s=null;

			return status==BUSY_STATE?Z_DATA_ERROR:Z_OK;
		}

		// =========================================================================
		//   Sets the destination stream as a complete copy of the source stream.

		//   This function can be useful when several compression strategies will be
		// tried, for example when there are several ways of pre-processing the input
		// data with a filter. The streams that will be discarded should then be freed
		// by calling deflateEnd.  Note that deflateCopy duplicates the internal
		// compression state which can be quite large, so this strategy is slow and
		// can consume lots of memory.

		//   deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
		// enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
		// (such as zalloc being NULL). msg is left unchanged in both source and
		// destination.

		// Copy the source state to the destination state.
		public static int deflateCopy(z_stream dest, z_stream source)
		{
			if(source==null||dest==null||source.state==null) return Z_STREAM_ERROR;

			deflate_state ss=(deflate_state)source.state;

			//was memcpy(dest, source, sizeof(z_stream));
			source.CopyTo(dest);

			deflate_state ds;
			try
			{
				ds=ss.Clone();
			}
			catch(Exception)
			{
				return Z_MEM_ERROR;
			}
			dest.state=ds;
			//(done above) memcpy(ds, ss, sizeof(deflate_state));
			ds.strm=dest;

			try
			{
				ds.window=new byte[ds.w_size*2];
				ds.prev=new ushort[ds.w_size];
				ds.head=new ushort[ds.hash_size];
				ds.pending_buf=new byte[ds.lit_bufsize*4];
				ds.d_buf=new ushort[ds.lit_bufsize];
				ds.l_buf=new byte[ds.lit_bufsize];
			}
			catch(Exception)
			{
				deflateEnd(dest);
				return Z_MEM_ERROR;
			}

			//was memcpy(ds.window, ss.window, ds.w_size*2*sizeof(byte));
			ss.window.CopyTo(ds.window, 0);

			//was memcpy(ds.prev, ss.prev, ds.w_size*sizeof(ushort));
			ss.prev.CopyTo(ds.prev, 0);
			
			//was memcpy(ds.head, ss.head, ds.hash_size*sizeof(ushort));
			ss.head.CopyTo(ds.head, 0);

			//was memcpy(ds.pending_buf, ss.pending_buf, (uint)ds.pending_buf_size);
			ss.pending_buf.CopyTo(ds.pending_buf, 0);
			ss.d_buf.CopyTo(ds.d_buf, 0);
			ss.l_buf.CopyTo(ds.l_buf, 0);

			ds.l_desc.dyn_tree=ds.dyn_ltree;
			ds.d_desc.dyn_tree=ds.dyn_dtree;
			ds.bl_desc.dyn_tree=ds.bl_tree;

			return Z_OK;
		}

		// ===========================================================================
		// Read a new buffer from the current input stream, update the adler32
		// and total number of bytes read.  All deflate() input goes through
		// this function so some applications may wish to modify it to avoid
		// allocating a large strm.next_in buffer and copying from it.
		// (See also flush_pending()).
		static int read_buf(z_stream strm, byte[] buf, uint size)
		{
			return read_buf(strm, buf, 0, size);
		}

		static int read_buf(z_stream strm, byte[] buf, int buf_ind, uint size)
		{
			uint len=strm.avail_in;

			if(len>size) len=size;
			if(len==0) return 0;

			strm.avail_in-=len;

			deflate_state s=(deflate_state)strm.state;

			if(s.wrap==1) strm.adler=adler32(strm.adler, strm.in_buf, (uint)strm.next_in, len);
			else if(s.wrap==2) strm.adler=crc32(strm.adler, strm.in_buf, strm.next_in, len);

			//was memcpy(buf, strm.in_buf+strm.next_in, len);
			Array.Copy(strm.in_buf, strm.next_in, buf, buf_ind, len);
			strm.next_in+=len;
			strm.total_in+=len;

			return (int)len;
		}

		// ===========================================================================
		// Initialize the "longest match" routines for a new zlib stream
		static void lm_init(deflate_state s)
		{
			s.window_size=(uint)2*s.w_size;

			s.head[s.hash_size-1]=NIL;

			//was memset((byte*)s.head, 0, (uint)(s.hash_size-1)*sizeof(*s.head));
			for(int i=0; i<(s.hash_size-1); i++) s.head[i]=0;

			// Set the default configuration parameters:
			s.max_lazy_match=configuration_table[s.level].max_lazy;
			s.good_match=configuration_table[s.level].good_length;
			s.nice_match=configuration_table[s.level].nice_length;
			s.max_chain_length=configuration_table[s.level].max_chain;

			s.strstart=0;
			s.block_start=0;
			s.lookahead=0;
			s.match_length=s.prev_length=MIN_MATCH-1;
			s.match_available=0;
			s.ins_h=0;
		}

		// ===========================================================================
		// Set match_start to the longest match starting at the given string and
		// return its length. Matches shorter or equal to prev_length are discarded,
		// in which case the result is equal to prev_length and match_start is
		// garbage.
		// IN assertions: cur_match is the head of the hash chain for the current
		//   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
		// OUT assertion: the match length is not greater than s.lookahead.
		static uint longest_match(deflate_state s, uint cur_match)
		{
			uint chain_length=s.max_chain_length;	// max hash chain length
			byte[] scan=s.window;					// current string
			int scan_ind=(int)s.strstart;
			int len;								// length of current match
			int best_len=(int)s.prev_length;		// best match length so far
			int nice_match=s.nice_match;			// stop if match long enough
			uint limit=s.strstart>(uint)(s.w_size-MIN_LOOKAHEAD)?s.strstart-(uint)(s.w_size-MIN_LOOKAHEAD):NIL;
			// Stop when cur_match becomes <= limit. To simplify the code,
			// we prevent matches with the string of window index 0.
			ushort[] prev=s.prev;
			uint wmask=s.w_mask;

			int strend_ind=(int)s.strstart+MAX_MATCH;
			byte scan_end1=scan[scan_ind+best_len-1];
			byte scan_end=scan[scan_ind+best_len];

			// The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
			// It is easy to get rid of this optimization if necessary.
			//Assert(s.hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

			// Do not waste too much time if we already have a good match:
			if(s.prev_length>=s.good_match) chain_length>>=2;

			// Do not look for matches beyond the end of the input. This is necessary
			// to make deflate deterministic.
			if((uint)nice_match>s.lookahead) nice_match=(int)s.lookahead;

			//Assert((uint)s.strstart <= s.window_size-MIN_LOOKAHEAD, "need lookahead");

			byte[] match=s.window;
			do
			{
				//Assert(cur_match<s.strstart, "no future");
				int match_ind=(int)cur_match;

				// Skip to next match if the match length cannot increase
				// or if the match length is less than 2.  Note that the checks below
				// for insufficient lookahead only occur occasionally for performance
				// reasons.  Therefore uninitialized memory will be accessed, and
				// conditional jumps will be made that depend on those values.
				// However the length of the match is limited to the lookahead, so
				// the output of deflate is not affected by the uninitialized values.
				if(match[match_ind+best_len]!=scan_end||match[match_ind+best_len-1]!=scan_end1||
					match[match_ind]!=scan[scan_ind]||match[++match_ind]!=scan[scan_ind+1]) continue;

				// The check at best_len-1 can be removed because it will be made
				// again later. (This heuristic is not always a win.)
				// It is not necessary to compare scan[2] and match[2] since they
				// are always equal when the other bytes match, given that
				// the hash keys are equal and that HASH_BITS >= 8.
				scan_ind+=2;
				match_ind++;
				//Assert(scan[scan_ind]==match[match_ind], "match[2]?");

				// We check for insufficient lookahead only every 8th comparison;
				// the 256th check will be made at strstart+258.
				do
				{
				} while(scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
						 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
						 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
						 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
						 scan_ind<strend_ind);

				//Assert(scan_ind <= (uint)(s.window_size-1), "wild scan");

				len=MAX_MATCH-(int)(strend_ind-scan_ind);
				scan_ind=strend_ind-MAX_MATCH;

				if(len>best_len)
				{
					s.match_start=cur_match;
					best_len=len;
					if(len>=nice_match) break;

					scan_end1=scan[scan_ind+best_len-1];
					scan_end=scan[scan_ind+best_len];
				}
			} while((cur_match=prev[cur_match&wmask])>limit&&--chain_length!=0);

			if((uint)best_len<=s.lookahead) return (uint)best_len;
			return s.lookahead;
		}

		// ---------------------------------------------------------------------------
		// Optimized version for FASTEST only
		static uint longest_match_fast(deflate_state s, uint cur_match)
		{
			byte[] scan=s.window;
			int scan_ind=(int)s.strstart;	// current string
			int len;						// length of current match
			int strend_ind=(int)s.strstart+MAX_MATCH;

			// The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
			// It is easy to get rid of this optimization if necessary.
			//Assert(s.hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

			//Assert((uint)s.strstart <= s.window_size-MIN_LOOKAHEAD, "need lookahead");

			//Assert(cur_match < s.strstart, "no future");

			byte[] match=s.window;
			int match_ind=(int)cur_match;

			// Return failure if the match length is less than 2:
			if(match[match_ind]!=scan[scan_ind]||match[match_ind+1]!=scan[scan_ind+1]) return MIN_MATCH-1;

			// The check at best_len-1 can be removed because it will be made
			// again later. (This heuristic is not always a win.)
			// It is not necessary to compare scan[2] and match[2] since they
			// are always equal when the other bytes match, given that
			// the hash keys are equal and that HASH_BITS >= 8.
			scan_ind+=2;
			match_ind+=2;
			//Assert(scan[scan_ind] == match[match_ind], "match[2]?");

			// We check for insufficient lookahead only every 8th comparison;
			// the 256th check will be made at strstart+258.
			do
			{
			} while(scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
					 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
					 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
					 scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
					 scan_ind<strend_ind);

			//Assert(scan_ind <= (uint)(s.window_size-1), "wild scan");

			len=MAX_MATCH-(int)(strend_ind-scan_ind);

			if(len<MIN_MATCH) return MIN_MATCH-1;

			s.match_start=cur_match;
			return (uint)len<=s.lookahead?(uint)len:s.lookahead;
		}

		// ===========================================================================
		// Fill the window when the lookahead becomes insufficient.
		// Updates strstart and lookahead.
		//
		// IN assertion: lookahead < MIN_LOOKAHEAD
		// OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
		//    At least one byte has been read, or avail_in == 0; reads are
		//    performed for at least two bytes (required for the zip translate_eol
		//    option -- not supported here).
		static void fill_window(deflate_state s)
		{
			uint n, m;
			uint more;    // Amount of free space at the end of the window.
			uint wsize=s.w_size;

			do
			{
				more=(uint)(s.window_size-(uint)s.lookahead-(uint)s.strstart);

				// If the window is almost full and there is insufficient lookahead,
				// move the upper half to the lower one to make room in the upper half.
				if(s.strstart>=wsize+s.w_size-MIN_LOOKAHEAD)
				{
					//was memcpy(s.window, s.window+wsize, (uint)wsize);
					Array.Copy(s.window, wsize, s.window, 0, wsize);

					s.match_start-=wsize;
					s.strstart-=wsize; // we now have strstart >= MAX_DIST
					s.block_start-=(int)wsize;

					// Slide the hash table (could be avoided with 32 bit values
					// at the expense of memory usage). We slide even when level == 0
					// to keep the hash table consistent if we switch back to level > 0
					// later. (Using level 0 permanently is not an optimal usage of
					// zlib, so we don't care about this pathological case.)
					n=s.hash_size;
					uint p=n;
					do
					{
						m=s.head[--p];
						s.head[p]=(ushort)(m>=wsize?m-wsize:NIL);
					} while((--n)!=0);

					n=wsize;
					p=n;
					do
					{
						m=s.prev[--p];
						s.prev[p]=(ushort)(m>=wsize?m-wsize:NIL);
						// If n is not on any hash chain, prev[n] is garbage but
						// its value will never be used.
					} while((--n)!=0);
					more+=wsize;
				}
				if(s.strm.avail_in==0) return;

				// If there was no sliding:
				//    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
				//    more == window_size - lookahead - strstart
				// => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
				// => more >= window_size - 2*WSIZE + 2
				// In the BIG_MEM or MMAP case (not yet supported),
				//   window_size == input_size + MIN_LOOKAHEAD  &&
				//   strstart + s.lookahead <= input_size => more >= MIN_LOOKAHEAD.
				// Otherwise, window_size == 2*WSIZE so more >= 2.
				// If there was sliding, more >= WSIZE. So in all cases, more >= 2.
				//Assert(more>=2, "more < 2");

				n=(uint)read_buf(s.strm, s.window, (int)(s.strstart+s.lookahead), more);
				s.lookahead+=n;

				// Initialize the hash value now that we have some input:
				if(s.lookahead>=MIN_MATCH)
				{
					s.ins_h=s.window[s.strstart];
					//was UPDATE_HASH(s, s.ins_h, s.window[s.strstart+1]);
					s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+1])&s.hash_mask;

				}
				// If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
				// but this is not important since only literal bytes will be emitted.
			} while(s.lookahead<MIN_LOOKAHEAD&&s.strm.avail_in!=0);

			// If the WIN_INIT bytes after the end of the current data have never been
			// written, then zero those bytes in order to avoid memory check reports of
			// the use of uninitialized (or uninitialised as Julian writes) bytes by
			// the longest match routines.  Update the high water mark for the next
			// time through here.  WIN_INIT is set to MAX_MATCH since the longest match
			// routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
			if(s.high_water<s.window_size)
			{
				uint curr=s.strstart+s.lookahead;
				uint init;

				if(s.high_water<curr)
				{
					// Previous high water mark below current data -- zero WIN_INIT
					// bytes or up to end of window, whichever is less.
					init=s.window_size-curr;
					if(init>WIN_INIT) init=WIN_INIT;
					for(int i=0; i<init; i++) s.window[curr+i]=0;
					s.high_water=curr+init;
				}
				else if(s.high_water<curr+WIN_INIT)
				{
					// High water mark at or above current data, but below current data
					// plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
					// to end of window, whichever is less.
					init=curr+WIN_INIT-s.high_water;
					if(init>s.window_size-s.high_water) init=s.window_size-s.high_water;
					for(int i=0; i<init; i++) s.window[s.high_water+i]=0;
					s.high_water+=init;
				}
			}
		}

		// ===========================================================================
		// Flush the current block, with given end-of-file flag.
		// IN assertion: strstart is set to the end of the current match.
		//#define FLUSH_BLOCK_ONLY(s, last) \
		//{ \
		//    _tr_flush_block(s, s.block_start >= 0 ? s.window : null, s.block_start >= 0?s.block_start:0, \
		//		(uint)((int)s.strstart - s.block_start), (last)); \
		//    s.block_start = s.strstart; \
		//    flush_pending(s.strm); \
		//    Tracev((stderr,"[FLUSH]")); \
		//}

		// Same but force premature exit if necessary.
		//#define FLUSH_BLOCK(s, last) \
		//{ \
		//    _tr_flush_block(s, s.block_start >= 0 ? s.window : null, s.block_start >= 0?s.block_start:0, \
		//		(uint)((int)s.strstart - s.block_start), (last)); \
		//    s.block_start = s.strstart; \
		//    flush_pending(s.strm); \
		//    Tracev((stderr,"[FLUSH]")); \
		//    if (s.strm.avail_out == 0) return (last) ? finish_started : need_more; \
		//}

		// ===========================================================================
		// Copy without compression as much as possible from the input stream, return
		// the current block state.
		// This function does not insert new strings in the dictionary since
		// uncompressible data is probably not useful. This function is used
		// only for the level=0 compression option.
		// NOTE: this function should be optimized to avoid extra copying from
		// window to pending_buf.
		static block_state deflate_stored(deflate_state s, int flush)
		{
			// Stored blocks are limited to 0xffff bytes, pending_buf is limited
			// to pending_buf_size, and each stored block has a 5 byte header:
			uint max_block_size=0xffff;
			uint max_start;

			if(max_block_size>s.pending_buf_size-5) max_block_size=s.pending_buf_size-5;

			// Copy as much as possible from input to output:
			for(; ; )
			{
				// Fill the window as much as possible:
				if(s.lookahead<=1)
				{
					//Assert(s.strstart<s.w_size+MAX_DIST(s)||s.block_start>=(int)s.w_size, "slide too late");

					fill_window(s);
					if(s.lookahead==0&&flush==Z_NO_FLUSH) return block_state.need_more;

					if(s.lookahead==0) break; // flush the current block
				}
				//Assert(s.block_start>=0, "block gone");

				s.strstart+=s.lookahead;
				s.lookahead=0;

				// Emit a stored block if pending_buf will be full:
				max_start=(uint)s.block_start+max_block_size;
				if(s.strstart==0||(uint)s.strstart>=max_start)
				{
					// strstart == 0 is possible when wraparound on 16-bit machine
					s.lookahead=(uint)(s.strstart-max_start);
					s.strstart=(uint)max_start;

					//was FLUSH_BLOCK(s, 0);
					_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
						(uint)((int)s.strstart-s.block_start), 0);
					s.block_start=(int)s.strstart;
					flush_pending(s.strm);
					//Tracev((stderr,"[FLUSH]"));
					if(s.strm.avail_out==0) return block_state.need_more;
				}
				// Flush if we may have to slide, otherwise block_start may become
				// negative and the data will be gone:
				if(s.strstart-(uint)s.block_start>=(s.w_size-MIN_LOOKAHEAD))
				{
					//was FLUSH_BLOCK(s, 0);
					_tr_flush_block(s, s.block_start >= 0 ? s.window : null, s.block_start >= 0?s.block_start:0,
						(uint)((int)s.strstart - s.block_start), 0);
					s.block_start = (int)s.strstart;
					flush_pending(s.strm);
					//Tracev((stderr,"[FLUSH]"));
					if (s.strm.avail_out == 0) return block_state.need_more;
				}
			}
			
			//was FLUSH_BLOCK(s, flush==Z_FINISH);
			_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
				(uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
			s.block_start=(int)s.strstart;
			flush_pending(s.strm);
			//Tracev((stderr,"[FLUSH]"));
			if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

			return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
		}

		// ===========================================================================
		// Compress as much as possible from the input stream, return the current
		// block state.
		// This function does not perform lazy evaluation of matches and inserts
		// new strings in the dictionary only for unmatched strings or for short
		// matches. It is used only for the fast compression options.
		static block_state deflate_fast(deflate_state s, int flush)
		{
			uint hash_head=NIL; // head of the hash chain
			int bflush;           // set if current block must be flushed

			for(; ; )
			{
				// Make sure that we always have enough lookahead, except
				// at the end of the input file. We need MAX_MATCH bytes
				// for the next match, plus MIN_MATCH bytes to insert the
				// string following the next match.
				if(s.lookahead<MIN_LOOKAHEAD)
				{
					fill_window(s);
					if(s.lookahead<MIN_LOOKAHEAD&&flush==Z_NO_FLUSH) return block_state.need_more;
					if(s.lookahead==0) break; // flush the current block
				}

				// Insert the string window[strstart .. strstart+2] in the
				// dictionary, and set hash_head to the head of the hash chain:
				hash_head=NIL;
				if(s.lookahead>=MIN_MATCH)
				{
					//was INSERT_STRING(s, s.strstart, hash_head);
					s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
					hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
					s.head[s.ins_h]=(ushort)s.strstart;
				}

				// Find the longest match, discarding those <= prev_length.
				// At this point we have always match_length < MIN_MATCH
				if(hash_head!=NIL&&s.strstart-hash_head<=(s.w_size-MIN_LOOKAHEAD))
				{
					// To simplify the code, we prevent matches with the string
					// of window index 0 (in particular we have to avoid a match
					// of the string with itself at the start of the input file).
					s.match_length=longest_match_fast(s, hash_head);
					// longest_match_fast() sets match_start
				}
				if(s.match_length>=MIN_MATCH)
				{
					//was _tr_tally_dist(s, s.strstart - s.match_start, s.match_length - MIN_MATCH, bflush);
					{
						byte len=(byte)(s.match_length-MIN_MATCH);
						ushort dist=(ushort)(s.strstart-s.match_start);
						s.d_buf[s.last_lit]=dist;
						s.l_buf[s.last_lit++]=len;
						dist--;
						s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
						s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
					}

					s.lookahead-=s.match_length;

					// Insert new strings in the hash table only if the match length
					// is not too large. This saves time but degrades compression.
					if(s.match_length<=s.max_lazy_match&&s.lookahead>=MIN_MATCH) // max_lazy_match was max_insert_length as #define
					{
						s.match_length--; // string at strstart already in table
						do
						{
							s.strstart++;
							//was INSERT_STRING(s, s.strstart, hash_head);
							s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
							hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
							s.head[s.ins_h]=(ushort)s.strstart;

							// strstart never exceeds WSIZE-MAX_MATCH, so there are
							// always MIN_MATCH bytes ahead.
						} while(--s.match_length!=0);
						s.strstart++;
					}
					else
					{
						s.strstart+=s.match_length;
						s.match_length=0;
						s.ins_h=s.window[s.strstart];
						//was UPDATE_HASH(s, s.ins_h, s.window[s.strstart+1]);
						s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+1])&s.hash_mask;

						// If lookahead < MIN_MATCH, ins_h is garbage, but it does not
						// matter since it will be recomputed at next deflate call.
					}
				}
				else
				{
					// No match, output a literal byte
					//Tracevv((stderr,"%c", s.window[s.strstart]));

					//was _tr_tally_lit (s, s.window[s.strstart], bflush);
					{
						byte cc=s.window[s.strstart];
						s.d_buf[s.last_lit]=0;
						s.l_buf[s.last_lit++]=cc;
						s.dyn_ltree[cc].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
					}

					s.lookahead--;
					s.strstart++;
				}

				if(bflush!=0)
				{
					//was FLUSH_BLOCK(s, 0);
					_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
						(uint)((int)s.strstart-s.block_start), 0);
					s.block_start=(int)s.strstart;
					flush_pending(s.strm);
					//Tracev((stderr,"[FLUSH]"));
					if(s.strm.avail_out==0) return block_state.need_more;
				}
			}
			//was FLUSH_BLOCK(s, flush==Z_FINISH);
			_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
				(uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
			s.block_start=(int)s.strstart;
			flush_pending(s.strm);
			//Tracev((stderr,"[FLUSH]"));
			if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

			return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
		}

		// ===========================================================================
		// Same as above, but achieves better compression. We use a lazy
		// evaluation for matches: a match is finally adopted only if there is
		// no better match at the next window position.
		static block_state deflate_slow(deflate_state s, int flush)
		{
			uint hash_head=NIL;	// head of hash chain
			int bflush;			// set if current block must be flushed

			// Process the input block.
			for(; ; )
			{
				// Make sure that we always have enough lookahead, except
				// at the end of the input file. We need MAX_MATCH bytes
				// for the next match, plus MIN_MATCH bytes to insert the
				// string following the next match.
				if(s.lookahead<MIN_LOOKAHEAD)
				{
					fill_window(s);
					if(s.lookahead<MIN_LOOKAHEAD&&flush==Z_NO_FLUSH) return block_state.need_more;
					if(s.lookahead==0) break; // flush the current block
				}

				// Insert the string window[strstart .. strstart+2] in the
				// dictionary, and set hash_head to the head of the hash chain:
				hash_head=NIL;
				if(s.lookahead>=MIN_MATCH)
				{
					//was INSERT_STRING(s, s.strstart, hash_head);
					s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
					hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
					s.head[s.ins_h]=(ushort)s.strstart;
				}

				// Find the longest match, discarding those <= prev_length.
				s.prev_length=s.match_length;
				s.prev_match=s.match_start;
				s.match_length=MIN_MATCH-1;

				if(hash_head!=NIL&&s.prev_length<s.max_lazy_match&&s.strstart-hash_head<=(s.w_size-MIN_LOOKAHEAD))
				{
					// To simplify the code, we prevent matches with the string
					// of window index 0 (in particular we have to avoid a match
					// of the string with itself at the start of the input file).
					s.match_length=longest_match(s, hash_head);
					// longest_match() sets match_start

					if(s.match_length<=5&&(s.strategy==Z_FILTERED||
						(s.match_length==MIN_MATCH&&s.strstart-s.match_start>TOO_FAR)))
					{
						// If prev_match is also MIN_MATCH, match_start is garbage
						// but we will ignore the current match anyway.
						s.match_length=MIN_MATCH-1;
					}
				}

				// If there was a match at the previous step and the current
				// match is not better, output the previous match:
				if(s.prev_length>=MIN_MATCH&&s.match_length<=s.prev_length)
				{
					uint max_insert=s.strstart+s.lookahead-MIN_MATCH;
					// Do not insert strings in hash table beyond this.

					//was _tr_tally_dist(s, s.strstart -1 - s.prev_match, s.prev_length - MIN_MATCH, bflush);
					{
						byte len=(byte)(s.prev_length-MIN_MATCH);
						ushort dist=(ushort)(s.strstart-1-s.prev_match);
						s.d_buf[s.last_lit]=dist;
						s.l_buf[s.last_lit++]=len;
						dist--;
						s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
						s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
					}

					// Insert in hash table all strings up to the end of the match.
					// strstart-1 and strstart are already inserted. If there is not
					// enough lookahead, the last two strings are not inserted in
					// the hash table.
					s.lookahead-=s.prev_length-1;
					s.prev_length-=2;
					do
					{
						if(++s.strstart<=max_insert)
						{
							//was INSERT_STRING(s, s.strstart, hash_head);
							s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
							hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
							s.head[s.ins_h]=(ushort)s.strstart;
						}
					} while(--s.prev_length!=0);
					s.match_available=0;
					s.match_length=MIN_MATCH-1;
					s.strstart++;

					if(bflush!=0)
					{
						//was FLUSH_BLOCK(s, 0);
						_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
							(uint)((int)s.strstart-s.block_start), 0);
						s.block_start=(int)s.strstart;
						flush_pending(s.strm);
						//Tracev((stderr,"[FLUSH]"));
						if(s.strm.avail_out==0) return block_state.need_more;
					}
				}
				else if(s.match_available!=0)
				{
					// If there was no match at the previous position, output a
					// single literal. If there was a match but the current match
					// is longer, truncate the previous match to a single literal.
					//Tracevv((stderr,"%c", s.window[s.strstart-1]));

					//was _tr_tally_lit(s, s.window[s.strstart-1], bflush);
					{
						byte cc=s.window[s.strstart-1];
						s.d_buf[s.last_lit]=0;
						s.l_buf[s.last_lit++]=cc;
						s.dyn_ltree[cc].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
					}

					if(bflush!=0)
					{
						//was FLUSH_BLOCK_ONLY(s, 0);
						_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
							(uint)((int)s.strstart-s.block_start), 0);
						s.block_start=(int)s.strstart;
						flush_pending(s.strm);
						//Tracev((stderr,"[FLUSH]"));
					}
					s.strstart++;
					s.lookahead--;
					if(s.strm.avail_out==0) return block_state.need_more;
				}
				else
				{
					// There is no previous match to compare with, wait for
					// the next step to decide.
					s.match_available=1;
					s.strstart++;
					s.lookahead--;
				}
			}
			//Assert(flush!=Z_NO_FLUSH, "no flush?");
			if(s.match_available!=0)
			{
				//Tracevv((stderr,"%c", s.window[s.strstart-1]));

				//was _tr_tally_lit(s, s.window[s.strstart-1], bflush);
				{
					byte cc=s.window[s.strstart-1];
					s.d_buf[s.last_lit]=0;
					s.l_buf[s.last_lit++]=cc;
					s.dyn_ltree[cc].Freq++;
					bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
				}

				s.match_available=0;
			}
			//was FLUSH_BLOCK(s, flush==Z_FINISH);
			_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
				(uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
			s.block_start=(int)s.strstart;
			flush_pending(s.strm);
			//Tracev((stderr,"[FLUSH]"));
			if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

			return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
		}

		// ===========================================================================
		// For Z_RLE, simply look for runs of bytes, generate matches only of distance
		// one.  Do not maintain a hash table.  (It will be regenerated if this run of
		// deflate switches away from Z_RLE.)
		static block_state deflate_rle(deflate_state s, int flush)
		{
			bool bflush;			// set if current block must be flushed
			uint prev;				// byte at distance one to match
			int scan, strend;	// scan goes up to strend for length of run

			for(; ; )
			{
				// Make sure that we always have enough lookahead, except
				// at the end of the input file. We need MAX_MATCH bytes
				// for the longest encodable run.
				if(s.lookahead<MAX_MATCH)
				{
					fill_window(s);
					if(s.lookahead<MAX_MATCH&&flush==Z_NO_FLUSH) return block_state.need_more;
					if(s.lookahead==0) break; // flush the current block
				}

				// See how many times the previous byte repeats
				s.match_length=0;
				if(s.lookahead>=MIN_MATCH&&s.strstart>0)
				{
					scan=(int)(s.strstart-1);
					prev=s.window[scan];
					if(prev==s.window[++scan]&&prev==s.window[++scan]&&prev==s.window[++scan])
					{
						strend=(int)(s.strstart+MAX_MATCH);
						do
						{
						} while(prev==s.window[++scan]&&prev==s.window[++scan]&&
								prev==s.window[++scan]&&prev==s.window[++scan]&&
								prev==s.window[++scan]&&prev==s.window[++scan]&&
								prev==s.window[++scan]&&prev==s.window[++scan]&&
								scan<strend);
						s.match_length=MAX_MATCH-(uint)(strend-scan);
						if(s.match_length>s.lookahead) s.match_length=s.lookahead;
					}
				}

				// Emit match if have run of MIN_MATCH or longer, else emit literal
				if(s.match_length>=MIN_MATCH)
				{
					//was _tr_tally_dist(s, 1, s.match_length-MIN_MATCH, bflush);
					{
						byte len=(byte)(s.match_length-MIN_MATCH);
						ushort dist=1;
						s.d_buf[s.last_lit]=dist;
						s.l_buf[s.last_lit++]=len;
						dist--;
						s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
						s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?true:false;
					}

					s.lookahead-=s.match_length;
					s.strstart+=s.match_length;
					s.match_length=0;
				}
				else
				{
					// No match, output a literal byte
					//Tracevv((stderr,"%c", s.window[s.strstart]));
					//was _tr_tally_lit(s, s.window[s.strstart], bflush);
					{
						byte cc=s.window[s.strstart];
						s.d_buf[s.last_lit]=0;
						s.l_buf[s.last_lit++]=cc;
						s.dyn_ltree[cc].Freq++;
						bflush=(s.last_lit==s.lit_bufsize-1)?true:false;
					}

					s.lookahead--;
					s.strstart++;
				}
				if(bflush)
				{
					// FLUSH_BLOCK(s, 0);
					_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
						(uint)((int)s.strstart-s.block_start), 0);
					s.block_start=(int)s.strstart;
					flush_pending(s.strm);
					//Tracev((stderr,"[FLUSH]"));
					if(s.strm.avail_out==0) return block_state.need_more;
				}
			}

			//was FLUSH_BLOCK(s, flush==Z_FINISH);
			_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
				(uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
			s.block_start=(int)s.strstart;
			flush_pending(s.strm);
			//Tracev((stderr,"[FLUSH]"));
			if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

			return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
		}

		// ===========================================================================
		// For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
		// (It will be regenerated if this run of deflate switches away from Huffman.)
		static block_state deflate_huff(deflate_state s, int flush)
		{
			bool bflush;				// set if current block must be flushed

			for(; ; )
			{
				// Make sure that we have a literal to write.
				if(s.lookahead==0)
				{
					fill_window(s);
					if(s.lookahead==0)
					{
						if(flush==Z_NO_FLUSH)
							return block_state.need_more;
						break; // flush the current block
					}
				}

				// Output a literal byte
				s.match_length=0;
				//Tracevv((stderr,"%c", s.window[s.strstart]));

				//was _tr_tally_lit(s, s.window[s.strstart], bflush);
				{
					byte cc=s.window[s.strstart];
					s.d_buf[s.last_lit]=0;
					s.l_buf[s.last_lit++]=cc;
					s.dyn_ltree[cc].Freq++;
					bflush=(s.last_lit==s.lit_bufsize-1)?true:false;
				}

				s.lookahead--;
				s.strstart++;
				if(bflush)
				{
					// FLUSH_BLOCK(s, 0);
					_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
						(uint)((int)s.strstart-s.block_start), 0);
					s.block_start=(int)s.strstart;
					flush_pending(s.strm);
					//Tracev((stderr,"[FLUSH]"));
					if(s.strm.avail_out==0) return block_state.need_more;
				}
			}

			//was FLUSH_BLOCK(s, flush==Z_FINISH);
			_tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0,
				(uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
			s.block_start=(int)s.strstart;
			flush_pending(s.strm);
			//Tracev((stderr,"[FLUSH]"));
			if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

			return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
		}
	}
}