From 7e0f4f1285b4dfad9468ab3fa36d6aab02f95906 Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Fri, 28 Dec 2012 23:52:38 -0800
Subject: [PATCH 1/9] More diagnostics in imagespeed_test to pinpoint
 bottlenecks

---
 src/libOpenImageIO/imagespeed_test.cpp | 83 +++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 3 deletions(-)

diff --git a/src/libOpenImageIO/imagespeed_test.cpp b/src/libOpenImageIO/imagespeed_test.cpp
index 4d926d3eec..a874217c80 100644
--- a/src/libOpenImageIO/imagespeed_test.cpp
+++ b/src/libOpenImageIO/imagespeed_test.cpp
@@ -105,6 +105,42 @@ time_read_image ()
 
 
 
+static void
+time_read_scanline_at_a_time ()  // trial: open the input and read it one scanline per call
+{
+    ImageInput *in = ImageInput::open (input_filename.c_str());
+    ASSERT (in);  // requires that open returned a non-NULL reader
+    const ImageSpec &spec (in->spec());
+    size_t pixelsize = spec.nchannels * sizeof(float);  // bytes per pixel at the requested TypeFloat
+    imagesize_t scanlinesize = spec.width * pixelsize;  // bytes per scanline in buffer
+    for (int y = 0; y < spec.height;  ++y) {
+        in->read_scanline (y+spec.y, 0, TypeDesc::TypeFloat,  // rows are offset by spec.y; return value is not checked
+                           &buffer[scanlinesize*y]);  // row y is written at its own offset in buffer (declared outside this hunk)
+    }
+    in->close ();
+    delete in;  // reader is released manually after close
+}
+
+
+
+static void
+time_read_64_scanlines_at_a_time ()  // trial: open the input and read it in batches of up to 64 scanlines
+{
+    ImageInput *in = ImageInput::open (input_filename.c_str());
+    ASSERT (in);  // requires that open returned a non-NULL reader
+    const ImageSpec &spec (in->spec());
+    size_t pixelsize = spec.nchannels * sizeof(float);  // bytes per pixel at the requested TypeFloat
+    imagesize_t scanlinesize = spec.width * pixelsize;  // bytes per scanline in buffer
+    for (int y = 0; y < spec.height;  y += 64) {
+        in->read_scanlines (y+spec.y, std::min(y+spec.y+64, spec.y+spec.height),  // second bound is clamped, so the last batch may be shorter
+                            0, TypeDesc::TypeFloat, &buffer[scanlinesize*y]);  // return value is not checked
+    }
+    in->close ();
+    delete in;  // reader is released manually after close
+}
+
+
+
 static void
 time_read_imagebuf ()
 {
@@ -141,26 +177,67 @@ main (int argc, char **argv)
     imagecache->attribute ("forcefloat", 1);
 
     // Allocate a buffer big enough (for floats)
-    bool ok = imagecache->get_imagespec (input_filename, spec);
+    bool ok = imagecache->get_imagespec (input_filename, spec, 0, 0, true);
     ASSERT (ok);
     imagecache->invalidate_all (true);  // Don't hold anything
     buffer.resize (spec.image_pixels()*spec.nchannels*sizeof(float), 0);
-
+ 
     {
         double t = time_trial (time_read_image, ntrials);
-        std::cout << "image_read speed: " << Strutil::timeintervalformat(t,2) << "\n";
+        std::cout << "read_image speed: " << Strutil::timeintervalformat(t,2) << "\n";
     }
 
+    if (spec.tile_width == 0) {
+        double t = time_trial (time_read_scanline_at_a_time, ntrials);
+        std::cout << "read_scanline (1 at a time) speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
+
+    if (spec.tile_width == 0) {
+        double t = time_trial (time_read_64_scanlines_at_a_time, ntrials);
+        std::cout << "read_scanlines (64 at a time) speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
+    {
+        imagecache->invalidate_all (true);  // Don't hold anything
+        double t = time_trial (time_read_imagebuf, ntrials);
+        std::cout << "ImageBuf read speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
+
+    {
+        imagecache->invalidate_all (true);  // Don't hold anything
+        double t = time_trial (time_ic_get_pixels, ntrials);
+        std::cout << "ImageCache get_pixels speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
+
+    std::cout << "With autotile = 64:\n";
+    imagecache->attribute ("autotile", 64);
     {
+        imagecache->invalidate_all (true);  // Don't hold anything
         double t = time_trial (time_read_imagebuf, ntrials);
         std::cout << "ImageBuf read speed: " << Strutil::timeintervalformat(t,2) << "\n";
     }
+    {
+        imagecache->invalidate_all (true);  // Don't hold anything
+        double t = time_trial (time_ic_get_pixels, ntrials);
+        std::cout << "ImageCache get_pixels speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
 
+    std::cout << "With autotile = 64, autoscanline = 1:\n";
+    imagecache->attribute ("autotile", 64);
+    imagecache->attribute ("autoscanline", 1);
     {
+        imagecache->invalidate_all (true);  // Don't hold anything
+        double t = time_trial (time_read_imagebuf, ntrials);
+        std::cout << "ImageBuf read speed: " << Strutil::timeintervalformat(t,2) << "\n";
+    }
+    {
+        imagecache->invalidate_all (true);  // Don't hold anything
         double t = time_trial (time_ic_get_pixels, ntrials);
         std::cout << "ImageCache get_pixels speed: " << Strutil::timeintervalformat(t,2) << "\n";
     }
 
+    if (verbose)
+        std::cout << "\n" << imagecache->getstats(2) << "\n";
+
     imagecache->invalidate_all (true);  // Don't hold anything
 
     ImageCache::destroy (imagecache);

From 29290ffa8c184695cd209cb667a4848a1163205b Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sat, 29 Dec 2012 23:01:57 -0800
Subject: [PATCH 2/9] scoped_array template for simple memory management of
 dynamically-allocated array

---
 src/include/imagebuf.h | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/include/imagebuf.h b/src/include/imagebuf.h
index 8ae57cf1dc..aa5f414791 100644
--- a/src/include/imagebuf.h
+++ b/src/include/imagebuf.h
@@ -52,8 +52,34 @@
 OIIO_NAMESPACE_ENTER
 {
 
-class ImageBuf;
+/// Helper template for a scoped array -- just a memory-managed holder
+/// of a dynamically-allocated array. A bit like boost::scoped_array,
+/// but not dependent on Boost. Some day, C++ std::unique_ptr will be used
+/// instead, but for now, this will work with even very old C++
+/// compilers.
+template <class T>
+class scoped_array {
+public:
+    explicit scoped_array (T *t=NULL) : m_ptr(t) { }  // holds t; the destructor will delete[] it
+    ~scoped_array () { delete [] m_ptr; }  // releases the held array with delete[]
+    void reset (T *t=NULL) { delete [] m_ptr; m_ptr = t; }  // frees the old array first, so passing the held pointer leaves it dangling
+    T* get () { return m_ptr; }  // raw pointer; this object keeps holding it
+    const T* get () const { return m_ptr; }  // const overload of get()
+    T& operator[] (size_t i) { return m_ptr[i]; }  // no bounds or NULL check
+    const T& operator[] (size_t i) const { return m_ptr[i]; }  // const overload, likewise unchecked
+    void swap (scoped_array &x) { std::swap (m_ptr, x.m_ptr); }  // exchanges only the held pointers
+    operator bool () const { return m_ptr != NULL; }  // true when non-NULL; NOTE(review): conversion is implicit
+private:
+    T *m_ptr;  ///< The held array, or NULL
+    const scoped_array& operator= (const scoped_array &);  // private and not defined here: blocks assignment
+    scoped_array (const scoped_array &x);  // private and not defined here: blocks copying
+};
 
+template <class T>  // free-function swap for scoped_array<T>
+void swap (scoped_array<T> & a, scoped_array<T> & b) { a.swap(b); }  // forwards to the member swap
+
+    
+class ImageBuf;
 
 
 /// Helper struct describing a region of interest in an image.

From 333fbb97b68cf9980c18bc66d32e4667892b337e Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Fri, 28 Dec 2012 23:59:42 -0800
Subject: [PATCH 3/9] ImageBuf perf improvement -- hold local pixels in
 scoped_array rather than std::vector in order to reduce needless
 initialization of the buffers.

---
 src/include/imagebuf.h          |  2 +-
 src/libOpenImageIO/imagebuf.cpp | 70 ++++-----------------------------
 2 files changed, 8 insertions(+), 64 deletions(-)

diff --git a/src/include/imagebuf.h b/src/include/imagebuf.h
index aa5f414791..cbd736dfba 100644
--- a/src/include/imagebuf.h
+++ b/src/include/imagebuf.h
@@ -1200,7 +1200,7 @@ class OIIO_API ImageBuf {
     int m_nmiplevels;            ///< # of MIP levels in the current subimage
     ImageSpec m_spec;            ///< Describes the image (size, etc)
     ImageSpec m_nativespec;      ///< Describes the true native image
-    std::vector<char> m_pixels;  ///< Pixel data, if local and we own it
+    scoped_array<char> m_pixels; ///< Pixel data, if local and we own it
     char *m_localpixels;         ///< Pointer to local pixels
     bool m_clientpixels;         ///< Local pixels are owned by the client app
     bool m_spec_valid;           ///< Is the spec valid
diff --git a/src/libOpenImageIO/imagebuf.cpp b/src/libOpenImageIO/imagebuf.cpp
index 3b2fbb0092..24a703e97f 100644
--- a/src/libOpenImageIO/imagebuf.cpp
+++ b/src/libOpenImageIO/imagebuf.cpp
@@ -174,8 +174,8 @@ ImageBuf::ImageBuf (const ImageBuf &src)
       m_current_miplevel(src.m_current_miplevel),
       m_nmiplevels(src.m_nmiplevels),
       m_spec(src.m_spec), m_nativespec(src.m_nativespec),
-      m_pixels(src.m_pixels),
-      m_localpixels(src.m_localpixels),
+      m_pixels(src.localpixels() ? new char [src.spec().image_bytes()] : NULL),
+      m_localpixels(m_pixels.get()),
       m_clientpixels(src.m_clientpixels),
       m_spec_valid(src.m_spec_valid), m_pixels_valid(src.m_pixels_valid),
       m_badfile(src.m_badfile),
@@ -191,9 +191,8 @@ ImageBuf::ImageBuf (const ImageBuf &src)
             // Source just wrapped the client app's pixels
             ASSERT (0 && "ImageBuf wrapping client buffer not yet supported");
         } else {
-            // We own our pixels
-            // Make sure our localpixels points to our own owned memory.
-            m_localpixels = &m_pixels[0];
+            // We own our pixels -- copy from source
+            memcpy (m_pixels.get(), src.m_pixels.get(), spec().image_bytes());
         }
     } else {
         // Source was cache-based or deep
@@ -213,54 +212,6 @@ ImageBuf::~ImageBuf ()
 
 
 
-#if 0
-const ImageBuf &
-ImageBuf::operator= (const ImageBuf &src)
-{
-    if (&src != this) {
-        m_name = src.m_name;
-        m_fileformat = src.m_fileformat;
-        m_nsubimages = src.m_nsubimages;
-        m_current_subimage = src.m_current_subimage;
-        m_current_miplevel = src.m_current_miplevel;
-        m_nmiplevels = src.m_nmiplevels;
-        m_spec = src.m_spec;
-        m_nativespec = src.m_nativespec;
-        m_pixels = src.m_pixels;
-        m_localpixels = src.m_localpixels;
-        m_clientpixels = src.m_clientpixels;
-        m_spec_valid = src.m_spec_valid;
-        m_pixels_valid = src.m_pixels_valid;
-        m_badfile = src.m_badfile;
-        m_err.clear();
-        m_orientation = src.m_orientation;
-        m_pixelaspect = src.m_pixelaspect;
-        m_imagecache = src.m_imagecache;
-        m_cachedpixeltype = src.m_cachedpixeltype;
-        if (src.localpixels()) {
-            // Source had the image fully in memory (no cache)
-            if (src.m_clientpixels) {
-                // Source just wrapped the client app's pixels
-                ASSERT (0 && "ImageBuf wrapping client buffer not yet supported");
-                std::vector<char> tmp;
-                std::swap (m_pixels, tmp);  // delete it with prejudice
-            } else {
-                // We own our pixels
-                // Make sure our localpixels points to our own owned memory.
-                m_localpixels = &m_pixels[0];
-            }
-        } else {
-            // Source was cache-based
-            std::vector<char> tmp;
-            std::swap (m_pixels, tmp);  // delete it with prejudice
-        }
-    }
-    return *this;
-}
-#endif
-
-
-
 static spin_mutex err_mutex;      ///< Protect m_err fields
 
 
@@ -307,7 +258,7 @@ ImageBuf::clear ()
     m_current_miplevel = -1;
     m_spec = ImageSpec ();
     m_nativespec = ImageSpec ();
-    std::vector<char>().swap (m_pixels);  // clear it with deallocation
+    m_pixels.reset ();
     m_localpixels = NULL;
     m_clientpixels = false;
     m_spec_valid = false;
@@ -347,15 +298,8 @@ void
 ImageBuf::realloc ()
 {
     size_t newsize = spec().deep ? size_t(0) : spec().image_bytes ();
-    if (((int)m_pixels.size() - (int)newsize) > 4*1024*1024) {
-        // If we are substantially shrinking, try to actually free
-        // memory, which std::vector::resize does not do!
-        std::vector<char> tmp;      // vector with 0 memory
-        std::swap (tmp, m_pixels);  // Now tmp holds the mem, not m_pixels
-        // As tmp leaves scope, it frees m_pixels's old memory
-    }
-    m_pixels.resize (newsize);
-    m_localpixels = newsize ? &m_pixels[0] : NULL;
+    m_pixels.reset (newsize ? new char [newsize] : NULL);
+    m_localpixels = m_pixels.get();
     m_clientpixels = false;
 #if 0
     std::cerr << "ImageBuf " << m_name << " local allocation: " << newsize << "\n";

From e5cb84e5d123afb2656f2f446db30e875dd6f28c Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sat, 29 Dec 2012 23:21:05 -0800
Subject: [PATCH 4/9] ImageCache perf: for tile pixel mem, use scoped_array
 rather than std::vector in order to avoid useless initialization of the
 memory when it's allocated.

---
 src/libtexture/imagecache.cpp   | 6 ++++--
 src/libtexture/imagecache_pvt.h | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/libtexture/imagecache.cpp b/src/libtexture/imagecache.cpp
index ce5ea47a38..7a2e2d173a 100644
--- a/src/libtexture/imagecache.cpp
+++ b/src/libtexture/imagecache.cpp
@@ -1154,6 +1154,7 @@ ImageCacheTile::ImageCacheTile (const TileID &id,
 {
     m_used = true;
     m_pixels_ready = false;
+    m_pixels_size = 0;
     if (read_now) {
         read (thread_info);
     }
@@ -1167,11 +1168,12 @@ ImageCacheTile::ImageCacheTile (const TileID &id, void *pels, TypeDesc format,
     : m_id (id) // , m_used(true)
 {
     m_used = true;
+    m_pixels_size = 0;
     ImageCacheFile &file (m_id.file ());
     const ImageSpec &spec (file.spec(id.subimage(), id.miplevel()));
     size_t size = memsize_needed ();
     ASSERT (size > 0 && memsize() == 0);
-    m_pixels.resize (size);
+    m_pixels.reset (new char [m_pixels_size = size]);
     size_t dst_pelsize = spec.nchannels * file.datatype().size();
     m_valid = convert_image (spec.nchannels, spec.tile_width, spec.tile_height,
                              spec.tile_depth, pels, format, xstride, ystride,
@@ -1199,7 +1201,7 @@ ImageCacheTile::read (ImageCachePerThreadInfo *thread_info)
              "ImageCacheTile::read expects to NOT hold the tile lock");
     size_t size = memsize_needed ();
     ASSERT (memsize() == 0 && size > 0);
-    m_pixels.resize (size);
+    m_pixels.reset (new char [m_pixels_size = size]);
     ImageCacheFile &file (m_id.file());
     m_valid = file.read_tile (thread_info, m_id.subimage(), m_id.miplevel(),
                               m_id.x(), m_id.y(), m_id.z(),
diff --git a/src/libtexture/imagecache_pvt.h b/src/libtexture/imagecache_pvt.h
index f4c43cd338..7cb2b57574 100644
--- a/src/libtexture/imagecache_pvt.h
+++ b/src/libtexture/imagecache_pvt.h
@@ -41,6 +41,7 @@
 #include "texture.h"
 #include "refcnt.h"
 #include "hash.h"
+#include "imagebuf.h"
 
 
 OIIO_NAMESPACE_ENTER
@@ -471,7 +472,7 @@ class ImageCacheTile : public RefCnt {
     /// Return the actual allocated memory size for this tile's pixels.
     ///
     size_t memsize () const {
-        return m_pixels.size();
+        return m_pixels_size;
     }
 
     /// Return the space that will be needed for this tile's pixels.
@@ -511,7 +512,8 @@ class ImageCacheTile : public RefCnt {
 
 private:
     TileID m_id;                  ///< ID of this tile
-    std::vector<char> m_pixels;   ///< The pixel data
+    scoped_array<char> m_pixels;  ///< The pixel data
+    size_t m_pixels_size;         ///< How much m_pixels has allocated
     bool m_valid;                 ///< Valid pixels
     atomic_int m_used;            ///< Used recently
     volatile bool m_pixels_ready; ///< The pixels have been read from disk

From 38774bf27243ca9d5c1a1a19f413eef1720782ad Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sat, 29 Dec 2012 23:29:40 -0800
Subject: [PATCH 5/9] ImageCache perf: use scoped_array rather than vector for
 read_untiled temp memory

---
 src/libtexture/imagecache.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/libtexture/imagecache.cpp b/src/libtexture/imagecache.cpp
index 7a2e2d173a..d8b418cca5 100644
--- a/src/libtexture/imagecache.cpp
+++ b/src/libtexture/imagecache.cpp
@@ -814,12 +814,12 @@ ImageCacheFile::read_untiled (ImageCachePerThreadInfo *thread_info,
         // if not already present, on the assumption that it's highly
         // likely that they will also soon be requested.
         // FIXME -- I don't think this works properly for 3D images
-        int pixelsize = spec.nchannels * format.size();
+        size_t pixelsize = size_t (spec.nchannels * format.size());
         // Because of the way we copy below, we need to allocate the
         // buffer to be an even multiple of the tile width, so round up.
         stride_t scanlinesize = tw * ((spec.width+tw-1)/tw);
         scanlinesize *= pixelsize;
-        std::vector<char> buf (scanlinesize * th); // a whole tile-row size
+        scoped_array<char> buf (new char [scanlinesize * th]); // a whole tile-row size
         int yy = y - spec.y;   // counting from top scanline
         // [y0,y1] is the range of scanlines to read for a tile-row
         int y0 = yy - (yy % th);

From f17cb0d4c5983141a8d0b41357f614ea29855adf Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sat, 29 Dec 2012 23:38:21 -0800
Subject: [PATCH 6/9] ImageCache perf: read_untiled autotile case -- use
 read_scanlines instead of read_scanline

---
 src/libtexture/imagecache.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/libtexture/imagecache.cpp b/src/libtexture/imagecache.cpp
index d8b418cca5..3d20784ea9 100644
--- a/src/libtexture/imagecache.cpp
+++ b/src/libtexture/imagecache.cpp
@@ -827,11 +827,10 @@ ImageCacheFile::read_untiled (ImageCachePerThreadInfo *thread_info,
         y0 += spec.y;
         y1 += spec.y;
         // Read the whole tile-row worth of scanlines
-        for (int scanline = y0, i = 0; scanline <= y1 && ok; ++scanline, ++i) {
-            ok = m_input->read_scanline (scanline, z, format, (void *)&buf[scanlinesize*i]);
-            if (! ok)
-                imagecache().error ("%s", m_input->geterror().c_str());
-        }
+        ok = m_input->read_scanlines (y0, y1+1, z, format, (void *)&buf[0],
+                                      pixelsize, scanlinesize);
+        if (! ok)
+            imagecache().error ("%s", m_input->geterror().c_str());
         size_t b = (y1-y0+1) * spec.scanline_bytes();
         thread_info->m_stats.bytes_read += b;
         m_bytesread += b;

From eabe9c1f1b6f3d37d88722b7f945f6d25e3d2838 Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sat, 29 Dec 2012 23:46:42 -0800
Subject: [PATCH 7/9] ImageCache perf: refactor get_pixels to do fewer tile
 queries

---
 src/libtexture/imagecache.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/libtexture/imagecache.cpp b/src/libtexture/imagecache.cpp
index 3d20784ea9..60b121f6ea 100644
--- a/src/libtexture/imagecache.cpp
+++ b/src/libtexture/imagecache.cpp
@@ -2267,9 +2267,16 @@ ImageCacheImpl::get_pixels (ImageCacheFile *file,
             }
             continue;
         }
+        int old_tx = -100000, old_ty = -100000, old_tz = -100000;
         int tz = z - ((z - spec.z) % spec.tile_depth);
         char *yptr = zptr;
+        int ty = ybegin - ((ybegin - spec.y) % spec.tile_height);
+        int tyend = ty + spec.tile_height;
         for (int y = ybegin;  y < yend;  ++y, yptr += ystride) {
+            if (y == tyend) {
+                ty = tyend;
+                tyend += spec.tile_height;
+            }
             if (y < spec.y || y >= (spec.y+spec.height)) {
                 // nonexistant scanlines
                 if (xstride == formatpixelsize) {
@@ -2283,9 +2290,8 @@ ImageCacheImpl::get_pixels (ImageCacheFile *file,
                 }
                 continue;
             }
-            int ty = y - ((y - spec.y) % spec.tile_height);
+            // int ty = y - ((y - spec.y) % spec.tile_height);
             char *xptr = yptr;
-            int old_tx = -100000;
             const char *data = NULL;
             for (int x = xbegin;  x < xend;  ++x, xptr += xstride) {
                 if (x < spec.x || x >= (spec.x+spec.width)) {
@@ -2294,19 +2300,24 @@ ImageCacheImpl::get_pixels (ImageCacheFile *file,
                     continue;
                 }
                 int tx = x - ((x - spec.x) % spec.tile_width);
-                if (old_tx != tx) {
+                if (old_tx != tx || old_ty != ty || old_tz != tz) {
                     // Only do a find_tile and re-setup of the data
                     // pointer when we move across a tile boundary.
                     TileID tileid (*file, subimage, miplevel, tx, ty, tz);
                     ok &= find_tile (tileid, thread_info);
                     if (! ok)
                         return false;  // Just stop if file read failed
+                    old_tx = tx;
+                    old_ty = ty;
+                    old_tz = tz;
+                    data = NULL;
+                }
+                if (! data) {
                     ImageCacheTileRef &tile (thread_info->tile);
                     ASSERT (tile);
                     data = (const char *)tile->data (x, y, z)
                                         + chbegin*formatsize;
                     ASSERT (data);
-                    old_tx = tx;
                 }
                 if (xcontig) {
                     // Special case for a contiguous span within one tile

From c0d4adce39241b806e10ee1f3127e4d173400714 Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sun, 30 Dec 2012 17:57:06 -0800
Subject: [PATCH 8/9] ImageCache speedups -- remove pow2 roundups of autotile
 tile sizes. This was an artifact of an old restriction of TextureSystem which
 is no longer necessary.

---
 src/libtexture/imagecache.cpp | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/src/libtexture/imagecache.cpp b/src/libtexture/imagecache.cpp
index 60b121f6ea..902ae73832 100644
--- a/src/libtexture/imagecache.cpp
+++ b/src/libtexture/imagecache.cpp
@@ -371,26 +371,24 @@ ImageCacheFile::open (ImageCachePerThreadInfo *thread_info)
             }
             if (tempspec.tile_width == 0 || tempspec.tile_height == 0) {
                 si.untiled = true;
-                if (imagecache().autotile()) {
+                int autotile = imagecache().autotile();
+                if (autotile) {
                     // Automatically make it appear as if it's tiled
                     if (imagecache().autoscanline()) {
-                        tempspec.tile_width = pow2roundup (tempspec.width);
+                        tempspec.tile_width = tempspec.width;
                     } else {
-                        tempspec.tile_width = imagecache().autotile();
+                        tempspec.tile_width = std::min (tempspec.width, autotile);
                     }
-                    tempspec.tile_height = imagecache().autotile();
-                    if (tempspec.depth > 1)
-                        tempspec.tile_depth = imagecache().autotile();
-                    else
-                        tempspec.tile_depth = 1;
+                    tempspec.tile_height = std::min (tempspec.height, autotile);
+                    tempspec.tile_depth = std::min (std::max(tempspec.depth,1), autotile);
                 } else {
                     // Don't auto-tile -- which really means, make it look like
                     // a single tile that's as big as the whole image.
                     // We round to a power of 2 because the texture system
                     // currently requires power of 2 tile sizes.
-                    tempspec.tile_width = pow2roundup (tempspec.width);
-                    tempspec.tile_height = pow2roundup (tempspec.height);
-                    tempspec.tile_depth = pow2roundup(tempspec.depth);
+                    tempspec.tile_width = tempspec.width;
+                    tempspec.tile_height = tempspec.height;
+                    tempspec.tile_depth = tempspec.depth;
                 }
             }
             thread_info->m_stats.files_totalsize += tempspec.image_bytes();
@@ -448,10 +446,6 @@ ImageCacheFile::open (ImageCachePerThreadInfo *thread_info)
                     s.tile_height = h;
                     s.tile_depth = d;
                 }
-                // Texture system requires pow2 tile sizes
-                s.tile_width = pow2roundup (s.tile_width);
-                s.tile_height = pow2roundup (s.tile_height);
-                s.tile_depth = pow2roundup (s.tile_depth);
                 ++nmip;
                 maxmip = std::max (nmip, maxmip);
                 LevelInfo levelinfo (s, s);

From 6f5744e92c4cb881ae83bc059dc72a21d11ab3b1 Mon Sep 17 00:00:00 2001
From: Larry Gritz <lg@larrygritz.com>
Date: Sun, 30 Dec 2012 19:44:52 -0800
Subject: [PATCH 9/9] oiiotool - fix minor bug where tiled files were output
 inappropriately.

---
 src/oiiotool/oiiotool.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/oiiotool/oiiotool.cpp b/src/oiiotool/oiiotool.cpp
index c1fb675f20..c05f8ffdd8 100644
--- a/src/oiiotool/oiiotool.cpp
+++ b/src/oiiotool/oiiotool.cpp
@@ -195,7 +195,8 @@ input_file (int argc, const char *argv[])
 
 
 static void
-adjust_output_options (ImageSpec &spec, const Oiiotool &ot)
+adjust_output_options (ImageSpec &spec, const Oiiotool &ot,
+                       bool format_supports_tiles)
 {
     if (ot.output_dataformat != TypeDesc::UNKNOWN) {
         spec.set_format (ot.output_dataformat);
@@ -207,12 +208,14 @@ adjust_output_options (ImageSpec &spec, const Oiiotool &ot)
 
 //        spec.channelformats.clear ();   // FIXME: why?
 
-    if (ot.output_scanline)
-        spec.tile_width = spec.tile_height = 0;
-    else if (ot.output_tilewidth) {
+    // If we've had tiled input and scanline was not explicitly
+    // requested, we'll try tiled output.
+    if (ot.output_tilewidth && !ot.output_scanline && format_supports_tiles) {
         spec.tile_width = ot.output_tilewidth;
         spec.tile_height = ot.output_tileheight;
         spec.tile_depth = 1;
+    } else {
+        spec.tile_width = spec.tile_height = spec.tile_depth = 0;
     }
 
     if (! ot.output_compression.empty())
@@ -273,6 +276,7 @@ output_file (int argc, const char *argv[])
         return 0;
     }
     bool supports_displaywindow = out->supports ("displaywindow");
+    bool supports_tiles = out->supports ("tiles");
     ot.read ();
     ImageRecRef saveimg = ot.curimg;
     ImageRecRef ir (ot.curimg);
@@ -291,7 +295,7 @@ output_file (int argc, const char *argv[])
     std::vector<ImageSpec> subimagespecs (ir->subimages());
     for (int s = 0;  s < ir->subimages();  ++s) {
         ImageSpec spec = *ir->spec(s,0);
-        adjust_output_options (spec, ot);
+        adjust_output_options (spec, ot, supports_tiles);
         subimagespecs[s] = spec;
     }
 
@@ -313,7 +317,7 @@ output_file (int argc, const char *argv[])
     for (int s = 0, send = ir->subimages();  s < send;  ++s) {
         for (int m = 0, mend = ir->miplevels(s);  m < mend;  ++m) {
             ImageSpec spec = *ir->spec(s,m);
-            adjust_output_options (spec, ot);
+            adjust_output_options (spec, ot, supports_tiles);
             if (s > 0 || m > 0) {  // already opened first subimage/level
                 if (! out->open (filename, spec, mode)) {
                     std::cerr << "oiiotool ERROR: " << out->geterror() << "\n";